1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2014-2021 Netronome Systems, Inc. 3 * All rights reserved. 4 * 5 * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation. 6 */ 7 8 #include "nfp_rxtx.h" 9 10 #include <ethdev_pci.h> 11 #include <rte_security.h> 12 13 #include "nfd3/nfp_nfd3.h" 14 #include "nfdk/nfp_nfdk.h" 15 #include "nfdk/nfp_nfdk_vec.h" 16 #include "flower/nfp_flower.h" 17 18 #include "nfp_ipsec.h" 19 #include "nfp_logs.h" 20 #include "nfp_net_meta.h" 21 #include "nfp_rxtx_vec.h" 22 23 /* 24 * The bit format and map of nfp packet type for rxd.offload_info in Rx descriptor. 25 * 26 * Bit format about nfp packet type refers to the following: 27 * --------------------------------- 28 * 1 0 29 * 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 30 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 31 * | |ol3|tunnel | l3 | l4 | 32 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 33 * 34 * Bit map about nfp packet type refers to the following: 35 * 36 * L4: bit 0~2, used for layer 4 or inner layer 4. 37 * 000: NFP_NET_PTYPE_L4_NONE 38 * 001: NFP_NET_PTYPE_L4_TCP 39 * 010: NFP_NET_PTYPE_L4_UDP 40 * 011: NFP_NET_PTYPE_L4_FRAG 41 * 100: NFP_NET_PTYPE_L4_NONFRAG 42 * 101: NFP_NET_PTYPE_L4_ICMP 43 * 110: NFP_NET_PTYPE_L4_SCTP 44 * 111: reserved 45 * 46 * L3: bit 3~5, used for layer 3 or inner layer 3. 47 * 000: NFP_NET_PTYPE_L3_NONE 48 * 001: NFP_NET_PTYPE_L3_IPV6 49 * 010: NFP_NET_PTYPE_L3_IPV4 50 * 011: NFP_NET_PTYPE_L3_IPV4_EXT 51 * 100: NFP_NET_PTYPE_L3_IPV6_EXT 52 * 101: NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN 53 * 110: NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN 54 * 111: reserved 55 * 56 * Tunnel: bit 6~9, used for tunnel. 57 * 0000: NFP_NET_PTYPE_TUNNEL_NONE 58 * 0001: NFP_NET_PTYPE_TUNNEL_VXLAN 59 * 0100: NFP_NET_PTYPE_TUNNEL_NVGRE 60 * 0101: NFP_NET_PTYPE_TUNNEL_GENEVE 61 * 0010, 0011, 0110~1111: reserved 62 * 63 * Outer L3: bit 10~11, used for outer layer 3. 64 * 00: NFP_NET_PTYPE_OUTER_L3_NONE 65 * 01: NFP_NET_PTYPE_OUTER_L3_IPV6 66 * 10: NFP_NET_PTYPE_OUTER_L3_IPV4 67 * 11: reserved 68 * 69 * Reserved: bit 12~15, used for extension. 70 */ 71 72 /* Mask and offset about nfp packet type based on the bit map above. */ 73 #define NFP_NET_PTYPE_L4_MASK 0x0007 74 #define NFP_NET_PTYPE_L3_MASK 0x0038 75 #define NFP_NET_PTYPE_TUNNEL_MASK 0x03c0 76 #define NFP_NET_PTYPE_OUTER_L3_MASK 0x0c00 77 78 #define NFP_NET_PTYPE_L4_OFFSET 0 79 #define NFP_NET_PTYPE_L3_OFFSET 3 80 #define NFP_NET_PTYPE_TUNNEL_OFFSET 6 81 #define NFP_NET_PTYPE_OUTER_L3_OFFSET 10 82 83 /* Case about nfp packet type based on the bit map above. */ 84 #define NFP_NET_PTYPE_L4_NONE 0 85 #define NFP_NET_PTYPE_L4_TCP 1 86 #define NFP_NET_PTYPE_L4_UDP 2 87 #define NFP_NET_PTYPE_L4_FRAG 3 88 #define NFP_NET_PTYPE_L4_NONFRAG 4 89 #define NFP_NET_PTYPE_L4_ICMP 5 90 #define NFP_NET_PTYPE_L4_SCTP 6 91 92 #define NFP_NET_PTYPE_L3_NONE 0 93 #define NFP_NET_PTYPE_L3_IPV6 1 94 #define NFP_NET_PTYPE_L3_IPV4 2 95 #define NFP_NET_PTYPE_L3_IPV4_EXT 3 96 #define NFP_NET_PTYPE_L3_IPV6_EXT 4 97 #define NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN 5 98 #define NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN 6 99 100 #define NFP_NET_PTYPE_TUNNEL_NONE 0 101 #define NFP_NET_PTYPE_TUNNEL_VXLAN 1 102 #define NFP_NET_PTYPE_TUNNEL_NVGRE 4 103 #define NFP_NET_PTYPE_TUNNEL_GENEVE 5 104 105 #define NFP_NET_PTYPE_OUTER_L3_NONE 0 106 #define NFP_NET_PTYPE_OUTER_L3_IPV6 1 107 #define NFP_NET_PTYPE_OUTER_L3_IPV4 2 108 109 #define NFP_PTYPE2RTE(tunnel, type) ((tunnel) ? RTE_PTYPE_INNER_##type : RTE_PTYPE_##type) 110 111 /* Record NFP packet type parsed from rxd.offload_info.
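 *
 * As a worked example (the value is hypothetical and only meant to
 * illustrate the bit format and map documented above): an offload_info of
 * 0x0851 is 0000 1000 0101 0001 in binary, which records a VXLAN
 * encapsulated TCP/IPv4 packet with an IPv4 outer header, i.e.
 * l4_ptype = 001 (NFP_NET_PTYPE_L4_TCP), l3_ptype = 010
 * (NFP_NET_PTYPE_L3_IPV4), tunnel_ptype = 0001 (NFP_NET_PTYPE_TUNNEL_VXLAN)
 * and outer_l3_ptype = 10 (NFP_NET_PTYPE_OUTER_L3_IPV4).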
*/ 112 struct nfp_ptype_parsed { 113 uint8_t l4_ptype; /**< Packet type of layer 4, or inner layer 4. */ 114 uint8_t l3_ptype; /**< Packet type of layer 3, or inner layer 3. */ 115 uint8_t tunnel_ptype; /**< Packet type of tunnel. */ 116 uint8_t outer_l3_ptype; /**< Packet type of outer layer 3. */ 117 }; 118 119 /* Set mbuf checksum flags based on RX descriptor flags */ 120 void 121 nfp_net_rx_cksum(struct nfp_net_rxq *rxq, 122 struct nfp_net_rx_desc *rxd, 123 struct rte_mbuf *mb) 124 { 125 struct nfp_net_hw *hw = rxq->hw; 126 127 if ((hw->super.ctrl & NFP_NET_CFG_CTRL_RXCSUM) == 0) 128 return; 129 130 /* If IPv4 and IP checksum error, fail */ 131 if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) != 0 && 132 (rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK) == 0)) 133 mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 134 else 135 mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 136 137 /* If neither UDP nor TCP return */ 138 if ((rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) == 0 && 139 (rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM) == 0) 140 return; 141 142 if (likely(rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK) != 0) 143 mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 144 else 145 mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 146 } 147 148 static int 149 nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq) 150 { 151 uint16_t i; 152 uint64_t dma_addr; 153 struct nfp_net_dp_buf *rxe = rxq->rxbufs; 154 155 PMD_RX_LOG(DEBUG, "Fill Rx Freelist for %hu descriptors.", 156 rxq->rx_count); 157 158 for (i = 0; i < rxq->rx_count; i++) { 159 struct nfp_net_rx_desc *rxd; 160 struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool); 161 162 if (mbuf == NULL) { 163 PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%hu.", 164 rxq->qidx); 165 return -ENOMEM; 166 } 167 168 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf)); 169 170 rxd = &rxq->rxds[i]; 171 rxd->fld.dd = 0; 172 rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff; 173 rxd->fld.dma_addr_lo = dma_addr & 0xffffffff; 174 175 rxe[i].mbuf = mbuf; 176 } 177 178 /* Make sure all writes are flushed before telling the hardware */ 179 rte_wmb(); 180 181 /* Not advertising the whole ring as the firmware gets confused if so */ 182 PMD_RX_LOG(DEBUG, "Increment FL write pointer in %hu.", rxq->rx_count - 1); 183 184 nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1); 185 186 return 0; 187 } 188 189 int 190 nfp_net_rx_freelist_setup(struct rte_eth_dev *dev) 191 { 192 uint16_t i; 193 194 for (i = 0; i < dev->data->nb_rx_queues; i++) { 195 if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) != 0) 196 return -1; 197 } 198 199 return 0; 200 } 201 202 uint32_t 203 nfp_net_rx_queue_count(void *rx_queue) 204 { 205 uint32_t idx; 206 uint32_t count = 0; 207 struct nfp_net_rxq *rxq; 208 struct nfp_net_rx_desc *rxds; 209 210 rxq = rx_queue; 211 idx = rxq->rd_p; 212 213 /* 214 * Other PMDs are just checking the DD bit in intervals of 4 215 * descriptors and counting all four if the first has the DD 216 * bit on. Of course, this is not accurate but can be good for 217 * performance. But ideally that should be done in descriptors 218 * chunks belonging to the same cache line. 219 */ 220 while (count < rxq->rx_count) { 221 rxds = &rxq->rxds[idx]; 222 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) 223 break; 224 225 count++; 226 idx++; 227 228 /* Wrapping */ 229 if ((idx) == rxq->rx_count) 230 idx = 0; 231 } 232 233 return count; 234 } 235 236 /** 237 * Set packet type to mbuf based on parsed structure. 
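 *
 * For example, a parsed structure that describes a VXLAN encapsulated
 * TCP/IPv4 packet with an IPv4 outer header (as in the worked example
 * earlier in this file) results in mb->packet_type being set to
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_VXLAN |
 * RTE_PTYPE_L4_UDP | RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
 * RTE_PTYPE_INNER_L4_TCP.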
238 * 239 * @param nfp_ptype 240 * Packet type structure parsing from Rx descriptor. 241 * @param mb 242 * Mbuf to set the packet type. 243 */ 244 static void 245 nfp_net_set_ptype(const struct nfp_ptype_parsed *nfp_ptype, 246 struct rte_mbuf *mb) 247 { 248 uint32_t mbuf_ptype = RTE_PTYPE_L2_ETHER; 249 uint8_t nfp_tunnel_ptype = nfp_ptype->tunnel_ptype; 250 251 if (nfp_tunnel_ptype != NFP_NET_PTYPE_TUNNEL_NONE) 252 mbuf_ptype |= RTE_PTYPE_INNER_L2_ETHER; 253 254 switch (nfp_ptype->outer_l3_ptype) { 255 case NFP_NET_PTYPE_OUTER_L3_NONE: 256 break; 257 case NFP_NET_PTYPE_OUTER_L3_IPV4: 258 mbuf_ptype |= RTE_PTYPE_L3_IPV4; 259 break; 260 case NFP_NET_PTYPE_OUTER_L3_IPV6: 261 mbuf_ptype |= RTE_PTYPE_L3_IPV6; 262 break; 263 default: 264 PMD_RX_LOG(DEBUG, "Unrecognized nfp outer layer 3 packet type: %u.", 265 nfp_ptype->outer_l3_ptype); 266 break; 267 } 268 269 switch (nfp_tunnel_ptype) { 270 case NFP_NET_PTYPE_TUNNEL_NONE: 271 break; 272 case NFP_NET_PTYPE_TUNNEL_VXLAN: 273 mbuf_ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP; 274 break; 275 case NFP_NET_PTYPE_TUNNEL_NVGRE: 276 mbuf_ptype |= RTE_PTYPE_TUNNEL_NVGRE; 277 break; 278 case NFP_NET_PTYPE_TUNNEL_GENEVE: 279 mbuf_ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP; 280 break; 281 default: 282 PMD_RX_LOG(DEBUG, "Unrecognized nfp tunnel packet type: %u.", 283 nfp_tunnel_ptype); 284 break; 285 } 286 287 switch (nfp_ptype->l4_ptype) { 288 case NFP_NET_PTYPE_L4_NONE: 289 break; 290 case NFP_NET_PTYPE_L4_TCP: 291 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_TCP); 292 break; 293 case NFP_NET_PTYPE_L4_UDP: 294 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_UDP); 295 break; 296 case NFP_NET_PTYPE_L4_FRAG: 297 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_FRAG); 298 break; 299 case NFP_NET_PTYPE_L4_NONFRAG: 300 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_NONFRAG); 301 break; 302 case NFP_NET_PTYPE_L4_ICMP: 303 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_ICMP); 304 break; 305 case NFP_NET_PTYPE_L4_SCTP: 306 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_SCTP); 307 break; 308 default: 309 PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 4 packet type: %u.", 310 nfp_ptype->l4_ptype); 311 break; 312 } 313 314 switch (nfp_ptype->l3_ptype) { 315 case NFP_NET_PTYPE_L3_NONE: 316 break; 317 case NFP_NET_PTYPE_L3_IPV4: 318 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4); 319 break; 320 case NFP_NET_PTYPE_L3_IPV6: 321 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6); 322 break; 323 case NFP_NET_PTYPE_L3_IPV4_EXT: 324 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT); 325 break; 326 case NFP_NET_PTYPE_L3_IPV6_EXT: 327 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT); 328 break; 329 case NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN: 330 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT_UNKNOWN); 331 break; 332 case NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN: 333 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT_UNKNOWN); 334 break; 335 default: 336 PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 3 packet type: %u.", 337 nfp_ptype->l3_ptype); 338 break; 339 } 340 341 mb->packet_type = mbuf_ptype; 342 } 343 344 /** 345 * Parse the packet type from Rx descriptor and set to mbuf. 346 * 347 * @param rxq 348 * Rx queue 349 * @param rxds 350 * Rx descriptor including the offloading info of packet type. 351 * @param mb 352 * Mbuf to set the packet type. 
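 *
 * The fields are extracted with the NFP_NET_PTYPE_*_MASK and
 * NFP_NET_PTYPE_*_OFFSET values defined at the top of this file. For the
 * hypothetical offload_info value 0x0851 used in the earlier example:
 *   l4_ptype       = (0x0851 & 0x0007) >> 0  = 1 (TCP)
 *   l3_ptype       = (0x0851 & 0x0038) >> 3  = 2 (IPv4)
 *   tunnel_ptype   = (0x0851 & 0x03c0) >> 6  = 1 (VXLAN)
 *   outer_l3_ptype = (0x0851 & 0x0c00) >> 10 = 2 (IPv4)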
353 */ 354 void 355 nfp_net_parse_ptype(struct nfp_net_rxq *rxq, 356 struct nfp_net_rx_desc *rxds, 357 struct rte_mbuf *mb) 358 { 359 struct nfp_net_hw *hw = rxq->hw; 360 struct nfp_ptype_parsed nfp_ptype; 361 uint16_t rxd_ptype = rxds->rxd.offload_info; 362 363 if ((hw->super.ctrl_ext & NFP_NET_CFG_CTRL_PKT_TYPE) == 0) 364 return; 365 366 if (rxd_ptype == 0 || (rxds->rxd.flags & PCIE_DESC_RX_VLAN) != 0) 367 return; 368 369 nfp_ptype.l4_ptype = (rxd_ptype & NFP_NET_PTYPE_L4_MASK) >> 370 NFP_NET_PTYPE_L4_OFFSET; 371 nfp_ptype.l3_ptype = (rxd_ptype & NFP_NET_PTYPE_L3_MASK) >> 372 NFP_NET_PTYPE_L3_OFFSET; 373 nfp_ptype.tunnel_ptype = (rxd_ptype & NFP_NET_PTYPE_TUNNEL_MASK) >> 374 NFP_NET_PTYPE_TUNNEL_OFFSET; 375 nfp_ptype.outer_l3_ptype = (rxd_ptype & NFP_NET_PTYPE_OUTER_L3_MASK) >> 376 NFP_NET_PTYPE_OUTER_L3_OFFSET; 377 378 nfp_net_set_ptype(&nfp_ptype, mb); 379 } 380 381 /* 382 * RX path design: 383 * 384 * There are some decisions to take: 385 * 1) How to check DD RX descriptors bit 386 * 2) How and when to allocate new mbufs 387 * 388 * Current implementation checks just one single DD bit each loop. As each 389 * descriptor is 8 bytes, it is likely a good idea to check descriptors in 390 * a single cache line instead. Tests with this change have not shown any 391 * performance improvement but it requires further investigation. For example, 392 * depending on which descriptor is next, the number of descriptors could be 393 * less than 8 for just checking those in the same cache line. This implies 394 * extra work which could be counterproductive by itself. Indeed, last firmware 395 * changes are just doing this: writing several descriptors with the DD bit 396 * for saving PCIe bandwidth and DMA operations from the NFP. 397 * 398 * Mbuf allocation is done when a new packet is received. Then the descriptor 399 * is automatically linked with the new mbuf and the old one is given to the 400 * user. The main drawback with this design is mbuf allocation is heavier than 401 * using bulk allocations allowed by DPDK with rte_mempool_get_bulk. From the 402 * cache point of view it does not seem allocating the mbuf early on as we are 403 * doing now have any benefit at all. Again, tests with this change have not 404 * shown any improvement. Also, rte_mempool_get_bulk returns all or nothing 405 * so looking at the implications of this type of allocation should be studied 406 * deeply. 407 */ 408 uint16_t 409 nfp_net_recv_pkts(void *rx_queue, 410 struct rte_mbuf **rx_pkts, 411 uint16_t nb_pkts) 412 { 413 uint64_t dma_addr; 414 uint16_t avail = 0; 415 struct rte_mbuf *mb; 416 uint16_t nb_hold = 0; 417 struct nfp_net_hw *hw; 418 struct rte_mbuf *new_mb; 419 struct nfp_net_rxq *rxq; 420 struct nfp_pf_dev *pf_dev; 421 struct nfp_net_dp_buf *rxb; 422 struct nfp_net_rx_desc *rxds; 423 uint16_t avail_multiplexed = 0; 424 425 rxq = rx_queue; 426 if (unlikely(rxq == NULL)) { 427 /* 428 * DPDK just checks the queue is lower than max queues 429 * enabled. But the queue needs to be configured. 430 */ 431 PMD_RX_LOG(ERR, "RX Bad queue."); 432 return 0; 433 } 434 435 hw = rxq->hw; 436 pf_dev = rxq->hw_priv->pf_dev; 437 438 while (avail + avail_multiplexed < nb_pkts) { 439 rxb = &rxq->rxbufs[rxq->rd_p]; 440 if (unlikely(rxb == NULL)) { 441 PMD_RX_LOG(ERR, "The rxb does not exist!"); 442 break; 443 } 444 445 rxds = &rxq->rxds[rxq->rd_p]; 446 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) 447 break; 448 449 /* 450 * Memory barrier to ensure that we won't do other 451 * reads before the DD bit. 
452 */ 453 rte_rmb(); 454 455 /* 456 * We got a packet. Let's alloc a new mbuf for refilling the 457 * free descriptor ring as soon as possible. 458 */ 459 new_mb = rte_pktmbuf_alloc(rxq->mem_pool); 460 if (unlikely(new_mb == NULL)) { 461 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%hu.", 462 rxq->port_id, rxq->qidx); 463 nfp_net_mbuf_alloc_failed(rxq); 464 break; 465 } 466 467 /* 468 * Grab the mbuf and refill the descriptor with the 469 * previously allocated mbuf. 470 */ 471 mb = rxb->mbuf; 472 rxb->mbuf = new_mb; 473 474 PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u.", 475 rxds->rxd.data_len, rxq->mbuf_size); 476 477 /* Size of this segment */ 478 mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); 479 /* Size of the whole packet. We just support 1 segment */ 480 mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); 481 482 if (unlikely((mb->data_len + hw->rx_offset) > rxq->mbuf_size)) { 483 /* 484 * This should not happen and the user has the 485 * responsibility of avoiding it. But we have 486 * to give some info about the error. 487 */ 488 PMD_RX_LOG(ERR, "The mbuf overflow likely due to the RX offset."); 489 rte_pktmbuf_free(mb); 490 break; 491 } 492 493 /* Filling the received mbuf with packet info */ 494 if (hw->rx_offset != 0) 495 mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset; 496 else 497 mb->data_off = RTE_PKTMBUF_HEADROOM + NFP_DESC_META_LEN(rxds); 498 499 /* No scatter mode supported */ 500 mb->nb_segs = 1; 501 mb->next = NULL; 502 mb->port = rxq->port_id; 503 504 struct nfp_net_meta_parsed meta; 505 nfp_net_meta_parse(rxds, rxq, hw, mb, &meta); 506 507 nfp_net_parse_ptype(rxq, rxds, mb); 508 509 /* Checking the checksum flag */ 510 nfp_net_rx_cksum(rxq, rxds, mb); 511 512 /* Now resetting and updating the descriptor */ 513 rxds->vals[0] = 0; 514 rxds->vals[1] = 0; 515 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(new_mb)); 516 rxds->fld.dd = 0; 517 rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff; 518 rxds->fld.dma_addr_lo = dma_addr & 0xffffffff; 519 nb_hold++; 520 521 rxq->rd_p++; 522 if (unlikely(rxq->rd_p == rxq->rx_count)) /* Wrapping */ 523 rxq->rd_p = 0; 524 525 if (pf_dev->recv_pkt_meta_check_t(&meta)) { 526 rx_pkts[avail++] = mb; 527 } else { 528 if (nfp_flower_pf_dispatch_pkts(rxq, mb, meta.port_id)) { 529 avail_multiplexed++; 530 } else { 531 rte_pktmbuf_free(mb); 532 break; 533 } 534 } 535 } 536 537 if (nb_hold == 0) 538 return nb_hold; 539 540 PMD_RX_LOG(DEBUG, "RX port_id=%hu queue_id=%hu, %hu packets received.", 541 rxq->port_id, rxq->qidx, avail); 542 543 nb_hold += rxq->nb_rx_hold; 544 545 /* 546 * FL descriptors needs to be written before incrementing the 547 * FL queue WR pointer. 
548 */ 549 rte_wmb(); 550 if (nb_hold > rxq->rx_free_thresh) { 551 PMD_RX_LOG(DEBUG, "The port=%hu queue=%hu nb_hold=%hu avail=%hu.", 552 rxq->port_id, rxq->qidx, nb_hold, avail); 553 nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold); 554 nb_hold = 0; 555 } 556 rxq->nb_rx_hold = nb_hold; 557 558 return avail; 559 } 560 561 static void 562 nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq) 563 { 564 uint16_t i; 565 566 if (rxq->rxbufs == NULL) 567 return; 568 569 for (i = 0; i < rxq->rx_count; i++) { 570 if (rxq->rxbufs[i].mbuf != NULL) { 571 rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf); 572 rxq->rxbufs[i].mbuf = NULL; 573 } 574 } 575 } 576 577 void 578 nfp_net_rx_queue_release(struct rte_eth_dev *dev, 579 uint16_t queue_idx) 580 { 581 struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_idx]; 582 583 if (rxq != NULL) { 584 nfp_net_rx_queue_release_mbufs(rxq); 585 rte_eth_dma_zone_free(dev, "rx_ring", queue_idx); 586 rte_free(rxq->rxbufs); 587 rte_free(rxq); 588 } 589 } 590 591 void 592 nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq) 593 { 594 nfp_net_rx_queue_release_mbufs(rxq); 595 rxq->rd_p = 0; 596 rxq->nb_rx_hold = 0; 597 } 598 599 static void 600 nfp_rx_queue_setup_flbufsz(struct nfp_net_hw *hw, 601 struct nfp_net_rxq *rxq) 602 { 603 if (!hw->flbufsz_set_flag) { 604 hw->flbufsz_set_flag = true; 605 hw->flbufsz = rxq->mbuf_size; 606 return; 607 } 608 609 if (hw->flbufsz < rxq->mbuf_size) 610 hw->flbufsz = rxq->mbuf_size; 611 } 612 613 int 614 nfp_net_rx_queue_setup(struct rte_eth_dev *dev, 615 uint16_t queue_idx, 616 uint16_t nb_desc, 617 unsigned int socket_id, 618 const struct rte_eth_rxconf *rx_conf, 619 struct rte_mempool *mp) 620 { 621 uint32_t rx_desc_sz; 622 uint16_t min_rx_desc; 623 uint16_t max_rx_desc; 624 struct nfp_net_hw *hw; 625 struct nfp_net_rxq *rxq; 626 const struct rte_memzone *tz; 627 struct nfp_net_hw_priv *hw_priv; 628 629 hw = nfp_net_get_hw(dev); 630 hw_priv = dev->process_private; 631 632 nfp_net_rx_desc_limits(hw_priv, &min_rx_desc, &max_rx_desc); 633 634 /* Validating number of descriptors */ 635 rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc); 636 if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 || 637 nb_desc > max_rx_desc || nb_desc < min_rx_desc) { 638 PMD_DRV_LOG(ERR, "Wrong nb_desc value."); 639 return -EINVAL; 640 } 641 642 /* 643 * Free memory prior to re-allocation if needed. This is the case after 644 * calling @nfp_net_stop(). 645 */ 646 if (dev->data->rx_queues[queue_idx] != NULL) { 647 nfp_net_rx_queue_release(dev, queue_idx); 648 dev->data->rx_queues[queue_idx] = NULL; 649 } 650 651 /* Allocating rx queue data structure */ 652 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq), 653 RTE_CACHE_LINE_SIZE, socket_id); 654 if (rxq == NULL) 655 return -ENOMEM; 656 657 dev->data->rx_queues[queue_idx] = rxq; 658 659 /* Hw queues mapping based on firmware configuration */ 660 rxq->qidx = queue_idx; 661 rxq->fl_qcidx = queue_idx * hw->stride_rx; 662 rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx); 663 664 /* 665 * Tracking mbuf size for detecting a potential mbuf overflow due to 666 * RX offset. 667 */ 668 rxq->mem_pool = mp; 669 rxq->mbuf_size = rxq->mem_pool->elt_size; 670 rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM); 671 nfp_rx_queue_setup_flbufsz(hw, rxq); 672 673 rxq->rx_count = nb_desc; 674 rxq->port_id = dev->data->port_id; 675 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 676 677 /* 678 * Allocate RX ring hardware descriptors. 
A memzone large enough to 679 * handle the maximum ring size is allocated in order to allow for 680 * resizing in later calls to the queue setup function. 681 */ 682 tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, 683 sizeof(struct nfp_net_rx_desc) * max_rx_desc, 684 NFP_MEMZONE_ALIGN, socket_id); 685 if (tz == NULL) { 686 PMD_DRV_LOG(ERR, "Error allocating rx dma."); 687 nfp_net_rx_queue_release(dev, queue_idx); 688 dev->data->rx_queues[queue_idx] = NULL; 689 return -ENOMEM; 690 } 691 692 /* Saving physical and virtual addresses for the RX ring */ 693 rxq->dma = (uint64_t)tz->iova; 694 rxq->rxds = tz->addr; 695 696 /* Mbuf pointers array for referencing mbufs linked to RX descriptors */ 697 rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs", 698 sizeof(*rxq->rxbufs) * nb_desc, RTE_CACHE_LINE_SIZE, 699 socket_id); 700 if (rxq->rxbufs == NULL) { 701 nfp_net_rx_queue_release(dev, queue_idx); 702 dev->data->rx_queues[queue_idx] = NULL; 703 return -ENOMEM; 704 } 705 706 nfp_net_reset_rx_queue(rxq); 707 708 rxq->hw = hw; 709 rxq->hw_priv = dev->process_private; 710 711 /* 712 * Telling the HW about the physical address of the RX ring and number 713 * of descriptors in log2 format. 714 */ 715 nn_cfg_writeq(&hw->super, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma); 716 nn_cfg_writeb(&hw->super, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc)); 717 718 return 0; 719 } 720 721 static inline uint32_t 722 nfp_net_read_tx_free_qcp(struct nfp_net_txq *txq) 723 { 724 /* 725 * If TX ring pointer write back is not supported, do a PCIe read. 726 * Otherwise read qcp value from write back dma address. 727 */ 728 if (txq->txrwb == NULL) 729 return nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR); 730 731 /* 732 * In most cases the TX count is a power of two and the costly modulus 733 * operation can be substituted with a subtraction and an AND operation. 734 */ 735 if (rte_is_power_of_2(txq->tx_count) == 1) 736 return (*txq->txrwb) & (txq->tx_count - 1); 737 else 738 return (*txq->txrwb) % txq->tx_count; 739 } 740 741 /** 742 * Check for descriptors with a complete status 743 * 744 * @param txq 745 * TX queue to work with 746 * 747 * @return 748 * Number of descriptors freed 749 */ 750 uint32_t 751 nfp_net_tx_free_bufs(struct nfp_net_txq *txq) 752 { 753 uint32_t todo; 754 uint32_t qcp_rd_p; 755 756 PMD_TX_LOG(DEBUG, "Queue %hu. 
Check for descriptor with a complete" 757 " status.", txq->qidx); 758 759 /* Work out how many packets have been sent */ 760 qcp_rd_p = nfp_net_read_tx_free_qcp(txq); 761 762 if (qcp_rd_p == txq->rd_p) { 763 PMD_TX_LOG(DEBUG, "Queue %hu: It seems harrier is not sending " 764 "packets (%u, %u).", txq->qidx, 765 qcp_rd_p, txq->rd_p); 766 return 0; 767 } 768 769 if (qcp_rd_p > txq->rd_p) 770 todo = qcp_rd_p - txq->rd_p; 771 else 772 todo = qcp_rd_p + txq->tx_count - txq->rd_p; 773 774 PMD_TX_LOG(DEBUG, "The qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u.", 775 qcp_rd_p, txq->rd_p, txq->rd_p); 776 777 if (todo == 0) 778 return todo; 779 780 txq->rd_p += todo; 781 if (unlikely(txq->rd_p >= txq->tx_count)) 782 txq->rd_p -= txq->tx_count; 783 784 return todo; 785 } 786 787 static void 788 nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq) 789 { 790 uint32_t i; 791 792 if (txq->txbufs == NULL) 793 return; 794 795 for (i = 0; i < txq->tx_count; i++) { 796 if (txq->txbufs[i].mbuf != NULL) { 797 rte_pktmbuf_free_seg(txq->txbufs[i].mbuf); 798 txq->txbufs[i].mbuf = NULL; 799 } 800 } 801 } 802 803 void 804 nfp_net_tx_queue_release(struct rte_eth_dev *dev, 805 uint16_t queue_idx) 806 { 807 struct nfp_net_hw *net_hw; 808 struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx]; 809 810 if (txq != NULL) { 811 net_hw = nfp_net_get_hw(dev); 812 if (net_hw->txrwb_mz != NULL) 813 nn_cfg_writeq(&net_hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), 0); 814 nfp_net_tx_queue_release_mbufs(txq); 815 rte_eth_dma_zone_free(dev, "tx_ring", queue_idx); 816 rte_free(txq->txbufs); 817 rte_free(txq); 818 } 819 } 820 821 void 822 nfp_net_reset_tx_queue(struct nfp_net_txq *txq) 823 { 824 nfp_net_tx_queue_release_mbufs(txq); 825 txq->wr_p = 0; 826 txq->rd_p = 0; 827 if (txq->txrwb != NULL) 828 *txq->txrwb = 0; 829 } 830 831 int 832 nfp_net_tx_queue_setup(struct rte_eth_dev *dev, 833 uint16_t queue_idx, 834 uint16_t nb_desc, 835 unsigned int socket_id, 836 const struct rte_eth_txconf *tx_conf) 837 { 838 struct nfp_net_hw_priv *hw_priv; 839 840 hw_priv = dev->process_private; 841 842 if (hw_priv->pf_dev->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3) 843 return nfp_net_nfd3_tx_queue_setup(dev, queue_idx, 844 nb_desc, socket_id, tx_conf); 845 else 846 return nfp_net_nfdk_tx_queue_setup(dev, queue_idx, 847 nb_desc, socket_id, tx_conf); 848 } 849 850 void 851 nfp_net_rx_queue_info_get(struct rte_eth_dev *dev, 852 uint16_t queue_id, 853 struct rte_eth_rxq_info *info) 854 { 855 struct rte_eth_dev_info dev_info; 856 struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_id]; 857 858 info->mp = rxq->mem_pool; 859 info->nb_desc = rxq->rx_count; 860 861 info->conf.rx_free_thresh = rxq->rx_free_thresh; 862 863 nfp_net_infos_get(dev, &dev_info); 864 info->conf.offloads = dev_info.rx_offload_capa & 865 dev->data->dev_conf.rxmode.offloads; 866 info->conf.rx_thresh = dev_info.default_rxconf.rx_thresh; 867 } 868 869 void 870 nfp_net_tx_queue_info_get(struct rte_eth_dev *dev, 871 uint16_t queue_id, 872 struct rte_eth_txq_info *info) 873 { 874 struct rte_eth_dev_info dev_info; 875 struct nfp_net_hw_priv *hw_priv = dev->process_private; 876 struct nfp_net_txq *txq = dev->data->tx_queues[queue_id]; 877 878 if (hw_priv->pf_dev->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3) 879 info->nb_desc = txq->tx_count / NFD3_TX_DESC_PER_PKT; 880 else 881 info->nb_desc = txq->tx_count / NFDK_TX_DESC_PER_SIMPLE_PKT; 882 883 info->conf.tx_free_thresh = txq->tx_free_thresh; 884 885 nfp_net_infos_get(dev, &dev_info); 886 info->conf.offloads = dev_info.tx_offload_capa & 887 
dev->data->dev_conf.txmode.offloads; 888 info->conf.tx_thresh = dev_info.default_txconf.tx_thresh; 889 } 890 891 void 892 nfp_net_recv_pkts_set(struct rte_eth_dev *eth_dev) 893 { 894 if (nfp_net_get_avx2_supported()) 895 eth_dev->rx_pkt_burst = nfp_net_vec_avx2_recv_pkts; 896 else 897 eth_dev->rx_pkt_burst = nfp_net_recv_pkts; 898 } 899 900 int 901 nfp_net_rx_burst_mode_get(struct rte_eth_dev *eth_dev, 902 uint16_t queue_id __rte_unused, 903 struct rte_eth_burst_mode *mode) 904 { 905 eth_rx_burst_t pkt_burst; 906 907 pkt_burst = eth_dev->rx_pkt_burst; 908 if (pkt_burst == nfp_net_recv_pkts) { 909 strlcpy(mode->info, "Scalar", 910 RTE_ETH_BURST_MODE_INFO_SIZE); 911 } else if (pkt_burst == nfp_net_vec_avx2_recv_pkts) { 912 strlcpy(mode->info, "Vector AVX2", 913 RTE_ETH_BURST_MODE_INFO_SIZE); 914 } else { 915 return -EINVAL; 916 } 917 918 return 0; 919 } 920 921 int 922 nfp_net_tx_burst_mode_get(struct rte_eth_dev *eth_dev, 923 uint16_t queue_id __rte_unused, 924 struct rte_eth_burst_mode *mode) 925 { 926 eth_tx_burst_t pkt_burst; 927 928 pkt_burst = eth_dev->tx_pkt_burst; 929 if (pkt_burst == nfp_net_nfd3_xmit_pkts) { 930 strlcpy(mode->info, "NFD3 Scalar", 931 RTE_ETH_BURST_MODE_INFO_SIZE); 932 } else if (pkt_burst == nfp_net_nfdk_xmit_pkts) { 933 strlcpy(mode->info, "NFDk Scalar", 934 RTE_ETH_BURST_MODE_INFO_SIZE); 935 } else if (pkt_burst == nfp_net_nfdk_vec_avx2_xmit_pkts) { 936 strlcpy(mode->info, "NFDk Vector AVX2", 937 RTE_ETH_BURST_MODE_INFO_SIZE); 938 } else { 939 return -EINVAL; 940 } 941 942 return 0; 943 } 944
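
/*
 * Usage sketch (illustration only, not part of the driver): once a port is
 * configured and started, an application can query which of the Rx/Tx burst
 * implementations selected above is active through the generic ethdev API
 * declared in <rte_ethdev.h>. The port_id below is assumed to identify an
 * NFP port owned by the application.
 *
 *	struct rte_eth_burst_mode mode;
 *
 *	if (rte_eth_rx_burst_mode_get(port_id, 0, &mode) == 0)
 *		printf("Rx burst mode: %s\n", mode.info);
 *
 *	if (rte_eth_tx_burst_mode_get(port_id, 0, &mode) == 0)
 *		printf("Tx burst mode: %s\n", mode.info);
 */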