1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2014-2021 Netronome Systems, Inc. 3 * All rights reserved. 4 * 5 * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation. 6 */ 7 8 #include "nfp_rxtx.h" 9 10 #include <ethdev_pci.h> 11 #include <rte_security.h> 12 13 #include "nfd3/nfp_nfd3.h" 14 #include "nfdk/nfp_nfdk.h" 15 #include "flower/nfp_flower.h" 16 17 #include "nfp_ipsec.h" 18 #include "nfp_logs.h" 19 #include "nfp_net_meta.h" 20 #include "nfp_rxtx_vec.h" 21 22 /* 23 * The bit format and map of nfp packet type for rxd.offload_info in Rx descriptor. 24 * 25 * Bit format about nfp packet type refers to the following: 26 * --------------------------------- 27 * 1 0 28 * 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 29 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 30 * | |ol3|tunnel | l3 | l4 | 31 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 32 * 33 * Bit map about nfp packet type refers to the following: 34 * 35 * L4: bit 0~2, used for layer 4 or inner layer 4. 36 * 000: NFP_NET_PTYPE_L4_NONE 37 * 001: NFP_NET_PTYPE_L4_TCP 38 * 010: NFP_NET_PTYPE_L4_UDP 39 * 011: NFP_NET_PTYPE_L4_FRAG 40 * 100: NFP_NET_PTYPE_L4_NONFRAG 41 * 101: NFP_NET_PTYPE_L4_ICMP 42 * 110: NFP_NET_PTYPE_L4_SCTP 43 * 111: reserved 44 * 45 * L3: bit 3~5, used for layer 3 or inner layer 3. 46 * 000: NFP_NET_PTYPE_L3_NONE 47 * 001: NFP_NET_PTYPE_L3_IPV6 48 * 010: NFP_NET_PTYPE_L3_IPV4 49 * 011: NFP_NET_PTYPE_L3_IPV4_EXT 50 * 100: NFP_NET_PTYPE_L3_IPV6_EXT 51 * 101: NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN 52 * 110: NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN 53 * 111: reserved 54 * 55 * Tunnel: bit 6~9, used for tunnel. 56 * 0000: NFP_NET_PTYPE_TUNNEL_NONE 57 * 0001: NFP_NET_PTYPE_TUNNEL_VXLAN 58 * 0100: NFP_NET_PTYPE_TUNNEL_NVGRE 59 * 0101: NFP_NET_PTYPE_TUNNEL_GENEVE 60 * 0010, 0011, 0110~1111: reserved 61 * 62 * Outer L3: bit 10~11, used for outer layer 3. 63 * 00: NFP_NET_PTYPE_OUTER_L3_NONE 64 * 01: NFP_NET_PTYPE_OUTER_L3_IPV6 65 * 10: NFP_NET_PTYPE_OUTER_L3_IPV4 66 * 11: reserved 67 * 68 * Reserved: bit 10~15, used for extension. 69 */ 70 71 /* Mask and offset about nfp packet type based on the bit map above. */ 72 #define NFP_NET_PTYPE_L4_MASK 0x0007 73 #define NFP_NET_PTYPE_L3_MASK 0x0038 74 #define NFP_NET_PTYPE_TUNNEL_MASK 0x03c0 75 #define NFP_NET_PTYPE_OUTER_L3_MASK 0x0c00 76 77 #define NFP_NET_PTYPE_L4_OFFSET 0 78 #define NFP_NET_PTYPE_L3_OFFSET 3 79 #define NFP_NET_PTYPE_TUNNEL_OFFSET 6 80 #define NFP_NET_PTYPE_OUTER_L3_OFFSET 10 81 82 /* Case about nfp packet type based on the bit map above. */ 83 #define NFP_NET_PTYPE_L4_NONE 0 84 #define NFP_NET_PTYPE_L4_TCP 1 85 #define NFP_NET_PTYPE_L4_UDP 2 86 #define NFP_NET_PTYPE_L4_FRAG 3 87 #define NFP_NET_PTYPE_L4_NONFRAG 4 88 #define NFP_NET_PTYPE_L4_ICMP 5 89 #define NFP_NET_PTYPE_L4_SCTP 6 90 91 #define NFP_NET_PTYPE_L3_NONE 0 92 #define NFP_NET_PTYPE_L3_IPV6 1 93 #define NFP_NET_PTYPE_L3_IPV4 2 94 #define NFP_NET_PTYPE_L3_IPV4_EXT 3 95 #define NFP_NET_PTYPE_L3_IPV6_EXT 4 96 #define NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN 5 97 #define NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN 6 98 99 #define NFP_NET_PTYPE_TUNNEL_NONE 0 100 #define NFP_NET_PTYPE_TUNNEL_VXLAN 1 101 #define NFP_NET_PTYPE_TUNNEL_NVGRE 4 102 #define NFP_NET_PTYPE_TUNNEL_GENEVE 5 103 104 #define NFP_NET_PTYPE_OUTER_L3_NONE 0 105 #define NFP_NET_PTYPE_OUTER_L3_IPV6 1 106 #define NFP_NET_PTYPE_OUTER_L3_IPV4 2 107 108 #define NFP_PTYPE2RTE(tunnel, type) ((tunnel) ? RTE_PTYPE_INNER_##type : RTE_PTYPE_##type) 109 110 /* Record NFP packet type parsed from rxd.offload_info. */ 111 struct nfp_ptype_parsed { 112 uint8_t l4_ptype; /**< Packet type of layer 4, or inner layer 4. */ 113 uint8_t l3_ptype; /**< Packet type of layer 3, or inner layer 3. */ 114 uint8_t tunnel_ptype; /**< Packet type of tunnel. */ 115 uint8_t outer_l3_ptype; /**< Packet type of outer layer 3. */ 116 }; 117 118 /* Set mbuf checksum flags based on RX descriptor flags */ 119 void 120 nfp_net_rx_cksum(struct nfp_net_rxq *rxq, 121 struct nfp_net_rx_desc *rxd, 122 struct rte_mbuf *mb) 123 { 124 struct nfp_net_hw *hw = rxq->hw; 125 126 if ((hw->super.ctrl & NFP_NET_CFG_CTRL_RXCSUM) == 0) 127 return; 128 129 /* If IPv4 and IP checksum error, fail */ 130 if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) != 0 && 131 (rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK) == 0)) 132 mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD; 133 else 134 mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD; 135 136 /* If neither UDP nor TCP return */ 137 if ((rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) == 0 && 138 (rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM) == 0) 139 return; 140 141 if (likely(rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK) != 0) 142 mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD; 143 else 144 mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD; 145 } 146 147 static int 148 nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq) 149 { 150 uint16_t i; 151 uint64_t dma_addr; 152 struct nfp_net_dp_buf *rxe = rxq->rxbufs; 153 154 PMD_RX_LOG(DEBUG, "Fill Rx Freelist for %hu descriptors.", 155 rxq->rx_count); 156 157 for (i = 0; i < rxq->rx_count; i++) { 158 struct nfp_net_rx_desc *rxd; 159 struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool); 160 161 if (mbuf == NULL) { 162 PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%hu.", 163 rxq->qidx); 164 return -ENOMEM; 165 } 166 167 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf)); 168 169 rxd = &rxq->rxds[i]; 170 rxd->fld.dd = 0; 171 rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff; 172 rxd->fld.dma_addr_lo = dma_addr & 0xffffffff; 173 174 rxe[i].mbuf = mbuf; 175 } 176 177 /* Make sure all writes are flushed before telling the hardware */ 178 rte_wmb(); 179 180 /* Not advertising the whole ring as the firmware gets confused if so */ 181 PMD_RX_LOG(DEBUG, "Increment FL write pointer in %hu.", rxq->rx_count - 1); 182 183 nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1); 184 185 return 0; 186 } 187 188 int 189 nfp_net_rx_freelist_setup(struct rte_eth_dev *dev) 190 { 191 uint16_t i; 192 193 for (i = 0; i < dev->data->nb_rx_queues; i++) { 194 if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) != 0) 195 return -1; 196 } 197 198 return 0; 199 } 200 201 uint32_t 202 nfp_net_rx_queue_count(void *rx_queue) 203 { 204 uint32_t idx; 205 uint32_t count = 0; 206 struct nfp_net_rxq *rxq; 207 struct nfp_net_rx_desc *rxds; 208 209 rxq = rx_queue; 210 idx = rxq->rd_p; 211 212 /* 213 * Other PMDs are just checking the DD bit in intervals of 4 214 * descriptors and counting all four if the first has the DD 215 * bit on. Of course, this is not accurate but can be good for 216 * performance. But ideally that should be done in descriptors 217 * chunks belonging to the same cache line. 218 */ 219 while (count < rxq->rx_count) { 220 rxds = &rxq->rxds[idx]; 221 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) 222 break; 223 224 count++; 225 idx++; 226 227 /* Wrapping */ 228 if ((idx) == rxq->rx_count) 229 idx = 0; 230 } 231 232 return count; 233 } 234 235 /** 236 * Set packet type to mbuf based on parsed structure. 237 * 238 * @param nfp_ptype 239 * Packet type structure parsing from Rx descriptor. 240 * @param mb 241 * Mbuf to set the packet type. 242 */ 243 static void 244 nfp_net_set_ptype(const struct nfp_ptype_parsed *nfp_ptype, 245 struct rte_mbuf *mb) 246 { 247 uint32_t mbuf_ptype = RTE_PTYPE_L2_ETHER; 248 uint8_t nfp_tunnel_ptype = nfp_ptype->tunnel_ptype; 249 250 if (nfp_tunnel_ptype != NFP_NET_PTYPE_TUNNEL_NONE) 251 mbuf_ptype |= RTE_PTYPE_INNER_L2_ETHER; 252 253 switch (nfp_ptype->outer_l3_ptype) { 254 case NFP_NET_PTYPE_OUTER_L3_NONE: 255 break; 256 case NFP_NET_PTYPE_OUTER_L3_IPV4: 257 mbuf_ptype |= RTE_PTYPE_L3_IPV4; 258 break; 259 case NFP_NET_PTYPE_OUTER_L3_IPV6: 260 mbuf_ptype |= RTE_PTYPE_L3_IPV6; 261 break; 262 default: 263 PMD_RX_LOG(DEBUG, "Unrecognized nfp outer layer 3 packet type: %u.", 264 nfp_ptype->outer_l3_ptype); 265 break; 266 } 267 268 switch (nfp_tunnel_ptype) { 269 case NFP_NET_PTYPE_TUNNEL_NONE: 270 break; 271 case NFP_NET_PTYPE_TUNNEL_VXLAN: 272 mbuf_ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP; 273 break; 274 case NFP_NET_PTYPE_TUNNEL_NVGRE: 275 mbuf_ptype |= RTE_PTYPE_TUNNEL_NVGRE; 276 break; 277 case NFP_NET_PTYPE_TUNNEL_GENEVE: 278 mbuf_ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP; 279 break; 280 default: 281 PMD_RX_LOG(DEBUG, "Unrecognized nfp tunnel packet type: %u.", 282 nfp_tunnel_ptype); 283 break; 284 } 285 286 switch (nfp_ptype->l4_ptype) { 287 case NFP_NET_PTYPE_L4_NONE: 288 break; 289 case NFP_NET_PTYPE_L4_TCP: 290 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_TCP); 291 break; 292 case NFP_NET_PTYPE_L4_UDP: 293 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_UDP); 294 break; 295 case NFP_NET_PTYPE_L4_FRAG: 296 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_FRAG); 297 break; 298 case NFP_NET_PTYPE_L4_NONFRAG: 299 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_NONFRAG); 300 break; 301 case NFP_NET_PTYPE_L4_ICMP: 302 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_ICMP); 303 break; 304 case NFP_NET_PTYPE_L4_SCTP: 305 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_SCTP); 306 break; 307 default: 308 PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 4 packet type: %u.", 309 nfp_ptype->l4_ptype); 310 break; 311 } 312 313 switch (nfp_ptype->l3_ptype) { 314 case NFP_NET_PTYPE_L3_NONE: 315 break; 316 case NFP_NET_PTYPE_L3_IPV4: 317 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4); 318 break; 319 case NFP_NET_PTYPE_L3_IPV6: 320 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6); 321 break; 322 case NFP_NET_PTYPE_L3_IPV4_EXT: 323 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT); 324 break; 325 case NFP_NET_PTYPE_L3_IPV6_EXT: 326 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT); 327 break; 328 case NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN: 329 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT_UNKNOWN); 330 break; 331 case NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN: 332 mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT_UNKNOWN); 333 break; 334 default: 335 PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 3 packet type: %u.", 336 nfp_ptype->l3_ptype); 337 break; 338 } 339 340 mb->packet_type = mbuf_ptype; 341 } 342 343 /** 344 * Parse the packet type from Rx descriptor and set to mbuf. 345 * 346 * @param rxq 347 * Rx queue 348 * @param rxds 349 * Rx descriptor including the offloading info of packet type. 350 * @param mb 351 * Mbuf to set the packet type. 352 */ 353 void 354 nfp_net_parse_ptype(struct nfp_net_rxq *rxq, 355 struct nfp_net_rx_desc *rxds, 356 struct rte_mbuf *mb) 357 { 358 struct nfp_net_hw *hw = rxq->hw; 359 struct nfp_ptype_parsed nfp_ptype; 360 uint16_t rxd_ptype = rxds->rxd.offload_info; 361 362 if ((hw->super.ctrl_ext & NFP_NET_CFG_CTRL_PKT_TYPE) == 0) 363 return; 364 365 if (rxd_ptype == 0 || (rxds->rxd.flags & PCIE_DESC_RX_VLAN) != 0) 366 return; 367 368 nfp_ptype.l4_ptype = (rxd_ptype & NFP_NET_PTYPE_L4_MASK) >> 369 NFP_NET_PTYPE_L4_OFFSET; 370 nfp_ptype.l3_ptype = (rxd_ptype & NFP_NET_PTYPE_L3_MASK) >> 371 NFP_NET_PTYPE_L3_OFFSET; 372 nfp_ptype.tunnel_ptype = (rxd_ptype & NFP_NET_PTYPE_TUNNEL_MASK) >> 373 NFP_NET_PTYPE_TUNNEL_OFFSET; 374 nfp_ptype.outer_l3_ptype = (rxd_ptype & NFP_NET_PTYPE_OUTER_L3_MASK) >> 375 NFP_NET_PTYPE_OUTER_L3_OFFSET; 376 377 nfp_net_set_ptype(&nfp_ptype, mb); 378 } 379 380 /* 381 * RX path design: 382 * 383 * There are some decisions to take: 384 * 1) How to check DD RX descriptors bit 385 * 2) How and when to allocate new mbufs 386 * 387 * Current implementation checks just one single DD bit each loop. As each 388 * descriptor is 8 bytes, it is likely a good idea to check descriptors in 389 * a single cache line instead. Tests with this change have not shown any 390 * performance improvement but it requires further investigation. For example, 391 * depending on which descriptor is next, the number of descriptors could be 392 * less than 8 for just checking those in the same cache line. This implies 393 * extra work which could be counterproductive by itself. Indeed, last firmware 394 * changes are just doing this: writing several descriptors with the DD bit 395 * for saving PCIe bandwidth and DMA operations from the NFP. 396 * 397 * Mbuf allocation is done when a new packet is received. Then the descriptor 398 * is automatically linked with the new mbuf and the old one is given to the 399 * user. The main drawback with this design is mbuf allocation is heavier than 400 * using bulk allocations allowed by DPDK with rte_mempool_get_bulk. From the 401 * cache point of view it does not seem allocating the mbuf early on as we are 402 * doing now have any benefit at all. Again, tests with this change have not 403 * shown any improvement. Also, rte_mempool_get_bulk returns all or nothing 404 * so looking at the implications of this type of allocation should be studied 405 * deeply. 406 */ 407 uint16_t 408 nfp_net_recv_pkts(void *rx_queue, 409 struct rte_mbuf **rx_pkts, 410 uint16_t nb_pkts) 411 { 412 uint64_t dma_addr; 413 uint16_t avail = 0; 414 struct rte_mbuf *mb; 415 uint16_t nb_hold = 0; 416 struct nfp_net_hw *hw; 417 struct rte_mbuf *new_mb; 418 struct nfp_net_rxq *rxq; 419 struct nfp_pf_dev *pf_dev; 420 struct nfp_net_dp_buf *rxb; 421 struct nfp_net_rx_desc *rxds; 422 uint16_t avail_multiplexed = 0; 423 424 rxq = rx_queue; 425 if (unlikely(rxq == NULL)) { 426 /* 427 * DPDK just checks the queue is lower than max queues 428 * enabled. But the queue needs to be configured. 429 */ 430 PMD_RX_LOG(ERR, "RX Bad queue."); 431 return 0; 432 } 433 434 hw = rxq->hw; 435 pf_dev = rxq->hw_priv->pf_dev; 436 437 while (avail + avail_multiplexed < nb_pkts) { 438 rxb = &rxq->rxbufs[rxq->rd_p]; 439 if (unlikely(rxb == NULL)) { 440 PMD_RX_LOG(ERR, "The rxb does not exist!"); 441 break; 442 } 443 444 rxds = &rxq->rxds[rxq->rd_p]; 445 if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0) 446 break; 447 448 /* 449 * Memory barrier to ensure that we won't do other 450 * reads before the DD bit. 451 */ 452 rte_rmb(); 453 454 /* 455 * We got a packet. Let's alloc a new mbuf for refilling the 456 * free descriptor ring as soon as possible. 457 */ 458 new_mb = rte_pktmbuf_alloc(rxq->mem_pool); 459 if (unlikely(new_mb == NULL)) { 460 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%hu.", 461 rxq->port_id, rxq->qidx); 462 nfp_net_mbuf_alloc_failed(rxq); 463 break; 464 } 465 466 /* 467 * Grab the mbuf and refill the descriptor with the 468 * previously allocated mbuf. 469 */ 470 mb = rxb->mbuf; 471 rxb->mbuf = new_mb; 472 473 PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u.", 474 rxds->rxd.data_len, rxq->mbuf_size); 475 476 /* Size of this segment */ 477 mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); 478 /* Size of the whole packet. We just support 1 segment */ 479 mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds); 480 481 if (unlikely((mb->data_len + hw->rx_offset) > rxq->mbuf_size)) { 482 /* 483 * This should not happen and the user has the 484 * responsibility of avoiding it. But we have 485 * to give some info about the error. 486 */ 487 PMD_RX_LOG(ERR, "The mbuf overflow likely due to the RX offset."); 488 rte_pktmbuf_free(mb); 489 break; 490 } 491 492 /* Filling the received mbuf with packet info */ 493 if (hw->rx_offset != 0) 494 mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset; 495 else 496 mb->data_off = RTE_PKTMBUF_HEADROOM + NFP_DESC_META_LEN(rxds); 497 498 /* No scatter mode supported */ 499 mb->nb_segs = 1; 500 mb->next = NULL; 501 mb->port = rxq->port_id; 502 503 struct nfp_net_meta_parsed meta; 504 nfp_net_meta_parse(rxds, rxq, hw, mb, &meta); 505 506 nfp_net_parse_ptype(rxq, rxds, mb); 507 508 /* Checking the checksum flag */ 509 nfp_net_rx_cksum(rxq, rxds, mb); 510 511 /* Now resetting and updating the descriptor */ 512 rxds->vals[0] = 0; 513 rxds->vals[1] = 0; 514 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(new_mb)); 515 rxds->fld.dd = 0; 516 rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff; 517 rxds->fld.dma_addr_lo = dma_addr & 0xffffffff; 518 nb_hold++; 519 520 rxq->rd_p++; 521 if (unlikely(rxq->rd_p == rxq->rx_count)) /* Wrapping */ 522 rxq->rd_p = 0; 523 524 if (pf_dev->recv_pkt_meta_check_t(&meta)) { 525 rx_pkts[avail++] = mb; 526 } else { 527 if (nfp_flower_pf_dispatch_pkts(rxq, mb, meta.port_id)) { 528 avail_multiplexed++; 529 } else { 530 rte_pktmbuf_free(mb); 531 break; 532 } 533 } 534 } 535 536 if (nb_hold == 0) 537 return nb_hold; 538 539 PMD_RX_LOG(DEBUG, "RX port_id=%hu queue_id=%hu, %hu packets received.", 540 rxq->port_id, rxq->qidx, avail); 541 542 nb_hold += rxq->nb_rx_hold; 543 544 /* 545 * FL descriptors needs to be written before incrementing the 546 * FL queue WR pointer. 547 */ 548 rte_wmb(); 549 if (nb_hold > rxq->rx_free_thresh) { 550 PMD_RX_LOG(DEBUG, "The port=%hu queue=%hu nb_hold=%hu avail=%hu.", 551 rxq->port_id, rxq->qidx, nb_hold, avail); 552 nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold); 553 nb_hold = 0; 554 } 555 rxq->nb_rx_hold = nb_hold; 556 557 return avail; 558 } 559 560 static void 561 nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq) 562 { 563 uint16_t i; 564 565 if (rxq->rxbufs == NULL) 566 return; 567 568 for (i = 0; i < rxq->rx_count; i++) { 569 if (rxq->rxbufs[i].mbuf != NULL) { 570 rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf); 571 rxq->rxbufs[i].mbuf = NULL; 572 } 573 } 574 } 575 576 void 577 nfp_net_rx_queue_release(struct rte_eth_dev *dev, 578 uint16_t queue_idx) 579 { 580 struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_idx]; 581 582 if (rxq != NULL) { 583 nfp_net_rx_queue_release_mbufs(rxq); 584 rte_eth_dma_zone_free(dev, "rx_ring", queue_idx); 585 rte_free(rxq->rxbufs); 586 rte_free(rxq); 587 } 588 } 589 590 void 591 nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq) 592 { 593 nfp_net_rx_queue_release_mbufs(rxq); 594 rxq->rd_p = 0; 595 rxq->nb_rx_hold = 0; 596 } 597 598 static void 599 nfp_rx_queue_setup_flbufsz(struct nfp_net_hw *hw, 600 struct nfp_net_rxq *rxq) 601 { 602 if (!hw->flbufsz_set_flag) { 603 hw->flbufsz_set_flag = true; 604 hw->flbufsz = rxq->mbuf_size; 605 return; 606 } 607 608 if (hw->flbufsz < rxq->mbuf_size) 609 hw->flbufsz = rxq->mbuf_size; 610 } 611 612 int 613 nfp_net_rx_queue_setup(struct rte_eth_dev *dev, 614 uint16_t queue_idx, 615 uint16_t nb_desc, 616 unsigned int socket_id, 617 const struct rte_eth_rxconf *rx_conf, 618 struct rte_mempool *mp) 619 { 620 uint32_t rx_desc_sz; 621 uint16_t min_rx_desc; 622 uint16_t max_rx_desc; 623 struct nfp_net_hw *hw; 624 struct nfp_net_rxq *rxq; 625 const struct rte_memzone *tz; 626 struct nfp_net_hw_priv *hw_priv; 627 628 hw = nfp_net_get_hw(dev); 629 hw_priv = dev->process_private; 630 631 nfp_net_rx_desc_limits(hw_priv, &min_rx_desc, &max_rx_desc); 632 633 /* Validating number of descriptors */ 634 rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc); 635 if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 || 636 nb_desc > max_rx_desc || nb_desc < min_rx_desc) { 637 PMD_DRV_LOG(ERR, "Wrong nb_desc value."); 638 return -EINVAL; 639 } 640 641 /* 642 * Free memory prior to re-allocation if needed. This is the case after 643 * calling @nfp_net_stop(). 644 */ 645 if (dev->data->rx_queues[queue_idx] != NULL) { 646 nfp_net_rx_queue_release(dev, queue_idx); 647 dev->data->rx_queues[queue_idx] = NULL; 648 } 649 650 /* Allocating rx queue data structure */ 651 rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq), 652 RTE_CACHE_LINE_SIZE, socket_id); 653 if (rxq == NULL) 654 return -ENOMEM; 655 656 dev->data->rx_queues[queue_idx] = rxq; 657 658 /* Hw queues mapping based on firmware configuration */ 659 rxq->qidx = queue_idx; 660 rxq->fl_qcidx = queue_idx * hw->stride_rx; 661 rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx); 662 663 /* 664 * Tracking mbuf size for detecting a potential mbuf overflow due to 665 * RX offset. 666 */ 667 rxq->mem_pool = mp; 668 rxq->mbuf_size = rxq->mem_pool->elt_size; 669 rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM); 670 nfp_rx_queue_setup_flbufsz(hw, rxq); 671 672 rxq->rx_count = nb_desc; 673 rxq->port_id = dev->data->port_id; 674 rxq->rx_free_thresh = rx_conf->rx_free_thresh; 675 676 /* 677 * Allocate RX ring hardware descriptors. A memzone large enough to 678 * handle the maximum ring size is allocated in order to allow for 679 * resizing in later calls to the queue setup function. 680 */ 681 tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, 682 sizeof(struct nfp_net_rx_desc) * max_rx_desc, 683 NFP_MEMZONE_ALIGN, socket_id); 684 if (tz == NULL) { 685 PMD_DRV_LOG(ERR, "Error allocating rx dma."); 686 nfp_net_rx_queue_release(dev, queue_idx); 687 dev->data->rx_queues[queue_idx] = NULL; 688 return -ENOMEM; 689 } 690 691 /* Saving physical and virtual addresses for the RX ring */ 692 rxq->dma = (uint64_t)tz->iova; 693 rxq->rxds = tz->addr; 694 695 /* Mbuf pointers array for referencing mbufs linked to RX descriptors */ 696 rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs", 697 sizeof(*rxq->rxbufs) * nb_desc, RTE_CACHE_LINE_SIZE, 698 socket_id); 699 if (rxq->rxbufs == NULL) { 700 nfp_net_rx_queue_release(dev, queue_idx); 701 dev->data->rx_queues[queue_idx] = NULL; 702 return -ENOMEM; 703 } 704 705 nfp_net_reset_rx_queue(rxq); 706 707 rxq->hw = hw; 708 rxq->hw_priv = dev->process_private; 709 710 /* 711 * Telling the HW about the physical address of the RX ring and number 712 * of descriptors in log2 format. 713 */ 714 nn_cfg_writeq(&hw->super, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma); 715 nn_cfg_writeb(&hw->super, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc)); 716 717 return 0; 718 } 719 720 static inline uint32_t 721 nfp_net_read_tx_free_qcp(struct nfp_net_txq *txq) 722 { 723 /* 724 * If TX ring pointer write back is not supported, do a PCIe read. 725 * Otherwise read qcp value from write back dma address. 726 */ 727 if (txq->txrwb == NULL) 728 return nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR); 729 730 /* 731 * In most cases the TX count is a power of two and the costly modulus 732 * operation can be substituted with a subtraction and an AND operation. 733 */ 734 if (rte_is_power_of_2(txq->tx_count) == 1) 735 return (*txq->txrwb) & (txq->tx_count - 1); 736 else 737 return (*txq->txrwb) % txq->tx_count; 738 } 739 740 /** 741 * Check for descriptors with a complete status 742 * 743 * @param txq 744 * TX queue to work with 745 * 746 * @return 747 * Number of descriptors freed 748 */ 749 uint32_t 750 nfp_net_tx_free_bufs(struct nfp_net_txq *txq) 751 { 752 uint32_t todo; 753 uint32_t qcp_rd_p; 754 755 PMD_TX_LOG(DEBUG, "Queue %hu. Check for descriptor with a complete" 756 " status.", txq->qidx); 757 758 /* Work out how many packets have been sent */ 759 qcp_rd_p = nfp_net_read_tx_free_qcp(txq); 760 761 if (qcp_rd_p == txq->rd_p) { 762 PMD_TX_LOG(DEBUG, "Queue %hu: It seems harrier is not sending " 763 "packets (%u, %u).", txq->qidx, 764 qcp_rd_p, txq->rd_p); 765 return 0; 766 } 767 768 if (qcp_rd_p > txq->rd_p) 769 todo = qcp_rd_p - txq->rd_p; 770 else 771 todo = qcp_rd_p + txq->tx_count - txq->rd_p; 772 773 PMD_TX_LOG(DEBUG, "The qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u.", 774 qcp_rd_p, txq->rd_p, txq->rd_p); 775 776 if (todo == 0) 777 return todo; 778 779 txq->rd_p += todo; 780 if (unlikely(txq->rd_p >= txq->tx_count)) 781 txq->rd_p -= txq->tx_count; 782 783 return todo; 784 } 785 786 static void 787 nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq) 788 { 789 uint32_t i; 790 791 if (txq->txbufs == NULL) 792 return; 793 794 for (i = 0; i < txq->tx_count; i++) { 795 if (txq->txbufs[i].mbuf != NULL) { 796 rte_pktmbuf_free_seg(txq->txbufs[i].mbuf); 797 txq->txbufs[i].mbuf = NULL; 798 } 799 } 800 } 801 802 void 803 nfp_net_tx_queue_release(struct rte_eth_dev *dev, 804 uint16_t queue_idx) 805 { 806 struct nfp_net_hw *net_hw; 807 struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx]; 808 809 if (txq != NULL) { 810 net_hw = nfp_net_get_hw(dev); 811 if (net_hw->txrwb_mz != NULL) 812 nn_cfg_writeq(&net_hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), 0); 813 nfp_net_tx_queue_release_mbufs(txq); 814 rte_eth_dma_zone_free(dev, "tx_ring", queue_idx); 815 rte_free(txq->txbufs); 816 rte_free(txq); 817 } 818 } 819 820 void 821 nfp_net_reset_tx_queue(struct nfp_net_txq *txq) 822 { 823 nfp_net_tx_queue_release_mbufs(txq); 824 txq->wr_p = 0; 825 txq->rd_p = 0; 826 if (txq->txrwb != NULL) 827 *txq->txrwb = 0; 828 } 829 830 int 831 nfp_net_tx_queue_setup(struct rte_eth_dev *dev, 832 uint16_t queue_idx, 833 uint16_t nb_desc, 834 unsigned int socket_id, 835 const struct rte_eth_txconf *tx_conf) 836 { 837 struct nfp_net_hw_priv *hw_priv; 838 839 hw_priv = dev->process_private; 840 841 if (hw_priv->pf_dev->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3) 842 return nfp_net_nfd3_tx_queue_setup(dev, queue_idx, 843 nb_desc, socket_id, tx_conf); 844 else 845 return nfp_net_nfdk_tx_queue_setup(dev, queue_idx, 846 nb_desc, socket_id, tx_conf); 847 } 848 849 void 850 nfp_net_rx_queue_info_get(struct rte_eth_dev *dev, 851 uint16_t queue_id, 852 struct rte_eth_rxq_info *info) 853 { 854 struct rte_eth_dev_info dev_info; 855 struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_id]; 856 857 info->mp = rxq->mem_pool; 858 info->nb_desc = rxq->rx_count; 859 860 info->conf.rx_free_thresh = rxq->rx_free_thresh; 861 862 nfp_net_infos_get(dev, &dev_info); 863 info->conf.offloads = dev_info.rx_offload_capa & 864 dev->data->dev_conf.rxmode.offloads; 865 } 866 867 void 868 nfp_net_tx_queue_info_get(struct rte_eth_dev *dev, 869 uint16_t queue_id, 870 struct rte_eth_txq_info *info) 871 { 872 struct rte_eth_dev_info dev_info; 873 struct nfp_net_hw_priv *hw_priv = dev->process_private; 874 struct nfp_net_txq *txq = dev->data->tx_queues[queue_id]; 875 876 if (hw_priv->pf_dev->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3) 877 info->nb_desc = txq->tx_count / NFD3_TX_DESC_PER_PKT; 878 else 879 info->nb_desc = txq->tx_count / NFDK_TX_DESC_PER_SIMPLE_PKT; 880 881 info->conf.tx_free_thresh = txq->tx_free_thresh; 882 883 nfp_net_infos_get(dev, &dev_info); 884 info->conf.offloads = dev_info.tx_offload_capa & 885 dev->data->dev_conf.txmode.offloads; 886 } 887 888 void 889 nfp_net_recv_pkts_set(struct rte_eth_dev *eth_dev) 890 { 891 if (nfp_net_get_avx2_supported()) 892 eth_dev->rx_pkt_burst = nfp_net_vec_avx2_recv_pkts; 893 else 894 eth_dev->rx_pkt_burst = nfp_net_recv_pkts; 895 } 896