/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2014-2021 Netronome Systems, Inc.
 * All rights reserved.
 *
 * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
 */

#include "nfp_rxtx.h"

#include <ethdev_pci.h>
#include <rte_security.h>

#include "nfd3/nfp_nfd3.h"
#include "nfdk/nfp_nfdk.h"
#include "flower/nfp_flower.h"

#include "nfp_ipsec.h"
#include "nfp_logs.h"
#include "nfp_net_meta.h"
#include "nfp_rxtx_vec.h"

/*
 * The bit format and map of the nfp packet type for rxd.offload_info in the Rx descriptor.
 *
 * The bit format of the nfp packet type is as follows:
 * ---------------------------------
 *            1                   0
 *  5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |       |ol3|tunnel |  l3 |  l4 |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * The bit map of the nfp packet type is as follows:
 *
 * L4: bit 0~2, used for layer 4 or inner layer 4.
 * 000: NFP_NET_PTYPE_L4_NONE
 * 001: NFP_NET_PTYPE_L4_TCP
 * 010: NFP_NET_PTYPE_L4_UDP
 * 011: NFP_NET_PTYPE_L4_FRAG
 * 100: NFP_NET_PTYPE_L4_NONFRAG
 * 101: NFP_NET_PTYPE_L4_ICMP
 * 110: NFP_NET_PTYPE_L4_SCTP
 * 111: reserved
 *
 * L3: bit 3~5, used for layer 3 or inner layer 3.
 * 000: NFP_NET_PTYPE_L3_NONE
 * 001: NFP_NET_PTYPE_L3_IPV6
 * 010: NFP_NET_PTYPE_L3_IPV4
 * 011: NFP_NET_PTYPE_L3_IPV4_EXT
 * 100: NFP_NET_PTYPE_L3_IPV6_EXT
 * 101: NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN
 * 110: NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN
 * 111: reserved
 *
 * Tunnel: bit 6~9, used for tunnel.
 * 0000: NFP_NET_PTYPE_TUNNEL_NONE
 * 0001: NFP_NET_PTYPE_TUNNEL_VXLAN
 * 0100: NFP_NET_PTYPE_TUNNEL_NVGRE
 * 0101: NFP_NET_PTYPE_TUNNEL_GENEVE
 * 0010, 0011, 0110~1111: reserved
 *
 * Outer L3: bit 10~11, used for outer layer 3.
 * 00: NFP_NET_PTYPE_OUTER_L3_NONE
 * 01: NFP_NET_PTYPE_OUTER_L3_IPV6
 * 10: NFP_NET_PTYPE_OUTER_L3_IPV4
 * 11: reserved
 *
 * Reserved: bit 12~15, used for extension.
 */

/* Mask and offset of the nfp packet type based on the bit map above. */
#define NFP_NET_PTYPE_L4_MASK                  0x0007
#define NFP_NET_PTYPE_L3_MASK                  0x0038
#define NFP_NET_PTYPE_TUNNEL_MASK              0x03c0
#define NFP_NET_PTYPE_OUTER_L3_MASK            0x0c00

#define NFP_NET_PTYPE_L4_OFFSET                0
#define NFP_NET_PTYPE_L3_OFFSET                3
#define NFP_NET_PTYPE_TUNNEL_OFFSET            6
#define NFP_NET_PTYPE_OUTER_L3_OFFSET          10

/* Packet type cases based on the bit map above. */
#define NFP_NET_PTYPE_L4_NONE                  0
#define NFP_NET_PTYPE_L4_TCP                   1
#define NFP_NET_PTYPE_L4_UDP                   2
#define NFP_NET_PTYPE_L4_FRAG                  3
#define NFP_NET_PTYPE_L4_NONFRAG               4
#define NFP_NET_PTYPE_L4_ICMP                  5
#define NFP_NET_PTYPE_L4_SCTP                  6

#define NFP_NET_PTYPE_L3_NONE                  0
#define NFP_NET_PTYPE_L3_IPV6                  1
#define NFP_NET_PTYPE_L3_IPV4                  2
#define NFP_NET_PTYPE_L3_IPV4_EXT              3
#define NFP_NET_PTYPE_L3_IPV6_EXT              4
#define NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN      5
#define NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN      6

#define NFP_NET_PTYPE_TUNNEL_NONE              0
#define NFP_NET_PTYPE_TUNNEL_VXLAN             1
#define NFP_NET_PTYPE_TUNNEL_NVGRE             4
#define NFP_NET_PTYPE_TUNNEL_GENEVE            5

#define NFP_NET_PTYPE_OUTER_L3_NONE            0
#define NFP_NET_PTYPE_OUTER_L3_IPV6            1
#define NFP_NET_PTYPE_OUTER_L3_IPV4            2

#define NFP_PTYPE2RTE(tunnel, type) ((tunnel) ? RTE_PTYPE_INNER_##type : RTE_PTYPE_##type)
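
/*
 * Worked example (illustrative only, not used by the code below): an
 * rxd.offload_info value of 0x0851 decodes with the masks and offsets above as
 *
 *	L4       =  0x0851 & 0x0007        = 1 -> NFP_NET_PTYPE_L4_TCP
 *	L3       = (0x0851 & 0x0038) >> 3  = 2 -> NFP_NET_PTYPE_L3_IPV4
 *	Tunnel   = (0x0851 & 0x03c0) >> 6  = 1 -> NFP_NET_PTYPE_TUNNEL_VXLAN
 *	Outer L3 = (0x0851 & 0x0c00) >> 10 = 2 -> NFP_NET_PTYPE_OUTER_L3_IPV4
 *
 * which nfp_net_set_ptype() below turns into the mbuf packet type
 *
 *	RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_VXLAN |
 *	RTE_PTYPE_L4_UDP | RTE_PTYPE_INNER_L2_ETHER |
 *	RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP
 *
 * since NFP_PTYPE2RTE() picks the RTE_PTYPE_INNER_* variant whenever the
 * tunnel type is non-zero.
 */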
/* Record NFP packet type parsed from rxd.offload_info. */
struct nfp_ptype_parsed {
	uint8_t l4_ptype;       /**< Packet type of layer 4, or inner layer 4. */
	uint8_t l3_ptype;       /**< Packet type of layer 3, or inner layer 3. */
	uint8_t tunnel_ptype;   /**< Packet type of tunnel. */
	uint8_t outer_l3_ptype; /**< Packet type of outer layer 3. */
};

/* Set mbuf checksum flags based on RX descriptor flags */
void
nfp_net_rx_cksum(struct nfp_net_rxq *rxq,
		struct nfp_net_rx_desc *rxd,
		struct rte_mbuf *mb)
{
	struct nfp_net_hw *hw = rxq->hw;

	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_RXCSUM) == 0)
		return;

	/* If IPv4 and IP checksum error, fail */
	if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) != 0 &&
			(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK) == 0))
		mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
	else
		mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

	/* If the packet is neither UDP nor TCP, return */
	if ((rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) == 0 &&
			(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM) == 0)
		return;

	if (likely((rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK) != 0))
		mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
	else
		mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
}

static int
nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq)
{
	uint16_t i;
	uint64_t dma_addr;
	struct nfp_net_dp_buf *rxe = rxq->rxbufs;

	PMD_RX_LOG(DEBUG, "Fill Rx Freelist for %hu descriptors",
			rxq->rx_count);

	for (i = 0; i < rxq->rx_count; i++) {
		struct nfp_net_rx_desc *rxd;
		struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool);

		if (mbuf == NULL) {
			PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%hu",
					rxq->qidx);
			return -ENOMEM;
		}

		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));

		rxd = &rxq->rxds[i];
		rxd->fld.dd = 0;
		rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff;
		rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;

		rxe[i].mbuf = mbuf;
	}

	/* Make sure all writes are flushed before telling the hardware */
	rte_wmb();

	/* Not advertising the whole ring as the firmware gets confused if so */
	PMD_RX_LOG(DEBUG, "Increment FL write pointer by %hu", rxq->rx_count - 1);

	nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1);

	return 0;
}

int
nfp_net_rx_freelist_setup(struct rte_eth_dev *dev)
{
	uint16_t i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) != 0)
			return -1;
	}

	return 0;
}

uint32_t
nfp_net_rx_queue_count(void *rx_queue)
{
	uint32_t idx;
	uint32_t count = 0;
	struct nfp_net_rxq *rxq;
	struct nfp_net_rx_desc *rxds;

	rxq = rx_queue;
	idx = rxq->rd_p;

	/*
	 * Other PMDs just check the DD bit at intervals of 4 descriptors and
	 * count all four if the first one has the DD bit set. This is not
	 * accurate but is good for performance. Ideally it should be done on
	 * descriptor chunks belonging to the same cache line.
	 */
	while (count < rxq->rx_count) {
		rxds = &rxq->rxds[idx];
		if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
			break;

		count++;
		idx++;

		/* Wrapping */
		if (idx == rxq->rx_count)
			idx = 0;
	}

	return count;
}

/**
 * Set the packet type in the mbuf based on the parsed structure.
 *
 * @param nfp_ptype
 *   Packet type structure parsed from the Rx descriptor.
 * @param mb
 *   Mbuf in which to set the packet type.
 */
static void
nfp_net_set_ptype(const struct nfp_ptype_parsed *nfp_ptype,
		struct rte_mbuf *mb)
{
	uint32_t mbuf_ptype = RTE_PTYPE_L2_ETHER;
	uint8_t nfp_tunnel_ptype = nfp_ptype->tunnel_ptype;

	if (nfp_tunnel_ptype != NFP_NET_PTYPE_TUNNEL_NONE)
		mbuf_ptype |= RTE_PTYPE_INNER_L2_ETHER;

	switch (nfp_ptype->outer_l3_ptype) {
	case NFP_NET_PTYPE_OUTER_L3_NONE:
		break;
	case NFP_NET_PTYPE_OUTER_L3_IPV4:
		mbuf_ptype |= RTE_PTYPE_L3_IPV4;
		break;
	case NFP_NET_PTYPE_OUTER_L3_IPV6:
		mbuf_ptype |= RTE_PTYPE_L3_IPV6;
		break;
	default:
		PMD_RX_LOG(DEBUG, "Unrecognized nfp outer layer 3 packet type: %u",
				nfp_ptype->outer_l3_ptype);
		break;
	}

	switch (nfp_tunnel_ptype) {
	case NFP_NET_PTYPE_TUNNEL_NONE:
		break;
	case NFP_NET_PTYPE_TUNNEL_VXLAN:
		mbuf_ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
		break;
	case NFP_NET_PTYPE_TUNNEL_NVGRE:
		mbuf_ptype |= RTE_PTYPE_TUNNEL_NVGRE;
		break;
	case NFP_NET_PTYPE_TUNNEL_GENEVE:
		mbuf_ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
		break;
	default:
		PMD_RX_LOG(DEBUG, "Unrecognized nfp tunnel packet type: %u",
				nfp_tunnel_ptype);
		break;
	}

	switch (nfp_ptype->l4_ptype) {
	case NFP_NET_PTYPE_L4_NONE:
		break;
	case NFP_NET_PTYPE_L4_TCP:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_TCP);
		break;
	case NFP_NET_PTYPE_L4_UDP:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_UDP);
		break;
	case NFP_NET_PTYPE_L4_FRAG:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_FRAG);
		break;
	case NFP_NET_PTYPE_L4_NONFRAG:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_NONFRAG);
		break;
	case NFP_NET_PTYPE_L4_ICMP:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_ICMP);
		break;
	case NFP_NET_PTYPE_L4_SCTP:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_SCTP);
		break;
	default:
		PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 4 packet type: %u",
				nfp_ptype->l4_ptype);
		break;
	}

	switch (nfp_ptype->l3_ptype) {
	case NFP_NET_PTYPE_L3_NONE:
		break;
	case NFP_NET_PTYPE_L3_IPV4:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4);
		break;
	case NFP_NET_PTYPE_L3_IPV6:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6);
		break;
	case NFP_NET_PTYPE_L3_IPV4_EXT:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT);
		break;
	case NFP_NET_PTYPE_L3_IPV6_EXT:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT);
		break;
	case NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT_UNKNOWN);
		break;
	case NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT_UNKNOWN);
		break;
	default:
		PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 3 packet type: %u",
				nfp_ptype->l3_ptype);
		break;
	}

	mb->packet_type = mbuf_ptype;
}

/**
 * Parse the packet type from the Rx descriptor and set it in the mbuf.
 *
 * @param rxq
 *   Rx queue.
 * @param rxds
 *   Rx descriptor carrying the packet type offload info.
 * @param mb
 *   Mbuf in which to set the packet type.
 */
void
nfp_net_parse_ptype(struct nfp_net_rxq *rxq,
		struct nfp_net_rx_desc *rxds,
		struct rte_mbuf *mb)
{
	struct nfp_net_hw *hw = rxq->hw;
	struct nfp_ptype_parsed nfp_ptype;
	uint16_t rxd_ptype = rxds->rxd.offload_info;

	if ((hw->super.ctrl_ext & NFP_NET_CFG_CTRL_PKT_TYPE) == 0)
		return;

	if (rxd_ptype == 0 || (rxds->rxd.flags & PCIE_DESC_RX_VLAN) != 0)
		return;

	nfp_ptype.l4_ptype = (rxd_ptype & NFP_NET_PTYPE_L4_MASK) >>
			NFP_NET_PTYPE_L4_OFFSET;
	nfp_ptype.l3_ptype = (rxd_ptype & NFP_NET_PTYPE_L3_MASK) >>
			NFP_NET_PTYPE_L3_OFFSET;
	nfp_ptype.tunnel_ptype = (rxd_ptype & NFP_NET_PTYPE_TUNNEL_MASK) >>
			NFP_NET_PTYPE_TUNNEL_OFFSET;
	nfp_ptype.outer_l3_ptype = (rxd_ptype & NFP_NET_PTYPE_OUTER_L3_MASK) >>
			NFP_NET_PTYPE_OUTER_L3_OFFSET;

	nfp_net_set_ptype(&nfp_ptype, mb);
}

/*
 * RX path design:
 *
 * There are two main decisions to make:
 * 1) How to check the DD bit of the RX descriptors.
 * 2) How and when to allocate new mbufs.
 *
 * The current implementation checks a single DD bit per loop iteration. As
 * each descriptor is 8 bytes, it is likely better to check all the
 * descriptors sharing a cache line instead. Tests with this change have not
 * shown any performance improvement, but it deserves further investigation:
 * depending on which descriptor comes next, fewer than 8 descriptors may be
 * left in the same cache line, and handling that adds extra work which could
 * be counterproductive by itself. Indeed, recent firmware changes go in this
 * direction, writing several descriptors with the DD bit set at once to save
 * PCIe bandwidth and DMA operations from the NFP.
 *
 * An mbuf is allocated whenever a new packet is received. The descriptor is
 * then linked with the new mbuf and the old one is handed to the user. The
 * main drawback of this design is that a per-packet allocation is more
 * expensive than the bulk allocations DPDK offers with rte_mempool_get_bulk.
 * From the cache point of view, allocating the mbuf early, as done now, does
 * not seem to bring any benefit either; again, tests with this change have
 * not shown any improvement. Also, rte_mempool_get_bulk returns all or
 * nothing, so the implications of that allocation model would need to be
 * studied in depth.
 */
uint16_t
nfp_net_recv_pkts(void *rx_queue,
		struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	uint64_t dma_addr;
	uint16_t avail = 0;
	struct rte_mbuf *mb;
	uint16_t nb_hold = 0;
	struct nfp_net_hw *hw;
	struct rte_mbuf *new_mb;
	struct nfp_net_rxq *rxq;
	struct nfp_net_dp_buf *rxb;
	struct nfp_net_rx_desc *rxds;
	uint16_t avail_multiplexed = 0;

	rxq = rx_queue;
	if (unlikely(rxq == NULL)) {
		/*
		 * DPDK only checks that the queue index is below the number
		 * of enabled queues, but the queue still needs to be configured.
		 */
		PMD_RX_LOG(ERR, "RX Bad queue");
		return 0;
	}

	hw = rxq->hw;

	while (avail + avail_multiplexed < nb_pkts) {
		rxb = &rxq->rxbufs[rxq->rd_p];
		if (unlikely(rxb == NULL)) {
			PMD_RX_LOG(ERR, "rxb does not exist!");
			break;
		}

		rxds = &rxq->rxds[rxq->rd_p];
		if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
			break;

		/*
		 * Memory barrier to ensure that we won't do other
		 * reads before the DD bit.
		 */
		rte_rmb();

		/*
		 * We got a packet. Let's allocate a new mbuf for refilling the
		 * free descriptor ring as soon as possible.
		 */
		new_mb = rte_pktmbuf_alloc(rxq->mem_pool);
		if (unlikely(new_mb == NULL)) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%hu",
					rxq->port_id, rxq->qidx);
			nfp_net_mbuf_alloc_failed(rxq);
			break;
		}

		/*
		 * Grab the received mbuf and refill the descriptor with the
		 * mbuf just allocated.
		 */
		mb = rxb->mbuf;
		rxb->mbuf = new_mb;

		PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u",
				rxds->rxd.data_len, rxq->mbuf_size);

		/* Size of this segment */
		mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
		/* Size of the whole packet. Only one segment is supported. */
		mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);

		if (unlikely((mb->data_len + hw->rx_offset) > rxq->mbuf_size)) {
			/*
			 * This should not happen, and it is the user's
			 * responsibility to avoid it. Still, give some
			 * information about the error.
			 */
			PMD_RX_LOG(ERR, "mbuf overflow likely due to the RX offset.");
			rte_pktmbuf_free(mb);
			break;
		}

		/* Filling the received mbuf with packet info */
		if (hw->rx_offset != 0)
			mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset;
		else
			mb->data_off = RTE_PKTMBUF_HEADROOM + NFP_DESC_META_LEN(rxds);

		/* No scatter mode supported */
		mb->nb_segs = 1;
		mb->next = NULL;
		mb->port = rxq->port_id;

		struct nfp_net_meta_parsed meta;
		nfp_net_meta_parse(rxds, rxq, hw, mb, &meta);

		nfp_net_parse_ptype(rxq, rxds, mb);

		/* Checking the checksum flag */
		nfp_net_rx_cksum(rxq, rxds, mb);

		/* Now resetting and updating the descriptor */
		rxds->vals[0] = 0;
		rxds->vals[1] = 0;
		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(new_mb));
		rxds->fld.dd = 0;
		rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff;
		rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
		nb_hold++;

		rxq->rd_p++;
		if (unlikely(rxq->rd_p == rxq->rx_count)) /* Wrapping */
			rxq->rd_p = 0;

		if (((meta.flags >> NFP_NET_META_PORTID) & 0x1) == 0) {
			rx_pkts[avail++] = mb;
		} else if (nfp_flower_pf_dispatch_pkts(rxq, mb, meta.port_id)) {
			avail_multiplexed++;
		} else {
			rte_pktmbuf_free(mb);
			break;
		}
	}

	if (nb_hold == 0)
		return nb_hold;

	PMD_RX_LOG(DEBUG, "RX port_id=%hu queue_id=%hu, %hu packets received",
			rxq->port_id, rxq->qidx, avail);

	nb_hold += rxq->nb_rx_hold;

	/*
	 * FL descriptors need to be written before incrementing the
	 * FL queue WR pointer.
	 */
	rte_wmb();
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port=%hu queue=%hu nb_hold=%hu avail=%hu",
				rxq->port_id, rxq->qidx, nb_hold, avail);
		nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;

	return avail;
}

static void
nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq)
{
	uint16_t i;

	if (rxq->rxbufs == NULL)
		return;

	for (i = 0; i < rxq->rx_count; i++) {
		if (rxq->rxbufs[i].mbuf != NULL) {
			rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf);
			rxq->rxbufs[i].mbuf = NULL;
		}
	}
}

void
nfp_net_rx_queue_release(struct rte_eth_dev *dev,
		uint16_t queue_idx)
{
	struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_idx];

	if (rxq != NULL) {
		nfp_net_rx_queue_release_mbufs(rxq);
		rte_eth_dma_zone_free(dev, "rx_ring", queue_idx);
		rte_free(rxq->rxbufs);
		rte_free(rxq);
	}
}

void
nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
{
	nfp_net_rx_queue_release_mbufs(rxq);
	rxq->rd_p = 0;
	rxq->nb_rx_hold = 0;
}

int
nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
		uint16_t queue_idx,
		uint16_t nb_desc,
		unsigned int socket_id,
		const struct rte_eth_rxconf *rx_conf,
		struct rte_mempool *mp)
{
	uint32_t rx_desc_sz;
	uint16_t min_rx_desc;
	uint16_t max_rx_desc;
	struct nfp_net_hw *hw;
	struct nfp_net_rxq *rxq;
	const struct rte_memzone *tz;
	struct nfp_net_hw_priv *hw_priv;

	hw = nfp_net_get_hw(dev);
	hw_priv = dev->process_private;

	nfp_net_rx_desc_limits(hw_priv, &min_rx_desc, &max_rx_desc);

	/* Validating number of descriptors */
	rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc);
	if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 ||
			nb_desc > max_rx_desc || nb_desc < min_rx_desc) {
		PMD_DRV_LOG(ERR, "Wrong nb_desc value");
		return -EINVAL;
	}

	/*
	 * Free memory prior to re-allocation if needed. This is the case after
	 * calling @nfp_net_stop().
	 */
	if (dev->data->rx_queues[queue_idx] != NULL) {
		nfp_net_rx_queue_release(dev, queue_idx);
		dev->data->rx_queues[queue_idx] = NULL;
	}

	/* Allocating rx queue data structure */
	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		return -ENOMEM;

	dev->data->rx_queues[queue_idx] = rxq;

	/* Hw queues mapping based on firmware configuration */
	rxq->qidx = queue_idx;
	rxq->fl_qcidx = queue_idx * hw->stride_rx;
	rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx);

	/*
	 * Tracking mbuf size for detecting a potential mbuf overflow due to
	 * RX offset.
	 */
	rxq->mem_pool = mp;
	rxq->mbuf_size = rxq->mem_pool->elt_size;
	rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
	hw->flbufsz = rxq->mbuf_size;

	rxq->rx_count = nb_desc;
	rxq->port_id = dev->data->port_id;
	rxq->rx_free_thresh = rx_conf->rx_free_thresh;

	/*
	 * Allocate RX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
			sizeof(struct nfp_net_rx_desc) * max_rx_desc,
			NFP_MEMZONE_ALIGN, socket_id);
	if (tz == NULL) {
		PMD_DRV_LOG(ERR, "Error allocating rx dma");
		nfp_net_rx_queue_release(dev, queue_idx);
		dev->data->rx_queues[queue_idx] = NULL;
		return -ENOMEM;
	}

	/* Saving physical and virtual addresses for the RX ring */
	rxq->dma = (uint64_t)tz->iova;
	rxq->rxds = tz->addr;

	/* Mbuf pointers array for referencing mbufs linked to RX descriptors */
	rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs",
			sizeof(*rxq->rxbufs) * nb_desc, RTE_CACHE_LINE_SIZE,
			socket_id);
	if (rxq->rxbufs == NULL) {
		nfp_net_rx_queue_release(dev, queue_idx);
		dev->data->rx_queues[queue_idx] = NULL;
		return -ENOMEM;
	}

	nfp_net_reset_rx_queue(rxq);

	rxq->hw = hw;
	rxq->hw_priv = dev->process_private;

	/*
	 * Telling the HW about the physical address of the RX ring and number
	 * of descriptors in log2 format.
	 */
	nn_cfg_writeq(&hw->super, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma);
	nn_cfg_writeb(&hw->super, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc));

	return 0;
}

static inline uint32_t
nfp_net_read_tx_free_qcp(struct nfp_net_txq *txq)
{
	/*
	 * If TX ring pointer write back is not supported, do a PCIe read.
	 * Otherwise read qcp value from write back dma address.
	 */
	if (txq->txrwb == NULL)
		return nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);

	/*
	 * In most cases the TX count is a power of two and the costly modulus
	 * operation can be substituted with a subtraction and an AND operation.
	 */
	if (rte_is_power_of_2(txq->tx_count) == 1)
		return (*txq->txrwb) & (txq->tx_count - 1);
	else
		return (*txq->txrwb) % txq->tx_count;
}
/**
 * Check for descriptors with a complete status
 *
 * @param txq
 *   TX queue to work with
 *
 * @return
 *   Number of descriptors freed
 */
uint32_t
nfp_net_tx_free_bufs(struct nfp_net_txq *txq)
{
	uint32_t todo;
	uint32_t qcp_rd_p;

	PMD_TX_LOG(DEBUG, "queue %hu. Check for descriptor with a complete"
			" status", txq->qidx);

	/* Work out how many packets have been sent */
	qcp_rd_p = nfp_net_read_tx_free_qcp(txq);

	if (qcp_rd_p == txq->rd_p) {
		PMD_TX_LOG(DEBUG, "queue %hu: It seems harrier is not sending "
				"packets (%u, %u)", txq->qidx,
				qcp_rd_p, txq->rd_p);
		return 0;
	}

	if (qcp_rd_p > txq->rd_p)
		todo = qcp_rd_p - txq->rd_p;
	else
		todo = qcp_rd_p + txq->tx_count - txq->rd_p;

	PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u",
			qcp_rd_p, txq->rd_p);

	if (todo == 0)
		return todo;

	txq->rd_p += todo;
	if (unlikely(txq->rd_p >= txq->tx_count))
		txq->rd_p -= txq->tx_count;

	return todo;
}

static void
nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq)
{
	uint32_t i;

	if (txq->txbufs == NULL)
		return;

	for (i = 0; i < txq->tx_count; i++) {
		if (txq->txbufs[i].mbuf != NULL) {
			rte_pktmbuf_free_seg(txq->txbufs[i].mbuf);
			txq->txbufs[i].mbuf = NULL;
		}
	}
}

void
nfp_net_tx_queue_release(struct rte_eth_dev *dev,
		uint16_t queue_idx)
{
	struct nfp_net_hw *net_hw;
	struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx];

	if (txq != NULL) {
		net_hw = nfp_net_get_hw(dev);
		if (net_hw->txrwb_mz != NULL)
			nn_cfg_writeq(&net_hw->super, NFP_NET_CFG_TXR_WB_ADDR(queue_idx), 0);
		nfp_net_tx_queue_release_mbufs(txq);
		rte_eth_dma_zone_free(dev, "tx_ring", queue_idx);
		rte_free(txq->txbufs);
		rte_free(txq);
	}
}

void
nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
{
	nfp_net_tx_queue_release_mbufs(txq);
	txq->wr_p = 0;
	txq->rd_p = 0;
	if (txq->txrwb != NULL)
		*txq->txrwb = 0;
}

int
nfp_net_tx_queue_setup(struct rte_eth_dev *dev,
		uint16_t queue_idx,
		uint16_t nb_desc,
		unsigned int socket_id,
		const struct rte_eth_txconf *tx_conf)
{
	struct nfp_net_hw_priv *hw_priv;

	hw_priv = dev->process_private;

	if (hw_priv->pf_dev->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3)
		return nfp_net_nfd3_tx_queue_setup(dev, queue_idx,
				nb_desc, socket_id, tx_conf);
	else
		return nfp_net_nfdk_tx_queue_setup(dev, queue_idx,
				nb_desc, socket_id, tx_conf);
}

void
nfp_net_rx_queue_info_get(struct rte_eth_dev *dev,
		uint16_t queue_id,
		struct rte_eth_rxq_info *info)
{
	struct rte_eth_dev_info dev_info;
	struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_id];

	info->mp = rxq->mem_pool;
	info->nb_desc = rxq->rx_count;

	info->conf.rx_free_thresh = rxq->rx_free_thresh;

	nfp_net_infos_get(dev, &dev_info);
	info->conf.offloads = dev_info.rx_offload_capa &
			dev->data->dev_conf.rxmode.offloads;
}

void
nfp_net_tx_queue_info_get(struct rte_eth_dev *dev,
		uint16_t queue_id,
		struct rte_eth_txq_info *info)
{
	struct rte_eth_dev_info dev_info;
	struct nfp_net_hw_priv *hw_priv = dev->process_private;
	struct nfp_net_txq *txq = dev->data->tx_queues[queue_id];

	if (hw_priv->pf_dev->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3)
		info->nb_desc = txq->tx_count / NFD3_TX_DESC_PER_PKT;
	else
		info->nb_desc = txq->tx_count / NFDK_TX_DESC_PER_SIMPLE_PKT;

	info->conf.tx_free_thresh = txq->tx_free_thresh;

	nfp_net_infos_get(dev, &dev_info);
	info->conf.offloads = dev_info.tx_offload_capa &
			dev->data->dev_conf.txmode.offloads;
}
void
nfp_net_recv_pkts_set(struct rte_eth_dev *eth_dev)
{
	if (nfp_net_get_avx2_supported())
		eth_dev->rx_pkt_burst = nfp_net_vec_avx2_recv_pkts;
	else
		eth_dev->rx_pkt_burst = nfp_net_recv_pkts;
}