/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2014-2021 Netronome Systems, Inc.
 * All rights reserved.
 *
 * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
 */

#include "nfp_rxtx.h"

#include <ethdev_pci.h>
#include <rte_security.h>

#include "nfd3/nfp_nfd3.h"
#include "nfdk/nfp_nfdk.h"
#include "flower/nfp_flower.h"

#include "nfp_ipsec.h"
#include "nfp_logs.h"

/* Maximum number of supported VLANs in parsed packet metadata. */
#define NFP_META_MAX_VLANS 2

/* Record metadata parsed from packet */
struct nfp_meta_parsed {
	uint32_t port_id;   /**< Port id value */
	uint32_t sa_idx;    /**< IPsec SA index */
	uint32_t hash;      /**< RSS hash value */
	uint32_t mark_id;   /**< Mark id value */
	uint16_t flags;     /**< Bitmap to indicate if meta exists */
	uint8_t hash_type;  /**< RSS hash type */
	uint8_t ipsec_type; /**< IPsec type */
	uint8_t vlan_layer; /**< The valid number of values in @vlan[] */
	/**
	 * Holds information parsed from NFP_NET_META_VLAN.
	 * The innermost VLAN starts at position 0.
	 */
	struct {
		uint8_t offload; /**< Flag indicating whether VLAN is offloaded */
		uint8_t tpid;    /**< VLAN TPID */
		uint16_t tci;    /**< VLAN TCI (PCP + DEI + VID) */
	} vlan[NFP_META_MAX_VLANS];
};

/*
 * The bit format and map of nfp packet type for rxd.offload_info in Rx descriptor.
 *
 * The bit format of the nfp packet type is as follows:
 * ---------------------------------
 *            1                   0
 *  5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |       |ol3|tunnel |  l3 |  l4 |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * The bit map of the nfp packet type is as follows:
 *
 * L4: bit 0~2, used for layer 4 or inner layer 4.
 * 000: NFP_NET_PTYPE_L4_NONE
 * 001: NFP_NET_PTYPE_L4_TCP
 * 010: NFP_NET_PTYPE_L4_UDP
 * 011: NFP_NET_PTYPE_L4_FRAG
 * 100: NFP_NET_PTYPE_L4_NONFRAG
 * 101: NFP_NET_PTYPE_L4_ICMP
 * 110: NFP_NET_PTYPE_L4_SCTP
 * 111: reserved
 *
 * L3: bit 3~5, used for layer 3 or inner layer 3.
 * 000: NFP_NET_PTYPE_L3_NONE
 * 001: NFP_NET_PTYPE_L3_IPV6
 * 010: NFP_NET_PTYPE_L3_IPV4
 * 011: NFP_NET_PTYPE_L3_IPV4_EXT
 * 100: NFP_NET_PTYPE_L3_IPV6_EXT
 * 101: NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN
 * 110: NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN
 * 111: reserved
 *
 * Tunnel: bit 6~9, used for tunnel.
 * 0000: NFP_NET_PTYPE_TUNNEL_NONE
 * 0001: NFP_NET_PTYPE_TUNNEL_VXLAN
 * 0100: NFP_NET_PTYPE_TUNNEL_NVGRE
 * 0101: NFP_NET_PTYPE_TUNNEL_GENEVE
 * 0010, 0011, 0110~1111: reserved
 *
 * Outer L3: bit 10~11, used for outer layer 3.
 * 00: NFP_NET_PTYPE_OUTER_L3_NONE
 * 01: NFP_NET_PTYPE_OUTER_L3_IPV6
 * 10: NFP_NET_PTYPE_OUTER_L3_IPV4
 * 11: reserved
 *
 * Reserved: bit 12~15, used for extension.
 */

/* Mask and offset about nfp packet type based on the bit map above. */
#define NFP_NET_PTYPE_L4_MASK          0x0007
#define NFP_NET_PTYPE_L3_MASK          0x0038
#define NFP_NET_PTYPE_TUNNEL_MASK      0x03c0
#define NFP_NET_PTYPE_OUTER_L3_MASK    0x0c00

#define NFP_NET_PTYPE_L4_OFFSET        0
#define NFP_NET_PTYPE_L3_OFFSET        3
#define NFP_NET_PTYPE_TUNNEL_OFFSET    6
#define NFP_NET_PTYPE_OUTER_L3_OFFSET  10
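
/*
 * Worked example of the bit map above (the value is illustrative):
 * rxd.offload_info = 0x0451 decodes as
 *   outer l3 = 0x1 (NFP_NET_PTYPE_OUTER_L3_IPV6)
 *   tunnel   = 0x1 (NFP_NET_PTYPE_TUNNEL_VXLAN)
 *   l3       = 0x2 (NFP_NET_PTYPE_L3_IPV4)
 *   l4       = 0x1 (NFP_NET_PTYPE_L4_TCP)
 * i.e. a VXLAN-encapsulated TCP/IPv4 packet carried over IPv6.
 */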

/* Case about nfp packet type based on the bit map above. */
#define NFP_NET_PTYPE_L4_NONE              0
#define NFP_NET_PTYPE_L4_TCP               1
#define NFP_NET_PTYPE_L4_UDP               2
#define NFP_NET_PTYPE_L4_FRAG              3
#define NFP_NET_PTYPE_L4_NONFRAG           4
#define NFP_NET_PTYPE_L4_ICMP              5
#define NFP_NET_PTYPE_L4_SCTP              6

#define NFP_NET_PTYPE_L3_NONE              0
#define NFP_NET_PTYPE_L3_IPV6              1
#define NFP_NET_PTYPE_L3_IPV4              2
#define NFP_NET_PTYPE_L3_IPV4_EXT          3
#define NFP_NET_PTYPE_L3_IPV6_EXT          4
#define NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN  5
#define NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN  6

#define NFP_NET_PTYPE_TUNNEL_NONE          0
#define NFP_NET_PTYPE_TUNNEL_VXLAN         1
#define NFP_NET_PTYPE_TUNNEL_NVGRE         4
#define NFP_NET_PTYPE_TUNNEL_GENEVE        5

#define NFP_NET_PTYPE_OUTER_L3_NONE        0
#define NFP_NET_PTYPE_OUTER_L3_IPV6        1
#define NFP_NET_PTYPE_OUTER_L3_IPV4        2

#define NFP_PTYPE2RTE(tunnel, type) ((tunnel) ? RTE_PTYPE_INNER_##type : RTE_PTYPE_##type)

/* Record NFP packet type parsed from rxd.offload_info. */
struct nfp_ptype_parsed {
	uint8_t l4_ptype;       /**< Packet type of layer 4, or inner layer 4. */
	uint8_t l3_ptype;       /**< Packet type of layer 3, or inner layer 3. */
	uint8_t tunnel_ptype;   /**< Packet type of tunnel. */
	uint8_t outer_l3_ptype; /**< Packet type of outer layer 3. */
};

/* Set mbuf checksum flags based on RX descriptor flags */
void
nfp_net_rx_cksum(struct nfp_net_rxq *rxq,
		struct nfp_net_rx_desc *rxd,
		struct rte_mbuf *mb)
{
	struct nfp_net_hw *hw = rxq->hw;

	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_RXCSUM) == 0)
		return;

	/* If IPv4 and IP checksum error, fail */
	if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) != 0 &&
			(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK) == 0))
		mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
	else
		mb->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;

	/* If neither UDP nor TCP return */
	if ((rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) == 0 &&
			(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM) == 0)
		return;

	if (likely(rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK) != 0)
		mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
	else
		mb->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
}

static int
nfp_net_rx_fill_freelist(struct nfp_net_rxq *rxq)
{
	uint16_t i;
	uint64_t dma_addr;
	struct nfp_net_dp_buf *rxe = rxq->rxbufs;

	PMD_RX_LOG(DEBUG, "Fill Rx Freelist for %hu descriptors",
			rxq->rx_count);

	for (i = 0; i < rxq->rx_count; i++) {
		struct nfp_net_rx_desc *rxd;
		struct rte_mbuf *mbuf = rte_pktmbuf_alloc(rxq->mem_pool);

		if (mbuf == NULL) {
			PMD_DRV_LOG(ERR, "RX mbuf alloc failed queue_id=%hu",
					rxq->qidx);
			return -ENOMEM;
		}

		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));

		rxd = &rxq->rxds[i];
		rxd->fld.dd = 0;
		rxd->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff;
		rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;

		rxe[i].mbuf = mbuf;
	}

	/* Make sure all writes are flushed before telling the hardware */
	rte_wmb();

	/* Not advertising the whole ring as the firmware gets confused if so */
	PMD_RX_LOG(DEBUG, "Increment FL write pointer in %hu", rxq->rx_count - 1);

	nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, rxq->rx_count - 1);

	return 0;
}
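
/* Fill the freelists of all configured Rx queues; returns -1 if any queue cannot be filled. */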

int
nfp_net_rx_freelist_setup(struct rte_eth_dev *dev)
{
	uint16_t i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (nfp_net_rx_fill_freelist(dev->data->rx_queues[i]) != 0)
			return -1;
	}

	return 0;
}

uint32_t
nfp_net_rx_queue_count(void *rx_queue)
{
	uint32_t idx;
	uint32_t count = 0;
	struct nfp_net_rxq *rxq;
	struct nfp_net_rx_desc *rxds;

	rxq = rx_queue;
	idx = rxq->rd_p;

	/*
	 * Other PMDs are just checking the DD bit in intervals of 4
	 * descriptors and counting all four if the first has the DD
	 * bit on. Of course, this is not accurate but can be good for
	 * performance. Ideally it should be done on descriptor chunks
	 * belonging to the same cache line.
	 */
	while (count < rxq->rx_count) {
		rxds = &rxq->rxds[idx];
		if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
			break;

		count++;
		idx++;

		/* Wrapping */
		if (idx == rxq->rx_count)
			idx = 0;
	}

	return count;
}

/* Parse the chained metadata from packet */
static bool
nfp_net_parse_chained_meta(uint8_t *meta_base,
		rte_be32_t meta_header,
		struct nfp_meta_parsed *meta)
{
	uint32_t meta_info;
	uint32_t vlan_info;
	uint8_t *meta_offset;

	meta_info = rte_be_to_cpu_32(meta_header);
	meta_offset = meta_base + 4;

	for (; meta_info != 0; meta_info >>= NFP_NET_META_FIELD_SIZE, meta_offset += 4) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_PORTID:
			meta->port_id = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset);
			break;
		case NFP_NET_META_HASH:
			/* Next field type is about the hash type */
			meta_info >>= NFP_NET_META_FIELD_SIZE;
			/* Hash value is in the data field */
			meta->hash = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset);
			meta->hash_type = meta_info & NFP_NET_META_FIELD_MASK;
			break;
		case NFP_NET_META_VLAN:
			vlan_info = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset);
			meta->vlan[meta->vlan_layer].offload =
					vlan_info >> NFP_NET_META_VLAN_OFFLOAD;
			meta->vlan[meta->vlan_layer].tci =
					vlan_info & NFP_NET_META_VLAN_MASK;
			meta->vlan[meta->vlan_layer].tpid = NFP_NET_META_TPID(vlan_info);
			meta->vlan_layer++;
			break;
		case NFP_NET_META_IPSEC:
			meta->sa_idx = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset);
			meta->ipsec_type = meta_info & NFP_NET_META_FIELD_MASK;
			break;
		case NFP_NET_META_MARK:
			meta->flags |= (1 << NFP_NET_META_MARK);
			meta->mark_id = rte_be_to_cpu_32(*(rte_be32_t *)meta_offset);
			break;
		default:
			/* Unsupported metadata can be a performance issue */
			return false;
		}
	}

	return true;
}
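
/*
 * Chained metadata layout, for reference: the 32-bit header holds up to eight
 * 4-bit field-type values (consumed from the least significant nibble up),
 * followed by one 32-bit big-endian data word per field. For example, a header
 * of (NFP_NET_META_VLAN << 8) | (hash_type << 4) | NFP_NET_META_HASH is
 * followed by two data words: the RSS hash and then the VLAN info word.
 */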

/* Set mbuf hash data based on the metadata info */
static void
nfp_net_parse_meta_hash(const struct nfp_meta_parsed *meta,
		struct nfp_net_rxq *rxq,
		struct rte_mbuf *mbuf)
{
	struct nfp_net_hw *hw = rxq->hw;

	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_RSS_ANY) == 0)
		return;

	mbuf->hash.rss = meta->hash;
	mbuf->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
}

/*
 * Parse the single metadata
 *
 * The RSS hash and hash-type are prepended to the packet data.
 * Get it from metadata area.
 */
static inline void
nfp_net_parse_single_meta(uint8_t *meta_base,
		rte_be32_t meta_header,
		struct nfp_meta_parsed *meta)
{
	meta->hash_type = rte_be_to_cpu_32(meta_header);
	meta->hash = rte_be_to_cpu_32(*(rte_be32_t *)(meta_base + 4));
}

/* Set mbuf vlan_strip data based on metadata info */
static void
nfp_net_parse_meta_vlan(const struct nfp_meta_parsed *meta,
		struct nfp_net_rx_desc *rxd,
		struct nfp_net_rxq *rxq,
		struct rte_mbuf *mb)
{
	uint32_t ctrl = rxq->hw->super.ctrl;

	/* Skip if the hardware does not support RX VLAN offload. */
	if ((ctrl & (NFP_NET_CFG_CTRL_RXVLAN | NFP_NET_CFG_CTRL_RXVLAN_V2)) == 0)
		return;

	/*
	 * The firmware supports two ways to send the VLAN info (with priority):
	 * 1. Using the metadata when NFP_NET_CFG_CTRL_RXVLAN_V2 is set.
	 * 2. Using the descriptor when NFP_NET_CFG_CTRL_RXVLAN is set.
	 */
	if ((ctrl & NFP_NET_CFG_CTRL_RXVLAN_V2) != 0) {
		if (meta->vlan_layer > 0 && meta->vlan[0].offload != 0) {
			mb->vlan_tci = rte_cpu_to_le_32(meta->vlan[0].tci);
			mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
		}
	} else if ((ctrl & NFP_NET_CFG_CTRL_RXVLAN) != 0) {
		if ((rxd->rxd.flags & PCIE_DESC_RX_VLAN) != 0) {
			mb->vlan_tci = rte_cpu_to_le_32(rxd->rxd.offload_info);
			mb->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
		}
	}
}

/*
 * Set mbuf qinq_strip data based on metadata info
 *
 * The outer VLAN TCI is prepended to the packet data.
 * Extract and decode it and set the mbuf fields.
 *
 * If both RTE_MBUF_F_RX_VLAN and NFP_NET_CFG_CTRL_RXQINQ are set, the 2 VLANs
 * have been stripped by the hardware and their TCIs are saved in
 * mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
 * If NFP_NET_CFG_CTRL_RXQINQ is set and RTE_MBUF_F_RX_VLAN is unset, only the
 * outer VLAN is removed from packet data, but both TCIs are saved in
 * mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
 *
 * qinq set & vlan set     : meta->vlan_layer >= 2, meta->vlan[0].offload = 1, meta->vlan[1].offload = 1
 * qinq set & vlan not set : meta->vlan_layer >= 2, meta->vlan[1].offload = 1, meta->vlan[0].offload = 0
 * qinq not set & vlan set : meta->vlan_layer = 1, meta->vlan[0].offload = 1
 * qinq not set & vlan not set : meta->vlan_layer = 0
 */
static void
nfp_net_parse_meta_qinq(const struct nfp_meta_parsed *meta,
		struct nfp_net_rxq *rxq,
		struct rte_mbuf *mb)
{
	struct nfp_hw *hw = &rxq->hw->super;

	if ((hw->ctrl & NFP_NET_CFG_CTRL_RXQINQ) == 0)
		return;

	if (meta->vlan_layer < NFP_META_MAX_VLANS)
		return;

	if (meta->vlan[0].offload == 0)
		mb->vlan_tci = rte_cpu_to_le_16(meta->vlan[0].tci);

	mb->vlan_tci_outer = rte_cpu_to_le_16(meta->vlan[1].tci);
	PMD_RX_LOG(DEBUG, "Received outer vlan TCI is %u inner vlan TCI is %u",
			mb->vlan_tci_outer, mb->vlan_tci);
	mb->ol_flags |= RTE_MBUF_F_RX_QINQ | RTE_MBUF_F_RX_QINQ_STRIPPED;
}
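
/*
 * For example, with QinQ and VLAN stripping both enabled, a packet carrying an
 * outer tag with TCI 0x0064 and an inner tag with TCI 0x00c8 is reported with
 * vlan_layer = 2, vlan[1].tci = 0x0064 and vlan[0].tci = 0x00c8, and the mbuf
 * ends up with vlan_tci_outer = 0x0064 and vlan_tci = 0x00c8 (the inner TCI
 * having been set by nfp_net_parse_meta_vlan() in that case).
 */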

/*
 * Set mbuf IPsec offload features based on metadata info.
 *
 * The IPsec offload information is prepended to the packet data.
 * Extract and decode it and set the mbuf ol_flags.
 */
static void
nfp_net_parse_meta_ipsec(struct nfp_meta_parsed *meta,
		struct nfp_net_rxq *rxq,
		struct rte_mbuf *mbuf)
{
	int offset;
	uint32_t sa_idx;
	struct nfp_net_hw *hw;
	struct nfp_tx_ipsec_desc_msg *desc_md;

	hw = rxq->hw;
	sa_idx = meta->sa_idx;

	if (meta->ipsec_type != NFP_NET_META_IPSEC)
		return;

	if (sa_idx >= NFP_NET_IPSEC_MAX_SA_CNT) {
		mbuf->ol_flags |= RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED;
	} else {
		mbuf->ol_flags |= RTE_MBUF_F_RX_SEC_OFFLOAD;
		offset = hw->ipsec_data->pkt_dynfield_offset;
		desc_md = RTE_MBUF_DYNFIELD(mbuf, offset, struct nfp_tx_ipsec_desc_msg *);
		desc_md->sa_idx = sa_idx;
		desc_md->enc = 0;
	}
}

static void
nfp_net_parse_meta_mark(const struct nfp_meta_parsed *meta,
		struct rte_mbuf *mbuf)
{
	if (((meta->flags >> NFP_NET_META_MARK) & 0x1) == 0)
		return;

	mbuf->hash.fdir.hi = meta->mark_id;
	mbuf->ol_flags |= RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
}

/* Parse the metadata from packet */
static void
nfp_net_parse_meta(struct nfp_net_rx_desc *rxds,
		struct nfp_net_rxq *rxq,
		struct nfp_net_hw *hw,
		struct rte_mbuf *mb,
		struct nfp_meta_parsed *meta)
{
	uint8_t *meta_base;
	rte_be32_t meta_header;

	if (unlikely(NFP_DESC_META_LEN(rxds) == 0))
		return;

	meta_base = rte_pktmbuf_mtod_offset(mb, uint8_t *, -NFP_DESC_META_LEN(rxds));
	meta_header = *(rte_be32_t *)meta_base;

	switch (hw->meta_format) {
	case NFP_NET_METAFORMAT_CHAINED:
		if (nfp_net_parse_chained_meta(meta_base, meta_header, meta)) {
			nfp_net_parse_meta_hash(meta, rxq, mb);
			nfp_net_parse_meta_vlan(meta, rxds, rxq, mb);
			nfp_net_parse_meta_qinq(meta, rxq, mb);
			nfp_net_parse_meta_ipsec(meta, rxq, mb);
			nfp_net_parse_meta_mark(meta, mb);
		} else {
			PMD_RX_LOG(DEBUG, "RX chained metadata format is wrong!");
		}
		break;
	case NFP_NET_METAFORMAT_SINGLE:
		if ((rxds->rxd.flags & PCIE_DESC_RX_RSS) != 0) {
			nfp_net_parse_single_meta(meta_base, meta_header, meta);
			nfp_net_parse_meta_hash(meta, rxq, mb);
		}
		break;
	default:
		PMD_RX_LOG(DEBUG, "RX metadata does not exist.");
	}
}
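
/*
 * Note: callers zero-initialize the nfp_meta_parsed structure, so fields that
 * are absent from the packet metadata keep their zero value; in particular the
 * receive path treats meta.port_id == 0 as "no port metadata present".
 */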

/**
 * Set packet type to mbuf based on parsed structure.
 *
 * @param nfp_ptype
 *   Packet type structure parsed from Rx descriptor.
 * @param mb
 *   Mbuf to set the packet type.
 */
static void
nfp_net_set_ptype(const struct nfp_ptype_parsed *nfp_ptype,
		struct rte_mbuf *mb)
{
	uint32_t mbuf_ptype = RTE_PTYPE_L2_ETHER;
	uint8_t nfp_tunnel_ptype = nfp_ptype->tunnel_ptype;

	if (nfp_tunnel_ptype != NFP_NET_PTYPE_TUNNEL_NONE)
		mbuf_ptype |= RTE_PTYPE_INNER_L2_ETHER;

	switch (nfp_ptype->outer_l3_ptype) {
	case NFP_NET_PTYPE_OUTER_L3_NONE:
		break;
	case NFP_NET_PTYPE_OUTER_L3_IPV4:
		mbuf_ptype |= RTE_PTYPE_L3_IPV4;
		break;
	case NFP_NET_PTYPE_OUTER_L3_IPV6:
		mbuf_ptype |= RTE_PTYPE_L3_IPV6;
		break;
	default:
		PMD_RX_LOG(DEBUG, "Unrecognized nfp outer layer 3 packet type: %u",
				nfp_ptype->outer_l3_ptype);
		break;
	}

	switch (nfp_tunnel_ptype) {
	case NFP_NET_PTYPE_TUNNEL_NONE:
		break;
	case NFP_NET_PTYPE_TUNNEL_VXLAN:
		mbuf_ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
		break;
	case NFP_NET_PTYPE_TUNNEL_NVGRE:
		mbuf_ptype |= RTE_PTYPE_TUNNEL_NVGRE;
		break;
	case NFP_NET_PTYPE_TUNNEL_GENEVE:
		mbuf_ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
		break;
	default:
		PMD_RX_LOG(DEBUG, "Unrecognized nfp tunnel packet type: %u",
				nfp_tunnel_ptype);
		break;
	}

	switch (nfp_ptype->l4_ptype) {
	case NFP_NET_PTYPE_L4_NONE:
		break;
	case NFP_NET_PTYPE_L4_TCP:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_TCP);
		break;
	case NFP_NET_PTYPE_L4_UDP:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_UDP);
		break;
	case NFP_NET_PTYPE_L4_FRAG:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_FRAG);
		break;
	case NFP_NET_PTYPE_L4_NONFRAG:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_NONFRAG);
		break;
	case NFP_NET_PTYPE_L4_ICMP:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_ICMP);
		break;
	case NFP_NET_PTYPE_L4_SCTP:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L4_SCTP);
		break;
	default:
		PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 4 packet type: %u",
				nfp_ptype->l4_ptype);
		break;
	}

	switch (nfp_ptype->l3_ptype) {
	case NFP_NET_PTYPE_L3_NONE:
		break;
	case NFP_NET_PTYPE_L3_IPV4:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4);
		break;
	case NFP_NET_PTYPE_L3_IPV6:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6);
		break;
	case NFP_NET_PTYPE_L3_IPV4_EXT:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT);
		break;
	case NFP_NET_PTYPE_L3_IPV6_EXT:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT);
		break;
	case NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV4_EXT_UNKNOWN);
		break;
	case NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN:
		mbuf_ptype |= NFP_PTYPE2RTE(nfp_tunnel_ptype, L3_IPV6_EXT_UNKNOWN);
		break;
	default:
		PMD_RX_LOG(DEBUG, "Unrecognized nfp layer 3 packet type: %u",
				nfp_ptype->l3_ptype);
		break;
	}

	mb->packet_type = mbuf_ptype;
}
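
/*
 * Note that for VXLAN and Geneve the outer L4 is known to be UDP, so
 * RTE_PTYPE_L4_UDP is set alongside the tunnel type above, while NVGRE
 * (GRE-based) carries no outer UDP header.
 */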

/**
 * Parse the packet type from Rx descriptor and set to mbuf.
 *
 * @param rxq
 *   Rx queue
 * @param rxds
 *   Rx descriptor including the offloading info of packet type.
 * @param mb
 *   Mbuf to set the packet type.
 */
static void
nfp_net_parse_ptype(struct nfp_net_rxq *rxq,
		struct nfp_net_rx_desc *rxds,
		struct rte_mbuf *mb)
{
	struct nfp_net_hw *hw = rxq->hw;
	struct nfp_ptype_parsed nfp_ptype;
	uint16_t rxd_ptype = rxds->rxd.offload_info;

	if ((hw->super.ctrl_ext & NFP_NET_CFG_CTRL_PKT_TYPE) == 0)
		return;

	if (rxd_ptype == 0 || (rxds->rxd.flags & PCIE_DESC_RX_VLAN) != 0)
		return;

	nfp_ptype.l4_ptype = (rxd_ptype & NFP_NET_PTYPE_L4_MASK) >>
			NFP_NET_PTYPE_L4_OFFSET;
	nfp_ptype.l3_ptype = (rxd_ptype & NFP_NET_PTYPE_L3_MASK) >>
			NFP_NET_PTYPE_L3_OFFSET;
	nfp_ptype.tunnel_ptype = (rxd_ptype & NFP_NET_PTYPE_TUNNEL_MASK) >>
			NFP_NET_PTYPE_TUNNEL_OFFSET;
	nfp_ptype.outer_l3_ptype = (rxd_ptype & NFP_NET_PTYPE_OUTER_L3_MASK) >>
			NFP_NET_PTYPE_OUTER_L3_OFFSET;

	nfp_net_set_ptype(&nfp_ptype, mb);
}

/*
 * RX path design:
 *
 * There are some decisions to take:
 * 1) How to check the DD bit of RX descriptors
 * 2) How and when to allocate new mbufs
 *
 * The current implementation checks just one single DD bit each loop. As each
 * descriptor is 8 bytes, it is likely a good idea to check descriptors in
 * a single cache line instead. Tests with this change have not shown any
 * performance improvement, but it requires further investigation. For example,
 * depending on which descriptor is next, the number of descriptors could be
 * less than 8 for just checking those in the same cache line. This implies
 * extra work which could be counterproductive by itself. Indeed, recent
 * firmware changes do exactly this: they write several descriptors with the
 * DD bit set to save PCIe bandwidth and DMA operations from the NFP.
 *
 * Mbuf allocation is done when a new packet is received. Then the descriptor
 * is automatically linked with the new mbuf and the old one is given to the
 * user. The main drawback with this design is that mbuf allocation is heavier
 * than using the bulk allocations allowed by DPDK with rte_mempool_get_bulk.
 * From the cache point of view, allocating the mbuf early, as we do now, does
 * not seem to have any benefit at all. Again, tests with this change have not
 * shown any improvement. Also, rte_mempool_get_bulk returns all or nothing, so
 * the implications of this type of allocation should be studied more deeply
 * (see the sketch below).
 */
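
/*
 * For reference, a bulk-allocation variant of the refill (not what this driver
 * does; the batch size below is illustrative) would look roughly like:
 *
 *	struct rte_mbuf *new_mbufs[64];
 *
 *	if (rte_pktmbuf_alloc_bulk(rxq->mem_pool, new_mbufs, 64) != 0)
 *		return avail;	// all-or-nothing: no mbufs were allocated
 *
 * trading per-packet allocation cost against the all-or-nothing semantics
 * discussed above.
 */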

uint16_t
nfp_net_recv_pkts(void *rx_queue,
		struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	uint64_t dma_addr;
	uint16_t avail = 0;
	struct rte_mbuf *mb;
	uint16_t nb_hold = 0;
	struct nfp_net_hw *hw;
	struct rte_mbuf *new_mb;
	struct nfp_net_rxq *rxq;
	struct nfp_net_dp_buf *rxb;
	struct nfp_net_rx_desc *rxds;
	uint16_t avail_multiplexed = 0;

	rxq = rx_queue;
	if (unlikely(rxq == NULL)) {
		/*
		 * DPDK just checks that the queue index is lower than the
		 * maximum number of enabled queues, but the queue still
		 * needs to be configured.
		 */
		PMD_RX_LOG(ERR, "RX Bad queue");
		return 0;
	}

	hw = rxq->hw;

	while (avail + avail_multiplexed < nb_pkts) {
		rxb = &rxq->rxbufs[rxq->rd_p];
		if (unlikely(rxb == NULL)) {
			PMD_RX_LOG(ERR, "rxb does not exist!");
			break;
		}

		rxds = &rxq->rxds[rxq->rd_p];
		if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
			break;

		/*
		 * Memory barrier to ensure that we won't do other
		 * reads before the DD bit.
		 */
		rte_rmb();

		/*
		 * We got a packet. Let's alloc a new mbuf for refilling the
		 * free descriptor ring as soon as possible.
		 */
		new_mb = rte_pktmbuf_alloc(rxq->mem_pool);
		if (unlikely(new_mb == NULL)) {
			PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u queue_id=%hu",
					rxq->port_id, rxq->qidx);
			nfp_net_mbuf_alloc_failed(rxq);
			break;
		}

		/*
		 * Grab the mbuf and refill the descriptor with the
		 * previously allocated mbuf.
		 */
		mb = rxb->mbuf;
		rxb->mbuf = new_mb;

		PMD_RX_LOG(DEBUG, "Packet len: %u, mbuf_size: %u",
				rxds->rxd.data_len, rxq->mbuf_size);

		/* Size of this segment */
		mb->data_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);
		/* Size of the whole packet. We just support 1 segment */
		mb->pkt_len = rxds->rxd.data_len - NFP_DESC_META_LEN(rxds);

		if (unlikely((mb->data_len + hw->rx_offset) > rxq->mbuf_size)) {
			/*
			 * This should not happen and the user has the
			 * responsibility of avoiding it. But we have
			 * to give some info about the error.
			 */
			PMD_RX_LOG(ERR, "mbuf overflow likely due to the RX offset.");
			rte_pktmbuf_free(mb);
			break;
		}

		/* Filling the received mbuf with packet info */
		if (hw->rx_offset != 0)
			mb->data_off = RTE_PKTMBUF_HEADROOM + hw->rx_offset;
		else
			mb->data_off = RTE_PKTMBUF_HEADROOM + NFP_DESC_META_LEN(rxds);

		/* No scatter mode supported */
		mb->nb_segs = 1;
		mb->next = NULL;
		mb->port = rxq->port_id;

		struct nfp_meta_parsed meta = {};
		nfp_net_parse_meta(rxds, rxq, hw, mb, &meta);

		nfp_net_parse_ptype(rxq, rxds, mb);

		/* Checking the checksum flag */
		nfp_net_rx_cksum(rxq, rxds, mb);

		/* Now resetting and updating the descriptor */
		rxds->vals[0] = 0;
		rxds->vals[1] = 0;
		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(new_mb));
		rxds->fld.dd = 0;
		rxds->fld.dma_addr_hi = (dma_addr >> 32) & 0xffff;
		rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
		nb_hold++;

		rxq->rd_p++;
		if (unlikely(rxq->rd_p == rxq->rx_count)) /* Wrapping */
			rxq->rd_p = 0;

		if (meta.port_id == 0) {
			rx_pkts[avail++] = mb;
		} else if (nfp_flower_pf_dispatch_pkts(hw, mb, meta.port_id)) {
			avail_multiplexed++;
		} else {
			rte_pktmbuf_free(mb);
			break;
		}
	}

	if (nb_hold == 0)
		return nb_hold;

	PMD_RX_LOG(DEBUG, "RX port_id=%hu queue_id=%hu, %hu packets received",
			rxq->port_id, rxq->qidx, avail);

	nb_hold += rxq->nb_rx_hold;

	/*
	 * FL descriptors need to be written before incrementing the
	 * FL queue WR pointer.
	 */
	rte_wmb();
	if (nb_hold > rxq->rx_free_thresh) {
		PMD_RX_LOG(DEBUG, "port=%hu queue=%hu nb_hold=%hu avail=%hu",
				rxq->port_id, rxq->qidx, nb_hold, avail);
		nfp_qcp_ptr_add(rxq->qcp_fl, NFP_QCP_WRITE_PTR, nb_hold);
		nb_hold = 0;
	}
	rxq->nb_rx_hold = nb_hold;

	return avail;
}
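
/*
 * A minimal polling loop that exercises this receive path through the standard
 * ethdev API (port and queue numbers are illustrative):
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb = rte_eth_rx_burst(port_id, 0, pkts, 32);
 *
 *	for (uint16_t i = 0; i < nb; i++)
 *		rte_pktmbuf_free(pkts[i]);
 */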

static void
nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq *rxq)
{
	uint16_t i;

	if (rxq->rxbufs == NULL)
		return;

	for (i = 0; i < rxq->rx_count; i++) {
		if (rxq->rxbufs[i].mbuf != NULL) {
			rte_pktmbuf_free_seg(rxq->rxbufs[i].mbuf);
			rxq->rxbufs[i].mbuf = NULL;
		}
	}
}

void
nfp_net_rx_queue_release(struct rte_eth_dev *dev,
		uint16_t queue_idx)
{
	struct nfp_net_rxq *rxq = dev->data->rx_queues[queue_idx];

	if (rxq != NULL) {
		nfp_net_rx_queue_release_mbufs(rxq);
		rte_eth_dma_zone_free(dev, "rx_ring", queue_idx);
		rte_free(rxq->rxbufs);
		rte_free(rxq);
	}
}

void
nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
{
	nfp_net_rx_queue_release_mbufs(rxq);
	rxq->rd_p = 0;
	rxq->nb_rx_hold = 0;
}

int
nfp_net_rx_queue_setup(struct rte_eth_dev *dev,
		uint16_t queue_idx,
		uint16_t nb_desc,
		unsigned int socket_id,
		const struct rte_eth_rxconf *rx_conf,
		struct rte_mempool *mp)
{
	uint32_t rx_desc_sz;
	uint16_t min_rx_desc;
	uint16_t max_rx_desc;
	struct nfp_net_hw *hw;
	struct nfp_net_rxq *rxq;
	const struct rte_memzone *tz;

	hw = nfp_net_get_hw(dev);

	nfp_net_rx_desc_limits(hw, &min_rx_desc, &max_rx_desc);

	/* Validating number of descriptors */
	rx_desc_sz = nb_desc * sizeof(struct nfp_net_rx_desc);
	if (rx_desc_sz % NFP_ALIGN_RING_DESC != 0 ||
			nb_desc > max_rx_desc || nb_desc < min_rx_desc) {
		PMD_DRV_LOG(ERR, "Wrong nb_desc value");
		return -EINVAL;
	}

	/*
	 * Free memory prior to re-allocation if needed. This is the case after
	 * calling @nfp_net_stop().
	 */
	if (dev->data->rx_queues[queue_idx] != NULL) {
		nfp_net_rx_queue_release(dev, queue_idx);
		dev->data->rx_queues[queue_idx] = NULL;
	}

	/* Allocating rx queue data structure */
	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		return -ENOMEM;

	dev->data->rx_queues[queue_idx] = rxq;

	/* Hw queues mapping based on firmware configuration */
	rxq->qidx = queue_idx;
	rxq->fl_qcidx = queue_idx * hw->stride_rx;
	rxq->qcp_fl = hw->rx_bar + NFP_QCP_QUEUE_OFF(rxq->fl_qcidx);

	/*
	 * Tracking mbuf size for detecting a potential mbuf overflow due to
	 * RX offset.
	 */
	rxq->mem_pool = mp;
	rxq->mbuf_size = rxq->mem_pool->elt_size;
	rxq->mbuf_size -= (sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
	hw->flbufsz = rxq->mbuf_size;

	rxq->rx_count = nb_desc;
	rxq->port_id = dev->data->port_id;
	rxq->rx_free_thresh = rx_conf->rx_free_thresh;

	/*
	 * Allocate RX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	tz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
			sizeof(struct nfp_net_rx_desc) * max_rx_desc,
			NFP_MEMZONE_ALIGN, socket_id);
	if (tz == NULL) {
		PMD_DRV_LOG(ERR, "Error allocating rx dma");
		nfp_net_rx_queue_release(dev, queue_idx);
		dev->data->rx_queues[queue_idx] = NULL;
		return -ENOMEM;
	}

	/* Saving physical and virtual addresses for the RX ring */
	rxq->dma = (uint64_t)tz->iova;
	rxq->rxds = tz->addr;

	/* Mbuf pointers array for referencing mbufs linked to RX descriptors */
	rxq->rxbufs = rte_zmalloc_socket("rxq->rxbufs",
			sizeof(*rxq->rxbufs) * nb_desc, RTE_CACHE_LINE_SIZE,
			socket_id);
	if (rxq->rxbufs == NULL) {
		nfp_net_rx_queue_release(dev, queue_idx);
		dev->data->rx_queues[queue_idx] = NULL;
		return -ENOMEM;
	}

	nfp_net_reset_rx_queue(rxq);

	rxq->hw = hw;

	/*
	 * Telling the HW about the physical address of the RX ring and number
	 * of descriptors in log2 format.
	 */
	nn_cfg_writeq(&hw->super, NFP_NET_CFG_RXR_ADDR(queue_idx), rxq->dma);
	nn_cfg_writeb(&hw->super, NFP_NET_CFG_RXR_SZ(queue_idx), rte_log2_u32(nb_desc));

	return 0;
}
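
/*
 * For reference, an application reaches this setup path through the generic
 * ethdev call (the ring size and mbuf pool below are illustrative):
 *
 *	ret = rte_eth_rx_queue_setup(port_id, 0, 1024,
 *			rte_eth_dev_socket_id(port_id), NULL, mbuf_pool);
 *
 * where nb_desc must satisfy the min/max and alignment checks above.
 */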

/**
 * Check for descriptors with a complete status
 *
 * @param txq
 *   TX queue to work with
 *
 * @return
 *   Number of descriptors freed
 */
uint32_t
nfp_net_tx_free_bufs(struct nfp_net_txq *txq)
{
	uint32_t todo;
	uint32_t qcp_rd_p;

	PMD_TX_LOG(DEBUG, "queue %hu. Check for descriptor with a complete"
			" status", txq->qidx);

	/* Work out how many packets have been sent */
	qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);

	if (qcp_rd_p == txq->rd_p) {
		PMD_TX_LOG(DEBUG, "queue %hu: It seems harrier is not sending "
				"packets (%u, %u)", txq->qidx,
				qcp_rd_p, txq->rd_p);
		return 0;
	}

	if (qcp_rd_p > txq->rd_p)
		todo = qcp_rd_p - txq->rd_p;
	else
		todo = qcp_rd_p + txq->tx_count - txq->rd_p;

	PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u, todo: %u",
			qcp_rd_p, txq->rd_p, todo);

	if (todo == 0)
		return todo;

	txq->rd_p += todo;
	if (unlikely(txq->rd_p >= txq->tx_count))
		txq->rd_p -= txq->tx_count;

	return todo;
}

static void
nfp_net_tx_queue_release_mbufs(struct nfp_net_txq *txq)
{
	uint32_t i;

	if (txq->txbufs == NULL)
		return;

	for (i = 0; i < txq->tx_count; i++) {
		if (txq->txbufs[i].mbuf != NULL) {
			rte_pktmbuf_free_seg(txq->txbufs[i].mbuf);
			txq->txbufs[i].mbuf = NULL;
		}
	}
}

void
nfp_net_tx_queue_release(struct rte_eth_dev *dev,
		uint16_t queue_idx)
{
	struct nfp_net_txq *txq = dev->data->tx_queues[queue_idx];

	if (txq != NULL) {
		nfp_net_tx_queue_release_mbufs(txq);
		rte_eth_dma_zone_free(dev, "tx_ring", queue_idx);
		rte_free(txq->txbufs);
		rte_free(txq);
	}
}

void
nfp_net_reset_tx_queue(struct nfp_net_txq *txq)
{
	nfp_net_tx_queue_release_mbufs(txq);
	txq->wr_p = 0;
	txq->rd_p = 0;
}

void
nfp_net_set_meta_vlan(struct nfp_net_meta_raw *meta_data,
		struct rte_mbuf *pkt,
		uint8_t layer)
{
	uint16_t tpid;
	uint16_t vlan_tci;

	tpid = RTE_ETHER_TYPE_VLAN;
	vlan_tci = pkt->vlan_tci;

	meta_data->data[layer] = rte_cpu_to_be_32(tpid << 16 | vlan_tci);
}
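
/*
 * The resulting metadata word is (TPID << 16) | TCI in big-endian form; for
 * example a packet with vlan_tci 0x00c8 produces
 * data[layer] = rte_cpu_to_be_32(0x810000c8), since RTE_ETHER_TYPE_VLAN is 0x8100.
 */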

void
nfp_net_set_meta_ipsec(struct nfp_net_meta_raw *meta_data,
		struct nfp_net_txq *txq,
		struct rte_mbuf *pkt,
		uint8_t layer,
		uint8_t ipsec_layer)
{
	int offset;
	struct nfp_net_hw *hw;
	struct nfp_tx_ipsec_desc_msg *desc_md;

	hw = txq->hw;
	offset = hw->ipsec_data->pkt_dynfield_offset;
	desc_md = RTE_MBUF_DYNFIELD(pkt, offset, struct nfp_tx_ipsec_desc_msg *);

	switch (ipsec_layer) {
	case NFP_IPSEC_META_SAIDX:
		meta_data->data[layer] = desc_md->sa_idx;
		break;
	case NFP_IPSEC_META_SEQLOW:
		meta_data->data[layer] = desc_md->esn.low;
		break;
	case NFP_IPSEC_META_SEQHI:
		meta_data->data[layer] = desc_md->esn.hi;
		break;
	default:
		break;
	}
}

int
nfp_net_tx_queue_setup(struct rte_eth_dev *dev,
		uint16_t queue_idx,
		uint16_t nb_desc,
		unsigned int socket_id,
		const struct rte_eth_txconf *tx_conf)
{
	struct nfp_net_hw *hw;

	hw = nfp_net_get_hw(dev);

	if (hw->ver.extend == NFP_NET_CFG_VERSION_DP_NFD3)
		return nfp_net_nfd3_tx_queue_setup(dev, queue_idx,
				nb_desc, socket_id, tx_conf);
	else
		return nfp_net_nfdk_tx_queue_setup(dev, queue_idx,
				nb_desc, socket_id, tx_conf);
}