/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2023 Corigine, Inc.
 * All rights reserved.
 */

#include "nfp_nfd3.h"

#include <bus_pci_driver.h>
#include <rte_malloc.h>

#include "../flower/nfp_flower.h"
#include "../nfp_logs.h"
#include "../nfp_net_meta.h"

/* Flags in the host TX descriptor */
#define NFD3_DESC_TX_CSUM               RTE_BIT32(7)
#define NFD3_DESC_TX_IP4_CSUM           RTE_BIT32(6)
#define NFD3_DESC_TX_TCP_CSUM           RTE_BIT32(5)
#define NFD3_DESC_TX_UDP_CSUM           RTE_BIT32(4)
#define NFD3_DESC_TX_VLAN               RTE_BIT32(3)
#define NFD3_DESC_TX_LSO                RTE_BIT32(2)
#define NFD3_DESC_TX_ENCAP              RTE_BIT32(1)
#define NFD3_DESC_TX_O_IP4_CSUM         RTE_BIT32(0)

/* Set NFD3 TX descriptor for TSO */
static void
nfp_net_nfd3_tx_tso(struct nfp_net_txq *txq,
		struct nfp_net_nfd3_tx_desc *txd,
		struct rte_mbuf *mb)
{
	uint64_t ol_flags;
	struct nfp_net_hw *hw = txq->hw;

	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_LSO_ANY) == 0)
		goto clean_txd;

	ol_flags = mb->ol_flags;
	if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
			(ol_flags & RTE_MBUF_F_TX_UDP_SEG) == 0)
		goto clean_txd;

	txd->l3_offset = mb->l2_len;
	txd->l4_offset = mb->l2_len + mb->l3_len;
	txd->lso_hdrlen = mb->l2_len + mb->l3_len + mb->l4_len;

	if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) != 0) {
		txd->l3_offset += mb->outer_l2_len + mb->outer_l3_len;
		txd->l4_offset += mb->outer_l2_len + mb->outer_l3_len;
		txd->lso_hdrlen += mb->outer_l2_len + mb->outer_l3_len;
	}

	txd->mss = rte_cpu_to_le_16(mb->tso_segsz);
	txd->flags = NFD3_DESC_TX_LSO;

	return;

clean_txd:
	txd->flags = 0;
	txd->l3_offset = 0;
	txd->l4_offset = 0;
	txd->lso_hdrlen = 0;
	txd->mss = 0;
}
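
/*
 * Note on the TSO offsets above: for tunnelled packets the outer L2/L3
 * header lengths are folded into l3_offset, l4_offset and lso_hdrlen, so
 * all three values are expressed from the start of the outermost Ethernet
 * header of the frame handed to the hardware.
 */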

/* Set TX CSUM offload flags in NFD3 TX descriptor */
static void
nfp_net_nfd3_tx_cksum(struct nfp_net_txq *txq,
		struct nfp_net_nfd3_tx_desc *txd,
		struct rte_mbuf *mb)
{
	uint64_t ol_flags;
	struct nfp_net_hw *hw = txq->hw;

	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_TXCSUM) == 0)
		return;

	ol_flags = mb->ol_flags;

	/* Set TCP csum offload if TSO enabled. */
	if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0)
		txd->flags |= NFD3_DESC_TX_TCP_CSUM;

	/* Set UDP csum offload if UFO enabled. */
	if ((ol_flags & RTE_MBUF_F_TX_UDP_SEG) != 0)
		txd->flags |= NFD3_DESC_TX_UDP_CSUM;

	/* IPv4 header checksum only; IPv6 has no header checksum. */
	if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM) != 0)
		txd->flags |= NFD3_DESC_TX_IP4_CSUM;

	if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) != 0)
		txd->flags |= NFD3_DESC_TX_ENCAP;

	switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
	case RTE_MBUF_F_TX_UDP_CKSUM:
		txd->flags |= NFD3_DESC_TX_UDP_CSUM;
		break;
	case RTE_MBUF_F_TX_TCP_CKSUM:
		txd->flags |= NFD3_DESC_TX_TCP_CSUM;
		break;
	}

	if ((ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK)) != 0)
		txd->flags |= NFD3_DESC_TX_CSUM;
}

uint32_t
nfp_flower_nfd3_pkt_add_metadata(struct rte_mbuf *mbuf,
		uint32_t port_id)
{
	char *meta_offset;

	meta_offset = rte_pktmbuf_prepend(mbuf, FLOWER_PKT_DATA_OFFSET);
	*(rte_be32_t *)meta_offset = rte_cpu_to_be_32(NFP_NET_META_PORTID);
	meta_offset += NFP_NET_META_HEADER_SIZE;
	*(rte_be32_t *)meta_offset = rte_cpu_to_be_32(port_id);

	return FLOWER_PKT_DATA_OFFSET;
}
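
/*
 * Layout note for the Flower metadata prepended above: it occupies
 * FLOWER_PKT_DATA_OFFSET bytes made up of two big-endian 32-bit words, the
 * metadata header (NFP_NET_META_PORTID) followed by the port id.  The
 * returned length is what the send path later folds into the descriptor
 * offset field (see nfp_net_nfd3_xmit_pkts_common()).
 */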

/*
 * Set the VLAN info in the NFD3 TX descriptor.
 *
 * If NFP_NET_CFG_CTRL_TXVLAN_V2 is enabled, the VLAN info is stored in the
 * metadata and handled by nfp_net_meta_set_vlan().
 * Otherwise, if NFP_NET_CFG_CTRL_TXVLAN is enabled, the VLAN info is stored
 * in the TX descriptor and handled here by nfp_net_nfd3_tx_vlan().
 */
static inline void
nfp_net_nfd3_tx_vlan(struct nfp_net_txq *txq,
		struct nfp_net_nfd3_tx_desc *txd,
		struct rte_mbuf *mb)
{
	struct nfp_net_hw *hw = txq->hw;

	if ((hw->super.ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2) != 0 ||
			(hw->super.ctrl & NFP_NET_CFG_CTRL_TXVLAN) == 0)
		return;

	if ((mb->ol_flags & RTE_MBUF_F_TX_VLAN) != 0) {
		txd->flags |= NFD3_DESC_TX_VLAN;
		txd->vlan = mb->vlan_tci;
	}
}

static inline int
nfp_net_nfd3_set_meta_data(struct nfp_net_meta_raw *meta_data,
		struct nfp_net_txq *txq,
		struct rte_mbuf *pkt)
{
	char *meta;
	uint8_t layer = 0;
	uint32_t meta_info;
	struct nfp_net_hw *hw;
	uint8_t vlan_layer = 0;
	uint8_t ipsec_layer = 0;

	hw = txq->hw;

	if ((pkt->ol_flags & RTE_MBUF_F_TX_VLAN) != 0 &&
			(hw->super.ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2) != 0) {
		if (meta_data->length == 0)
			meta_data->length = NFP_NET_META_HEADER_SIZE;
		meta_data->length += NFP_NET_META_FIELD_SIZE;
		meta_data->header |= NFP_NET_META_VLAN;
	}

	if ((pkt->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) != 0 &&
			(hw->super.ctrl_ext & NFP_NET_CFG_CTRL_IPSEC) != 0) {
		uint32_t ipsec_type = NFP_NET_META_IPSEC |
				NFP_NET_META_IPSEC << NFP_NET_META_FIELD_SIZE |
				NFP_NET_META_IPSEC << (2 * NFP_NET_META_FIELD_SIZE);
		if (meta_data->length == 0)
			meta_data->length = NFP_NET_META_FIELD_SIZE;
		uint8_t ipsec_offset = meta_data->length - NFP_NET_META_FIELD_SIZE;
		meta_data->header |= (ipsec_type << ipsec_offset);
		meta_data->length += 3 * NFP_NET_META_FIELD_SIZE;
	}

	if (meta_data->length == 0)
		return 0;

	meta_info = meta_data->header;
	meta = rte_pktmbuf_prepend(pkt, meta_data->length);
	*(rte_be32_t *)meta = rte_cpu_to_be_32(meta_data->header);
	meta += NFP_NET_META_HEADER_SIZE;

	for (; meta_info != 0; meta_info >>= NFP_NET_META_FIELD_SIZE, layer++,
			meta += NFP_NET_META_FIELD_SIZE) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_VLAN:
			if (vlan_layer > 0) {
				PMD_DRV_LOG(ERR, "At most 1 layer of VLAN is supported.");
				return -EINVAL;
			}
			nfp_net_meta_set_vlan(meta_data, pkt, layer);
			vlan_layer++;
			break;
		case NFP_NET_META_IPSEC:
			if (ipsec_layer > 2) {
				PMD_DRV_LOG(ERR, "At most 3 layers of IPsec are supported for now.");
				return -EINVAL;
			}

			nfp_net_meta_set_ipsec(meta_data, txq, pkt, layer, ipsec_layer);
			ipsec_layer++;
			break;
		default:
			PMD_DRV_LOG(ERR, "The metadata type is not supported.");
			return -ENOTSUP;
		}

		*(rte_be32_t *)meta = rte_cpu_to_be_32(meta_data->data[layer]);
	}

	return 0;
}
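
/*
 * Informational sketch of the metadata built above, based on the constants
 * used in this file: a single 32-bit header word carries one 4-bit type per
 * populated layer (walked from the least significant field upwards), and is
 * followed by one 32-bit data word per layer in the same order.  The whole
 * block is prepended to the packet in big-endian byte order.
 */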

uint16_t
nfp_net_nfd3_xmit_pkts(void *tx_queue,
		struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	return nfp_net_nfd3_xmit_pkts_common(tx_queue, tx_pkts, nb_pkts, false);
}

uint16_t
nfp_net_nfd3_xmit_pkts_common(void *tx_queue,
		struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts,
		bool repr_flag)
{
	int ret;
	uint16_t i;
	uint8_t offset;
	uint32_t pkt_size;
	uint16_t dma_size;
	uint64_t dma_addr;
	uint16_t free_descs;
	struct rte_mbuf *pkt;
	uint16_t issued_descs;
	struct nfp_net_hw *hw;
	struct rte_mbuf **lmbuf;
	struct nfp_net_txq *txq;
	struct nfp_net_nfd3_tx_desc txd;
	struct nfp_net_nfd3_tx_desc *txds;

	txq = tx_queue;
	hw = txq->hw;
	txds = &txq->txds[txq->wr_p];

	PMD_TX_LOG(DEBUG, "Working for queue %hu at pos %d and %hu packets.",
			txq->qidx, txq->wr_p, nb_pkts);

	if (nfp_net_nfd3_free_tx_desc(txq) < NFD3_TX_DESC_PER_PKT * nb_pkts ||
			nfp_net_nfd3_txq_full(txq))
		nfp_net_tx_free_bufs(txq);

	free_descs = nfp_net_nfd3_free_tx_desc(txq);
	if (unlikely(free_descs == 0))
		return 0;

	pkt = *tx_pkts;

	issued_descs = 0;
	PMD_TX_LOG(DEBUG, "Queue: %hu. Sending %hu packets.", txq->qidx, nb_pkts);

	/* Sending packets */
	for (i = 0; i < nb_pkts && free_descs > 0; i++) {
		/* Grabbing the mbuf linked to the current descriptor */
		lmbuf = &txq->txbufs[txq->wr_p].mbuf;
		/* Warming the cache for releasing the mbuf later on */
		RTE_MBUF_PREFETCH_TO_FREE(*lmbuf);

		pkt = *(tx_pkts + i);

		if (!repr_flag) {
			struct nfp_net_meta_raw meta_data;
			memset(&meta_data, 0, sizeof(meta_data));
			ret = nfp_net_nfd3_set_meta_data(&meta_data, txq, pkt);
			if (unlikely(ret != 0))
				goto xmit_end;

			offset = meta_data.length;
		} else {
			offset = FLOWER_PKT_DATA_OFFSET;
		}

		if (unlikely(pkt->nb_segs > 1 &&
				(hw->super.ctrl & NFP_NET_CFG_CTRL_GATHER) == 0)) {
			PMD_TX_LOG(ERR, "Multisegment packet not supported.");
			goto xmit_end;
		}

		/* Checking if we have enough descriptors */
		if (unlikely(pkt->nb_segs > free_descs))
			goto xmit_end;

		/*
		 * Checksum and VLAN flags are needed only in the first descriptor
		 * of a multisegment packet, but the TSO info needs to be in all
		 * of them.
		 */
		txd.data_len = pkt->pkt_len;
		nfp_net_nfd3_tx_tso(txq, &txd, pkt);
		nfp_net_nfd3_tx_cksum(txq, &txd, pkt);
		nfp_net_nfd3_tx_vlan(txq, &txd, pkt);

		/*
		 * The mbuf data_len holds the data in one segment, while pkt_len
		 * holds the data in the whole packet. When the packet has just one
		 * segment, data_len equals pkt_len.
		 */
		pkt_size = pkt->pkt_len;

		while (pkt != NULL && free_descs > 0) {
			/* Copying TSO, VLAN and cksum info */
			*txds = txd;

			/* Releasing mbuf used by this descriptor previously */
			if (*lmbuf != NULL)
				rte_pktmbuf_free_seg(*lmbuf);

			/*
			 * Link the mbuf to the descriptor so it can be released
			 * the next time this descriptor slot is used.
			 */
			*lmbuf = pkt;

			dma_size = pkt->data_len;
			dma_addr = rte_mbuf_data_iova(pkt);

			/* Filling descriptor fields */
			txds->dma_len = dma_size;
			txds->data_len = txd.data_len;
			txds->dma_addr_hi = (dma_addr >> 32) & 0xff;
			txds->dma_addr_lo = (dma_addr & 0xffffffff);
			free_descs--;

			txq->wr_p++;
			if (unlikely(txq->wr_p == txq->tx_count)) /* Wrapping */
				txq->wr_p = 0;

			pkt_size -= dma_size;

			/*
			 * Set EOP on the last segment; single-segment packets
			 * are the likely case.
			 */
			if (likely(pkt_size == 0))
				txds->offset_eop = NFD3_DESC_TX_EOP;
			else
				txds->offset_eop = 0;

			/* Set the meta_len */
			txds->offset_eop |= offset;

			pkt = pkt->next;
			/* Referencing next free TX descriptor */
			txds = &txq->txds[txq->wr_p];
			lmbuf = &txq->txbufs[txq->wr_p].mbuf;
			issued_descs++;
		}
	}

xmit_end:
	/* Increment the write pointer. Force the memory writes to complete before letting the HW know. */
	rte_wmb();
	nfp_qcp_ptr_add(txq->qcp_q, NFP_QCP_WRITE_PTR, issued_descs);

	return i;
}
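
/*
 * Usage sketch (illustrative only): applications do not call the NFD3 send
 * routines directly.  Once the port is started they go through the generic
 * burst API, e.g.
 *
 *	uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *
 * which dispatches to nfp_net_nfd3_xmit_pkts() when the NFD3 datapath is
 * selected.
 */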

int
nfp_net_nfd3_tx_queue_setup(struct rte_eth_dev *dev,
		uint16_t queue_idx,
		uint16_t nb_desc,
		unsigned int socket_id,
		const struct rte_eth_txconf *tx_conf)
{
	size_t size;
	uint32_t tx_desc_sz;
	uint16_t min_tx_desc;
	uint16_t max_tx_desc;
	struct nfp_net_hw *hw;
	struct nfp_net_txq *txq;
	uint16_t tx_free_thresh;
	const struct rte_memzone *tz;
	struct nfp_net_hw_priv *hw_priv;

	hw = nfp_net_get_hw(dev);
	hw_priv = dev->process_private;

	nfp_net_tx_desc_limits(hw_priv, &min_tx_desc, &max_tx_desc);

	/* Validating number of descriptors */
	tx_desc_sz = nb_desc * sizeof(struct nfp_net_nfd3_tx_desc);
	if ((NFD3_TX_DESC_PER_PKT * tx_desc_sz) % NFP_ALIGN_RING_DESC != 0 ||
			nb_desc > max_tx_desc || nb_desc < min_tx_desc) {
		PMD_DRV_LOG(ERR, "Wrong nb_desc value.");
		return -EINVAL;
	}

	tx_free_thresh = (tx_conf->tx_free_thresh != 0) ?
			tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH;
	if (tx_free_thresh > nb_desc) {
		PMD_DRV_LOG(ERR, "The tx_free_thresh must be less than the number of TX "
				"descriptors. (tx_free_thresh=%u port=%d queue=%d)",
				tx_free_thresh, dev->data->port_id, queue_idx);
		return -EINVAL;
	}

	/*
	 * Free memory prior to re-allocation if needed. This is the case after
	 * calling nfp_net_stop().
	 */
	if (dev->data->tx_queues[queue_idx] != NULL) {
		PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d.",
				queue_idx);
		nfp_net_tx_queue_release(dev, queue_idx);
		dev->data->tx_queues[queue_idx] = NULL;
	}

	/* Allocating tx queue data structure */
	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nfp_net_txq),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (txq == NULL) {
		PMD_DRV_LOG(ERR, "Error allocating the TX queue structure.");
		return -ENOMEM;
	}

	dev->data->tx_queues[queue_idx] = txq;

	/*
	 * Allocate TX ring hardware descriptors. A memzone large enough to
	 * handle the maximum ring size is allocated in order to allow for
	 * resizing in later calls to the queue setup function.
	 */
	size = sizeof(struct nfp_net_nfd3_tx_desc) * NFD3_TX_DESC_PER_PKT * max_tx_desc;
	tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
			NFP_MEMZONE_ALIGN, socket_id);
	if (tz == NULL) {
		PMD_DRV_LOG(ERR, "Error allocating tx dma.");
		nfp_net_tx_queue_release(dev, queue_idx);
		dev->data->tx_queues[queue_idx] = NULL;
		return -ENOMEM;
	}

	txq->tx_count = nb_desc * NFD3_TX_DESC_PER_PKT;
	txq->tx_free_thresh = tx_free_thresh;

	/* Queue mapping based on firmware configuration */
	txq->qidx = queue_idx;
	txq->tx_qcidx = queue_idx * hw->stride_tx;
	txq->qcp_q = hw->tx_bar + NFP_QCP_QUEUE_OFF(txq->tx_qcidx);
	txq->port_id = dev->data->port_id;

	/* Saving physical and virtual addresses for the TX ring */
	txq->dma = tz->iova;
	txq->txds = tz->addr;

	/* Mbuf pointers array for referencing mbufs linked to TX descriptors */
	txq->txbufs = rte_zmalloc_socket("txq->txbufs",
			sizeof(*txq->txbufs) * txq->tx_count,
			RTE_CACHE_LINE_SIZE, socket_id);
	if (txq->txbufs == NULL) {
		nfp_net_tx_queue_release(dev, queue_idx);
		dev->data->tx_queues[queue_idx] = NULL;
		return -ENOMEM;
	}

	nfp_net_reset_tx_queue(txq);

	txq->hw = hw;
	txq->hw_priv = dev->process_private;

	/*
	 * Telling the HW about the physical address of the TX ring and number
	 * of descriptors in log2 format.
	 */
	nn_cfg_writeq(&hw->super, NFP_NET_CFG_TXR_ADDR(queue_idx), txq->dma);
	nn_cfg_writeb(&hw->super, NFP_NET_CFG_TXR_SZ(queue_idx), rte_log2_u32(txq->tx_count));

	return 0;
}
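
/*
 * Usage sketch (illustrative only): this setup routine is reached through
 * the generic ethdev API rather than called directly, e.g.
 *
 *	ret = rte_eth_tx_queue_setup(port_id, 0, 1024, rte_socket_id(), NULL);
 *
 * where nb_desc must fall within the limits reported by
 * nfp_net_tx_desc_limits() and is internally scaled by NFD3_TX_DESC_PER_PKT.
 */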