18fd92a66SOlivier Matz /* SPDX-License-Identifier: BSD-3-Clause 2f0d2114fSYongseok Koh * Copyright 2017 6WIND S.A. 35feecc57SShahaf Shuler * Copyright 2017 Mellanox Technologies, Ltd 4f0d2114fSYongseok Koh */ 5f0d2114fSYongseok Koh 6f0d2114fSYongseok Koh #include <assert.h> 7f0d2114fSYongseok Koh #include <stdint.h> 8f0d2114fSYongseok Koh #include <string.h> 9f0d2114fSYongseok Koh #include <stdlib.h> 10f0d2114fSYongseok Koh 11f0d2114fSYongseok Koh /* Verbs header. */ 12f0d2114fSYongseok Koh /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 13f0d2114fSYongseok Koh #ifdef PEDANTIC 14f0d2114fSYongseok Koh #pragma GCC diagnostic ignored "-Wpedantic" 15f0d2114fSYongseok Koh #endif 16f0d2114fSYongseok Koh #include <infiniband/verbs.h> 17f0d2114fSYongseok Koh #include <infiniband/mlx5dv.h> 18f0d2114fSYongseok Koh #ifdef PEDANTIC 19f0d2114fSYongseok Koh #pragma GCC diagnostic error "-Wpedantic" 20f0d2114fSYongseok Koh #endif 21f0d2114fSYongseok Koh 22f0d2114fSYongseok Koh #include <rte_mbuf.h> 23f0d2114fSYongseok Koh #include <rte_mempool.h> 24f0d2114fSYongseok Koh #include <rte_prefetch.h> 25f0d2114fSYongseok Koh 26f0d2114fSYongseok Koh #include "mlx5.h" 27f0d2114fSYongseok Koh #include "mlx5_utils.h" 28f0d2114fSYongseok Koh #include "mlx5_rxtx.h" 295bfc9fc1SYongseok Koh #include "mlx5_rxtx_vec.h" 30f0d2114fSYongseok Koh #include "mlx5_autoconf.h" 31f0d2114fSYongseok Koh #include "mlx5_defs.h" 32f0d2114fSYongseok Koh #include "mlx5_prm.h" 33f0d2114fSYongseok Koh 34570acdb1SYongseok Koh #if defined RTE_ARCH_X86_64 353c2ddbd4SYongseok Koh #include "mlx5_rxtx_vec_sse.h" 36570acdb1SYongseok Koh #elif defined RTE_ARCH_ARM64 37570acdb1SYongseok Koh #include "mlx5_rxtx_vec_neon.h" 383c2ddbd4SYongseok Koh #else 393c2ddbd4SYongseok Koh #error "This should not be compiled if SIMD instructions are not supported." 40f0d2114fSYongseok Koh #endif 41f0d2114fSYongseok Koh 42f0d2114fSYongseok Koh /** 436bd7fbd0SDekel Peled * Count the number of packets having same ol_flags and same metadata (if 446bd7fbd0SDekel Peled * PKT_TX_METADATA is set in ol_flags), and calculate cs_flags. 45f0d2114fSYongseok Koh * 46f0d2114fSYongseok Koh * @param pkts 47f0d2114fSYongseok Koh * Pointer to array of packets. 48f0d2114fSYongseok Koh * @param pkts_n 49f0d2114fSYongseok Koh * Number of packets. 50f0d2114fSYongseok Koh * @param cs_flags 51f0d2114fSYongseok Koh * Pointer of flags to be returned. 526bd7fbd0SDekel Peled * @param metadata 536bd7fbd0SDekel Peled * Pointer of metadata to be returned. 546bd7fbd0SDekel Peled * @param txq_offloads 556bd7fbd0SDekel Peled * Offloads enabled on Tx queue 56f0d2114fSYongseok Koh * 57f0d2114fSYongseok Koh * @return 586bd7fbd0SDekel Peled * Number of packets having same ol_flags and metadata, if relevant. 59f0d2114fSYongseok Koh */ 60f0d2114fSYongseok Koh static inline unsigned int 616bd7fbd0SDekel Peled txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags, 626bd7fbd0SDekel Peled rte_be32_t *metadata, const uint64_t txq_offloads) 63f0d2114fSYongseok Koh { 64f0d2114fSYongseok Koh unsigned int pos; 656bd7fbd0SDekel Peled const uint64_t cksum_ol_mask = 66f0d2114fSYongseok Koh PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | 67f0d2114fSYongseok Koh PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE | 68f0d2114fSYongseok Koh PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM; 696bd7fbd0SDekel Peled rte_be32_t p0_metadata, pn_metadata; 70f0d2114fSYongseok Koh 71f0d2114fSYongseok Koh if (!pkts_n) 72f0d2114fSYongseok Koh return 0; 736bd7fbd0SDekel Peled p0_metadata = pkts[0]->ol_flags & PKT_TX_METADATA ? 746bd7fbd0SDekel Peled pkts[0]->tx_metadata : 0; 756bd7fbd0SDekel Peled /* Count the number of packets having same offload parameters. */ 766bd7fbd0SDekel Peled for (pos = 1; pos < pkts_n; ++pos) { 776bd7fbd0SDekel Peled /* Check if packet has same checksum flags. */ 786bd7fbd0SDekel Peled if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) && 796bd7fbd0SDekel Peled ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & cksum_ol_mask)) 80f0d2114fSYongseok Koh break; 816bd7fbd0SDekel Peled /* Check if packet has same metadata. */ 826bd7fbd0SDekel Peled if (txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) { 836bd7fbd0SDekel Peled pn_metadata = pkts[pos]->ol_flags & PKT_TX_METADATA ? 846bd7fbd0SDekel Peled pkts[pos]->tx_metadata : 0; 856bd7fbd0SDekel Peled if (pn_metadata != p0_metadata) 866bd7fbd0SDekel Peled break; 876bd7fbd0SDekel Peled } 886bd7fbd0SDekel Peled } 895f8ba81cSXueming Li *cs_flags = txq_ol_cksum_to_cs(pkts[0]); 906bd7fbd0SDekel Peled *metadata = p0_metadata; 91f0d2114fSYongseok Koh return pos; 92f0d2114fSYongseok Koh } 93f0d2114fSYongseok Koh 94f0d2114fSYongseok Koh /** 95f0d2114fSYongseok Koh * DPDK callback for vectorized TX. 96f0d2114fSYongseok Koh * 97f0d2114fSYongseok Koh * @param dpdk_txq 98f0d2114fSYongseok Koh * Generic pointer to TX queue structure. 99f0d2114fSYongseok Koh * @param[in] pkts 100f0d2114fSYongseok Koh * Packets to transmit. 101f0d2114fSYongseok Koh * @param pkts_n 102f0d2114fSYongseok Koh * Number of packets in array. 103f0d2114fSYongseok Koh * 104f0d2114fSYongseok Koh * @return 105f0d2114fSYongseok Koh * Number of packets successfully transmitted (<= pkts_n). 106f0d2114fSYongseok Koh */ 107f0d2114fSYongseok Koh uint16_t 108f0d2114fSYongseok Koh mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, 109f0d2114fSYongseok Koh uint16_t pkts_n) 110f0d2114fSYongseok Koh { 111f0d2114fSYongseok Koh struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 112f0d2114fSYongseok Koh uint16_t nb_tx = 0; 113f0d2114fSYongseok Koh 114f0d2114fSYongseok Koh while (pkts_n > nb_tx) { 115f0d2114fSYongseok Koh uint16_t n; 116f0d2114fSYongseok Koh uint16_t ret; 117f0d2114fSYongseok Koh 118f0d2114fSYongseok Koh n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST); 1196bd7fbd0SDekel Peled ret = txq_burst_v(txq, &pkts[nb_tx], n, 0, 0); 120f0d2114fSYongseok Koh nb_tx += ret; 121f0d2114fSYongseok Koh if (!ret) 122f0d2114fSYongseok Koh break; 123f0d2114fSYongseok Koh } 124f0d2114fSYongseok Koh return nb_tx; 125f0d2114fSYongseok Koh } 126f0d2114fSYongseok Koh 127f0d2114fSYongseok Koh /** 128f0d2114fSYongseok Koh * DPDK callback for vectorized TX with multi-seg packets and offload. 129f0d2114fSYongseok Koh * 130f0d2114fSYongseok Koh * @param dpdk_txq 131f0d2114fSYongseok Koh * Generic pointer to TX queue structure. 132f0d2114fSYongseok Koh * @param[in] pkts 133f0d2114fSYongseok Koh * Packets to transmit. 134f0d2114fSYongseok Koh * @param pkts_n 135f0d2114fSYongseok Koh * Number of packets in array. 136f0d2114fSYongseok Koh * 137f0d2114fSYongseok Koh * @return 138f0d2114fSYongseok Koh * Number of packets successfully transmitted (<= pkts_n). 139f0d2114fSYongseok Koh */ 140f0d2114fSYongseok Koh uint16_t 141f0d2114fSYongseok Koh mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 142f0d2114fSYongseok Koh { 143f0d2114fSYongseok Koh struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 144f0d2114fSYongseok Koh uint16_t nb_tx = 0; 145f0d2114fSYongseok Koh 146f0d2114fSYongseok Koh while (pkts_n > nb_tx) { 147f0d2114fSYongseok Koh uint8_t cs_flags = 0; 148f0d2114fSYongseok Koh uint16_t n; 149f0d2114fSYongseok Koh uint16_t ret; 1506bd7fbd0SDekel Peled rte_be32_t metadata = 0; 151f0d2114fSYongseok Koh 152f0d2114fSYongseok Koh /* Transmit multi-seg packets in the head of pkts list. */ 153dbccb4cdSShahaf Shuler if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) && 154f0d2114fSYongseok Koh NB_SEGS(pkts[nb_tx]) > 1) 155f0d2114fSYongseok Koh nb_tx += txq_scatter_v(txq, 156f0d2114fSYongseok Koh &pkts[nb_tx], 157f0d2114fSYongseok Koh pkts_n - nb_tx); 158f0d2114fSYongseok Koh n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST); 159dbccb4cdSShahaf Shuler if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) 1604b0d7b7fSYongseok Koh n = txq_count_contig_single_seg(&pkts[nb_tx], n); 1616bd7fbd0SDekel Peled if (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP | 1626bd7fbd0SDekel Peled DEV_TX_OFFLOAD_MATCH_METADATA)) 1636bd7fbd0SDekel Peled n = txq_calc_offload(&pkts[nb_tx], n, 1646bd7fbd0SDekel Peled &cs_flags, &metadata, 1656bd7fbd0SDekel Peled txq->offloads); 1666bd7fbd0SDekel Peled ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags, metadata); 167f0d2114fSYongseok Koh nb_tx += ret; 168f0d2114fSYongseok Koh if (!ret) 169f0d2114fSYongseok Koh break; 170f0d2114fSYongseok Koh } 171f0d2114fSYongseok Koh return nb_tx; 172f0d2114fSYongseok Koh } 173f0d2114fSYongseok Koh 174f0d2114fSYongseok Koh /** 175f0d2114fSYongseok Koh * Skip error packets. 176f0d2114fSYongseok Koh * 177f0d2114fSYongseok Koh * @param rxq 178f0d2114fSYongseok Koh * Pointer to RX queue structure. 179f0d2114fSYongseok Koh * @param[out] pkts 180f0d2114fSYongseok Koh * Array to store received packets. 181f0d2114fSYongseok Koh * @param pkts_n 182f0d2114fSYongseok Koh * Maximum number of packets in array. 183f0d2114fSYongseok Koh * 184f0d2114fSYongseok Koh * @return 185f0d2114fSYongseok Koh * Number of packets successfully received (<= pkts_n). 186f0d2114fSYongseok Koh */ 187f0d2114fSYongseok Koh static uint16_t 188f0d2114fSYongseok Koh rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, 189f0d2114fSYongseok Koh uint16_t pkts_n) 190f0d2114fSYongseok Koh { 191f0d2114fSYongseok Koh uint16_t n = 0; 192f0d2114fSYongseok Koh unsigned int i; 193f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS 194f0d2114fSYongseok Koh uint32_t err_bytes = 0; 195f0d2114fSYongseok Koh #endif 196f0d2114fSYongseok Koh 197f0d2114fSYongseok Koh for (i = 0; i < pkts_n; ++i) { 198f0d2114fSYongseok Koh struct rte_mbuf *pkt = pkts[i]; 199f0d2114fSYongseok Koh 200f0d2114fSYongseok Koh if (pkt->packet_type == RTE_PTYPE_ALL_MASK) { 201f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS 202f0d2114fSYongseok Koh err_bytes += PKT_LEN(pkt); 203f0d2114fSYongseok Koh #endif 204f0d2114fSYongseok Koh rte_pktmbuf_free_seg(pkt); 205f0d2114fSYongseok Koh } else { 206f0d2114fSYongseok Koh pkts[n++] = pkt; 207f0d2114fSYongseok Koh } 208f0d2114fSYongseok Koh } 209f0d2114fSYongseok Koh rxq->stats.idropped += (pkts_n - n); 210f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS 211f0d2114fSYongseok Koh /* Correct counters of errored completions. */ 212f0d2114fSYongseok Koh rxq->stats.ipackets -= (pkts_n - n); 213f0d2114fSYongseok Koh rxq->stats.ibytes -= err_bytes; 214f0d2114fSYongseok Koh #endif 215f0d2114fSYongseok Koh return n; 216f0d2114fSYongseok Koh } 217f0d2114fSYongseok Koh 218f0d2114fSYongseok Koh /** 219f0d2114fSYongseok Koh * DPDK callback for vectorized RX. 220f0d2114fSYongseok Koh * 221f0d2114fSYongseok Koh * @param dpdk_rxq 222f0d2114fSYongseok Koh * Generic pointer to RX queue structure. 223f0d2114fSYongseok Koh * @param[out] pkts 224f0d2114fSYongseok Koh * Array to store received packets. 225f0d2114fSYongseok Koh * @param pkts_n 226f0d2114fSYongseok Koh * Maximum number of packets in array. 227f0d2114fSYongseok Koh * 228f0d2114fSYongseok Koh * @return 229f0d2114fSYongseok Koh * Number of packets successfully received (<= pkts_n). 230f0d2114fSYongseok Koh */ 231f0d2114fSYongseok Koh uint16_t 232f0d2114fSYongseok Koh mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 233f0d2114fSYongseok Koh { 234f0d2114fSYongseok Koh struct mlx5_rxq_data *rxq = dpdk_rxq; 235f0d2114fSYongseok Koh uint16_t nb_rx; 236d27fb0deSYongseok Koh uint64_t err = 0; 237f0d2114fSYongseok Koh 238d27fb0deSYongseok Koh nb_rx = rxq_burst_v(rxq, pkts, pkts_n, &err); 239d27fb0deSYongseok Koh if (unlikely(err)) 240f0d2114fSYongseok Koh nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx); 241f0d2114fSYongseok Koh return nb_rx; 242f0d2114fSYongseok Koh } 243f0d2114fSYongseok Koh 244f0d2114fSYongseok Koh /** 245f0d2114fSYongseok Koh * Check Tx queue flags are set for raw vectorized Tx. 246f0d2114fSYongseok Koh * 247dbccb4cdSShahaf Shuler * @param dev 248af4f09f2SNélio Laranjeiro * Pointer to Ethernet device. 249f0d2114fSYongseok Koh * 250f0d2114fSYongseok Koh * @return 251f0d2114fSYongseok Koh * 1 if supported, negative errno value if not. 252f0d2114fSYongseok Koh */ 253f0d2114fSYongseok Koh int __attribute__((cold)) 254af4f09f2SNélio Laranjeiro mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev) 255f0d2114fSYongseok Koh { 256dbccb4cdSShahaf Shuler uint64_t offloads = dev->data->dev_conf.txmode.offloads; 257f0d2114fSYongseok Koh 258dbccb4cdSShahaf Shuler /* Doesn't support any offload. */ 259dbccb4cdSShahaf Shuler if (offloads) 260f0d2114fSYongseok Koh return -ENOTSUP; 261f0d2114fSYongseok Koh return 1; 262f0d2114fSYongseok Koh } 263f0d2114fSYongseok Koh 264f0d2114fSYongseok Koh /** 265f0d2114fSYongseok Koh * Check a device can support vectorized TX. 266f0d2114fSYongseok Koh * 267dbccb4cdSShahaf Shuler * @param dev 268af4f09f2SNélio Laranjeiro * Pointer to Ethernet device. 269f0d2114fSYongseok Koh * 270f0d2114fSYongseok Koh * @return 271f0d2114fSYongseok Koh * 1 if supported, negative errno value if not. 272f0d2114fSYongseok Koh */ 273f0d2114fSYongseok Koh int __attribute__((cold)) 274af4f09f2SNélio Laranjeiro mlx5_check_vec_tx_support(struct rte_eth_dev *dev) 275f0d2114fSYongseok Koh { 276*dbeba4cfSThomas Monjalon struct mlx5_priv *priv = dev->data->dev_private; 277dbccb4cdSShahaf Shuler uint64_t offloads = dev->data->dev_conf.txmode.offloads; 278dbccb4cdSShahaf Shuler 2797fe24446SShahaf Shuler if (!priv->config.tx_vec_en || 28009d8b416SYongseok Koh priv->txqs_n > (unsigned int)priv->config.txqs_vec || 2817fe24446SShahaf Shuler priv->config.mps != MLX5_MPW_ENHANCED || 282dbccb4cdSShahaf Shuler offloads & ~MLX5_VEC_TX_OFFLOAD_CAP) 283f0d2114fSYongseok Koh return -ENOTSUP; 284f0d2114fSYongseok Koh return 1; 285f0d2114fSYongseok Koh } 286f0d2114fSYongseok Koh 287f0d2114fSYongseok Koh /** 288f0d2114fSYongseok Koh * Check a RX queue can support vectorized RX. 289f0d2114fSYongseok Koh * 290f0d2114fSYongseok Koh * @param rxq 291f0d2114fSYongseok Koh * Pointer to RX queue. 292f0d2114fSYongseok Koh * 293f0d2114fSYongseok Koh * @return 294f0d2114fSYongseok Koh * 1 if supported, negative errno value if not. 295f0d2114fSYongseok Koh */ 296f0d2114fSYongseok Koh int __attribute__((cold)) 297af4f09f2SNélio Laranjeiro mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq) 298f0d2114fSYongseok Koh { 299f0d2114fSYongseok Koh struct mlx5_rxq_ctrl *ctrl = 300f0d2114fSYongseok Koh container_of(rxq, struct mlx5_rxq_ctrl, rxq); 301f0d2114fSYongseok Koh 3027d6bf6b8SYongseok Koh if (mlx5_mprq_enabled(ETH_DEV(ctrl->priv))) 3037d6bf6b8SYongseok Koh return -ENOTSUP; 3047fe24446SShahaf Shuler if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0) 305f0d2114fSYongseok Koh return -ENOTSUP; 306f0d2114fSYongseok Koh return 1; 307f0d2114fSYongseok Koh } 308f0d2114fSYongseok Koh 309f0d2114fSYongseok Koh /** 310f0d2114fSYongseok Koh * Check a device can support vectorized RX. 311f0d2114fSYongseok Koh * 312af4f09f2SNélio Laranjeiro * @param dev 313af4f09f2SNélio Laranjeiro * Pointer to Ethernet device. 314f0d2114fSYongseok Koh * 315f0d2114fSYongseok Koh * @return 316f0d2114fSYongseok Koh * 1 if supported, negative errno value if not. 317f0d2114fSYongseok Koh */ 318f0d2114fSYongseok Koh int __attribute__((cold)) 319af4f09f2SNélio Laranjeiro mlx5_check_vec_rx_support(struct rte_eth_dev *dev) 320f0d2114fSYongseok Koh { 321*dbeba4cfSThomas Monjalon struct mlx5_priv *priv = dev->data->dev_private; 322f0d2114fSYongseok Koh uint16_t i; 323f0d2114fSYongseok Koh 3247fe24446SShahaf Shuler if (!priv->config.rx_vec_en) 325f0d2114fSYongseok Koh return -ENOTSUP; 3267d6bf6b8SYongseok Koh if (mlx5_mprq_enabled(dev)) 3277d6bf6b8SYongseok Koh return -ENOTSUP; 328f0d2114fSYongseok Koh /* All the configured queues should support. */ 329f0d2114fSYongseok Koh for (i = 0; i < priv->rxqs_n; ++i) { 330f0d2114fSYongseok Koh struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 331f0d2114fSYongseok Koh 332f0d2114fSYongseok Koh if (!rxq) 333f0d2114fSYongseok Koh continue; 334af4f09f2SNélio Laranjeiro if (mlx5_rxq_check_vec_support(rxq) < 0) 335f0d2114fSYongseok Koh break; 336f0d2114fSYongseok Koh } 337f0d2114fSYongseok Koh if (i != priv->rxqs_n) 338f0d2114fSYongseok Koh return -ENOTSUP; 339f0d2114fSYongseok Koh return 1; 340f0d2114fSYongseok Koh } 341