/*-
 *   BSD LICENSE
 *
 *   Copyright 2017 6WIND S.A.
 *   Copyright 2017 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>

#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Count the number of packets having same ol_flags and calculate cs_flags.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param pkts
 *   Pointer to array of packets.
 * @param pkts_n
 *   Number of packets.
 * @param cs_flags
 *   Pointer of flags to be returned.
 *
 * @return
 *   Number of packets having same ol_flags.
84f0d2114fSYongseok Koh */ 85f0d2114fSYongseok Koh static inline unsigned int 86f0d2114fSYongseok Koh txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, 87f0d2114fSYongseok Koh uint16_t pkts_n, uint8_t *cs_flags) 88f0d2114fSYongseok Koh { 89f0d2114fSYongseok Koh unsigned int pos; 90f0d2114fSYongseok Koh const uint64_t ol_mask = 91f0d2114fSYongseok Koh PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | 92f0d2114fSYongseok Koh PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE | 93f0d2114fSYongseok Koh PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM; 94f0d2114fSYongseok Koh 95f0d2114fSYongseok Koh if (!pkts_n) 96f0d2114fSYongseok Koh return 0; 97f0d2114fSYongseok Koh /* Count the number of packets having same ol_flags. */ 98f0d2114fSYongseok Koh for (pos = 1; pos < pkts_n; ++pos) 99f0d2114fSYongseok Koh if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask) 100f0d2114fSYongseok Koh break; 1014aa15eb1SNélio Laranjeiro *cs_flags = txq_ol_cksum_to_cs(txq, pkts[0]); 102f0d2114fSYongseok Koh return pos; 103f0d2114fSYongseok Koh } 104f0d2114fSYongseok Koh 105f0d2114fSYongseok Koh /** 106f0d2114fSYongseok Koh * DPDK callback for vectorized TX. 107f0d2114fSYongseok Koh * 108f0d2114fSYongseok Koh * @param dpdk_txq 109f0d2114fSYongseok Koh * Generic pointer to TX queue structure. 110f0d2114fSYongseok Koh * @param[in] pkts 111f0d2114fSYongseok Koh * Packets to transmit. 112f0d2114fSYongseok Koh * @param pkts_n 113f0d2114fSYongseok Koh * Number of packets in array. 114f0d2114fSYongseok Koh * 115f0d2114fSYongseok Koh * @return 116f0d2114fSYongseok Koh * Number of packets successfully transmitted (<= pkts_n). 
117f0d2114fSYongseok Koh */ 118f0d2114fSYongseok Koh uint16_t 119f0d2114fSYongseok Koh mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, 120f0d2114fSYongseok Koh uint16_t pkts_n) 121f0d2114fSYongseok Koh { 122f0d2114fSYongseok Koh struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 123f0d2114fSYongseok Koh uint16_t nb_tx = 0; 124f0d2114fSYongseok Koh 125f0d2114fSYongseok Koh while (pkts_n > nb_tx) { 126f0d2114fSYongseok Koh uint16_t n; 127f0d2114fSYongseok Koh uint16_t ret; 128f0d2114fSYongseok Koh 129f0d2114fSYongseok Koh n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST); 130f0d2114fSYongseok Koh ret = txq_burst_v(txq, &pkts[nb_tx], n, 0); 131f0d2114fSYongseok Koh nb_tx += ret; 132f0d2114fSYongseok Koh if (!ret) 133f0d2114fSYongseok Koh break; 134f0d2114fSYongseok Koh } 135f0d2114fSYongseok Koh return nb_tx; 136f0d2114fSYongseok Koh } 137f0d2114fSYongseok Koh 138f0d2114fSYongseok Koh /** 139f0d2114fSYongseok Koh * DPDK callback for vectorized TX with multi-seg packets and offload. 140f0d2114fSYongseok Koh * 141f0d2114fSYongseok Koh * @param dpdk_txq 142f0d2114fSYongseok Koh * Generic pointer to TX queue structure. 143f0d2114fSYongseok Koh * @param[in] pkts 144f0d2114fSYongseok Koh * Packets to transmit. 145f0d2114fSYongseok Koh * @param pkts_n 146f0d2114fSYongseok Koh * Number of packets in array. 147f0d2114fSYongseok Koh * 148f0d2114fSYongseok Koh * @return 149f0d2114fSYongseok Koh * Number of packets successfully transmitted (<= pkts_n). 
150f0d2114fSYongseok Koh */ 151f0d2114fSYongseok Koh uint16_t 152f0d2114fSYongseok Koh mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 153f0d2114fSYongseok Koh { 154f0d2114fSYongseok Koh struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 155f0d2114fSYongseok Koh uint16_t nb_tx = 0; 156f0d2114fSYongseok Koh 157f0d2114fSYongseok Koh while (pkts_n > nb_tx) { 158f0d2114fSYongseok Koh uint8_t cs_flags = 0; 159f0d2114fSYongseok Koh uint16_t n; 160f0d2114fSYongseok Koh uint16_t ret; 161f0d2114fSYongseok Koh 162f0d2114fSYongseok Koh /* Transmit multi-seg packets in the head of pkts list. */ 163*dbccb4cdSShahaf Shuler if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) && 164f0d2114fSYongseok Koh NB_SEGS(pkts[nb_tx]) > 1) 165f0d2114fSYongseok Koh nb_tx += txq_scatter_v(txq, 166f0d2114fSYongseok Koh &pkts[nb_tx], 167f0d2114fSYongseok Koh pkts_n - nb_tx); 168f0d2114fSYongseok Koh n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST); 169*dbccb4cdSShahaf Shuler if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) 1704b0d7b7fSYongseok Koh n = txq_count_contig_single_seg(&pkts[nb_tx], n); 171*dbccb4cdSShahaf Shuler if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) 172f0d2114fSYongseok Koh n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags); 173f0d2114fSYongseok Koh ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags); 174f0d2114fSYongseok Koh nb_tx += ret; 175f0d2114fSYongseok Koh if (!ret) 176f0d2114fSYongseok Koh break; 177f0d2114fSYongseok Koh } 178f0d2114fSYongseok Koh return nb_tx; 179f0d2114fSYongseok Koh } 180f0d2114fSYongseok Koh 181f0d2114fSYongseok Koh /** 182f0d2114fSYongseok Koh * Skip error packets. 183f0d2114fSYongseok Koh * 184f0d2114fSYongseok Koh * @param rxq 185f0d2114fSYongseok Koh * Pointer to RX queue structure. 186f0d2114fSYongseok Koh * @param[out] pkts 187f0d2114fSYongseok Koh * Array to store received packets. 188f0d2114fSYongseok Koh * @param pkts_n 189f0d2114fSYongseok Koh * Maximum number of packets in array. 
190f0d2114fSYongseok Koh * 191f0d2114fSYongseok Koh * @return 192f0d2114fSYongseok Koh * Number of packets successfully received (<= pkts_n). 193f0d2114fSYongseok Koh */ 194f0d2114fSYongseok Koh static uint16_t 195f0d2114fSYongseok Koh rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, 196f0d2114fSYongseok Koh uint16_t pkts_n) 197f0d2114fSYongseok Koh { 198f0d2114fSYongseok Koh uint16_t n = 0; 199f0d2114fSYongseok Koh unsigned int i; 200f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS 201f0d2114fSYongseok Koh uint32_t err_bytes = 0; 202f0d2114fSYongseok Koh #endif 203f0d2114fSYongseok Koh 204f0d2114fSYongseok Koh for (i = 0; i < pkts_n; ++i) { 205f0d2114fSYongseok Koh struct rte_mbuf *pkt = pkts[i]; 206f0d2114fSYongseok Koh 207f0d2114fSYongseok Koh if (pkt->packet_type == RTE_PTYPE_ALL_MASK) { 208f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS 209f0d2114fSYongseok Koh err_bytes += PKT_LEN(pkt); 210f0d2114fSYongseok Koh #endif 211f0d2114fSYongseok Koh rte_pktmbuf_free_seg(pkt); 212f0d2114fSYongseok Koh } else { 213f0d2114fSYongseok Koh pkts[n++] = pkt; 214f0d2114fSYongseok Koh } 215f0d2114fSYongseok Koh } 216f0d2114fSYongseok Koh rxq->stats.idropped += (pkts_n - n); 217f0d2114fSYongseok Koh #ifdef MLX5_PMD_SOFT_COUNTERS 218f0d2114fSYongseok Koh /* Correct counters of errored completions. */ 219f0d2114fSYongseok Koh rxq->stats.ipackets -= (pkts_n - n); 220f0d2114fSYongseok Koh rxq->stats.ibytes -= err_bytes; 221f0d2114fSYongseok Koh #endif 222f0d2114fSYongseok Koh return n; 223f0d2114fSYongseok Koh } 224f0d2114fSYongseok Koh 225f0d2114fSYongseok Koh /** 226f0d2114fSYongseok Koh * DPDK callback for vectorized RX. 227f0d2114fSYongseok Koh * 228f0d2114fSYongseok Koh * @param dpdk_rxq 229f0d2114fSYongseok Koh * Generic pointer to RX queue structure. 230f0d2114fSYongseok Koh * @param[out] pkts 231f0d2114fSYongseok Koh * Array to store received packets. 
232f0d2114fSYongseok Koh * @param pkts_n 233f0d2114fSYongseok Koh * Maximum number of packets in array. 234f0d2114fSYongseok Koh * 235f0d2114fSYongseok Koh * @return 236f0d2114fSYongseok Koh * Number of packets successfully received (<= pkts_n). 237f0d2114fSYongseok Koh */ 238f0d2114fSYongseok Koh uint16_t 239f0d2114fSYongseok Koh mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 240f0d2114fSYongseok Koh { 241f0d2114fSYongseok Koh struct mlx5_rxq_data *rxq = dpdk_rxq; 242f0d2114fSYongseok Koh uint16_t nb_rx; 243d27fb0deSYongseok Koh uint64_t err = 0; 244f0d2114fSYongseok Koh 245d27fb0deSYongseok Koh nb_rx = rxq_burst_v(rxq, pkts, pkts_n, &err); 246d27fb0deSYongseok Koh if (unlikely(err)) 247f0d2114fSYongseok Koh nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx); 248f0d2114fSYongseok Koh return nb_rx; 249f0d2114fSYongseok Koh } 250f0d2114fSYongseok Koh 251f0d2114fSYongseok Koh /** 252f0d2114fSYongseok Koh * Check Tx queue flags are set for raw vectorized Tx. 253f0d2114fSYongseok Koh * 254f0d2114fSYongseok Koh * @param priv 255f0d2114fSYongseok Koh * Pointer to private structure. 256*dbccb4cdSShahaf Shuler * @param dev 257*dbccb4cdSShahaf Shuler * Pointer to rte_eth_dev structure. 258f0d2114fSYongseok Koh * 259f0d2114fSYongseok Koh * @return 260f0d2114fSYongseok Koh * 1 if supported, negative errno value if not. 261f0d2114fSYongseok Koh */ 262f0d2114fSYongseok Koh int __attribute__((cold)) 263*dbccb4cdSShahaf Shuler priv_check_raw_vec_tx_support(__rte_unused struct priv *priv, 264*dbccb4cdSShahaf Shuler struct rte_eth_dev *dev) 265f0d2114fSYongseok Koh { 266*dbccb4cdSShahaf Shuler uint64_t offloads = dev->data->dev_conf.txmode.offloads; 267f0d2114fSYongseok Koh 268*dbccb4cdSShahaf Shuler /* Doesn't support any offload. 
*/ 269*dbccb4cdSShahaf Shuler if (offloads) 270f0d2114fSYongseok Koh return -ENOTSUP; 271f0d2114fSYongseok Koh return 1; 272f0d2114fSYongseok Koh } 273f0d2114fSYongseok Koh 274f0d2114fSYongseok Koh /** 275f0d2114fSYongseok Koh * Check a device can support vectorized TX. 276f0d2114fSYongseok Koh * 277f0d2114fSYongseok Koh * @param priv 278f0d2114fSYongseok Koh * Pointer to private structure. 279*dbccb4cdSShahaf Shuler * @param dev 280*dbccb4cdSShahaf Shuler * Pointer to rte_eth_dev structure. 281f0d2114fSYongseok Koh * 282f0d2114fSYongseok Koh * @return 283f0d2114fSYongseok Koh * 1 if supported, negative errno value if not. 284f0d2114fSYongseok Koh */ 285f0d2114fSYongseok Koh int __attribute__((cold)) 286*dbccb4cdSShahaf Shuler priv_check_vec_tx_support(struct priv *priv, struct rte_eth_dev *dev) 287f0d2114fSYongseok Koh { 288*dbccb4cdSShahaf Shuler uint64_t offloads = dev->data->dev_conf.txmode.offloads; 289*dbccb4cdSShahaf Shuler 2907fe24446SShahaf Shuler if (!priv->config.tx_vec_en || 291f0d2114fSYongseok Koh priv->txqs_n > MLX5_VPMD_MIN_TXQS || 2927fe24446SShahaf Shuler priv->config.mps != MLX5_MPW_ENHANCED || 293*dbccb4cdSShahaf Shuler offloads & ~MLX5_VEC_TX_OFFLOAD_CAP) 294f0d2114fSYongseok Koh return -ENOTSUP; 295f0d2114fSYongseok Koh return 1; 296f0d2114fSYongseok Koh } 297f0d2114fSYongseok Koh 298f0d2114fSYongseok Koh /** 299f0d2114fSYongseok Koh * Check a RX queue can support vectorized RX. 300f0d2114fSYongseok Koh * 301f0d2114fSYongseok Koh * @param rxq 302f0d2114fSYongseok Koh * Pointer to RX queue. 303f0d2114fSYongseok Koh * 304f0d2114fSYongseok Koh * @return 305f0d2114fSYongseok Koh * 1 if supported, negative errno value if not. 
306f0d2114fSYongseok Koh */ 307f0d2114fSYongseok Koh int __attribute__((cold)) 308f0d2114fSYongseok Koh rxq_check_vec_support(struct mlx5_rxq_data *rxq) 309f0d2114fSYongseok Koh { 310f0d2114fSYongseok Koh struct mlx5_rxq_ctrl *ctrl = 311f0d2114fSYongseok Koh container_of(rxq, struct mlx5_rxq_ctrl, rxq); 312f0d2114fSYongseok Koh 3137fe24446SShahaf Shuler if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0) 314f0d2114fSYongseok Koh return -ENOTSUP; 315f0d2114fSYongseok Koh return 1; 316f0d2114fSYongseok Koh } 317f0d2114fSYongseok Koh 318f0d2114fSYongseok Koh /** 319f0d2114fSYongseok Koh * Check a device can support vectorized RX. 320f0d2114fSYongseok Koh * 321f0d2114fSYongseok Koh * @param priv 322f0d2114fSYongseok Koh * Pointer to private structure. 323f0d2114fSYongseok Koh * 324f0d2114fSYongseok Koh * @return 325f0d2114fSYongseok Koh * 1 if supported, negative errno value if not. 326f0d2114fSYongseok Koh */ 327f0d2114fSYongseok Koh int __attribute__((cold)) 328f0d2114fSYongseok Koh priv_check_vec_rx_support(struct priv *priv) 329f0d2114fSYongseok Koh { 330f0d2114fSYongseok Koh uint16_t i; 331f0d2114fSYongseok Koh 3327fe24446SShahaf Shuler if (!priv->config.rx_vec_en) 333f0d2114fSYongseok Koh return -ENOTSUP; 334f0d2114fSYongseok Koh /* All the configured queues should support. */ 335f0d2114fSYongseok Koh for (i = 0; i < priv->rxqs_n; ++i) { 336f0d2114fSYongseok Koh struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; 337f0d2114fSYongseok Koh 338f0d2114fSYongseok Koh if (!rxq) 339f0d2114fSYongseok Koh continue; 340f0d2114fSYongseok Koh if (rxq_check_vec_support(rxq) < 0) 341f0d2114fSYongseok Koh break; 342f0d2114fSYongseok Koh } 343f0d2114fSYongseok Koh if (i != priv->rxqs_n) 344f0d2114fSYongseok Koh return -ENOTSUP; 345f0d2114fSYongseok Koh return 1; 346f0d2114fSYongseok Koh } 347