/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Skip error packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions.
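	 * These packets were already accounted as received by the burst
	 * routine before the error was detected, so the soft counters are
	 * adjusted back here.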
	 */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		for (i = 0; i < n; ++i) {
			void *buf_addr;

			/*
			 * To support mbufs with externally attached data
			 * buffers, the buf_addr pointer has to be read from
			 * the mbuf instead of being computed with
			 * rte_mbuf_buf_addr(). This touches the mbuf itself
			 * and may impact performance.
			 */
			buf_addr = elts[i]->buf_addr;
			wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
						      RTE_PKTMBUF_HEADROOM);
			/* If there's a single MR, no need to replace LKey. */
			if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
				     > 1))
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
		}
		rxq->rq_ci += n;
		/* Prevent overflowing into consumed mbufs.
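		 * The vector routines may speculatively read a few entries
		 * past the last replenished one; pointing those entries at
		 * the static fake mbuf keeps them from touching mbufs still
		 * owned by the application.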
		 */
		elts_idx = rxq->rq_ci & q_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = rxq->elts_ci - rxq->rq_pi;
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];

	if (n <= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n + MLX5_VPMD_RX_MAX_BURST >=
			    MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n + MLX5_VPMD_RX_MAX_BURST, elts_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
	}
}

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
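 *
 * Whether a packet is copied into the SW-ring mbuf or has the MPRQ buffer
 * attached as an external buffer is decided inside mprq_buf_to_pkt(),
 * primarily based on the packet length (see the mprq_max_memcpy_len
 * threshold).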
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = 1 << rxq->strd_sz_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

		if (!elts[i]->pkt_len) {
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
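		/*
		 * Build the mbuf from the MPRQ buffer (copy or attach) and
		 * consume all the strides the packet occupies.
		 */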
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
	rxq->rq_pi += i;
	rxq->cq_ci += i;
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}

/**
 * Receive burst of packets. An errored completion also consumes a mbuf, but the
 * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 *   packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set true if no new CQE seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
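 *
 * Completions may arrive compressed, with a single CQE standing for a
 * session of identical CQEs. Decompressed entries that do not fit into the
 * pkts array are kept in rxq->decompressed and returned by subsequent calls.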
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
	/* See if there are unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs.
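	 * Each non-compressed CQE consumes exactly one element of the SW
	 * ring, so cq_ci and rq_pi advance in lockstep by nocmp_n.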
	 */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

/**
 * Receive burst of packets. An errored completion also consumes a mbuf, but the
 * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
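 *
 * With Multi-Packet RQ (MPRQ), a single receive WQE provides one large
 * buffer split into strides, and each received packet occupies one or more
 * of those strides.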
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 *   packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set true if no new CQE seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;

	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* See if there are unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end.
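	 * Clamp the burst so that neither the elements ring nor the CQ ring
	 * wraps around within a single pass; the vector routines rely on
	 * contiguous array accesses.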
	 */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Process all the CQEs */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n]);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
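 *
 * Repeatedly invokes rxq_burst_mprq_v() and filters out errored packets
 * until the requested number of packets has been received or no more
 * completions are available.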
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check whether an RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues should support vectorized RX. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];

		if (!rxq)
			continue;
		if (mlx5_rxq_check_vec_support(rxq) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}
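
/*
 * Note: a simplified sketch of how the checks above are typically consumed
 * when selecting the Rx burst function (the actual selection logic lives in
 * mlx5_select_rx_function(), not in this file):
 *
 *	if (mlx5_check_vec_rx_support(dev) > 0)
 *		rx_pkt_burst = mlx5_mprq_enabled(dev) ?
 *			       mlx5_rx_burst_mprq_vec : mlx5_rx_burst_vec;
 */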