18a6ff33dSHyong Youb Kim /* SPDX-License-Identifier: BSD-3-Clause
28a6ff33dSHyong Youb Kim * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved.
38a6ff33dSHyong Youb Kim * Copyright 2007 Nuova Systems, Inc. All rights reserved.
48a6ff33dSHyong Youb Kim */
58a6ff33dSHyong Youb Kim
68a6ff33dSHyong Youb Kim #include <rte_mbuf.h>
7df96fd0dSBruce Richardson #include <ethdev_driver.h>
8ac61aa64SCiara Power #include <rte_vect.h>
98a6ff33dSHyong Youb Kim
108a6ff33dSHyong Youb Kim #include "enic_compat.h"
118a6ff33dSHyong Youb Kim #include "rq_enet_desc.h"
128a6ff33dSHyong Youb Kim #include "enic.h"
138a6ff33dSHyong Youb Kim #include "enic_rxtx_common.h"
148a6ff33dSHyong Youb Kim
158a6ff33dSHyong Youb Kim #include <x86intrin.h>
168a6ff33dSHyong Youb Kim
178a6ff33dSHyong Youb Kim static struct rte_mbuf *
rx_one(struct cq_enet_rq_desc * cqd,struct rte_mbuf * mb,struct enic * enic)188a6ff33dSHyong Youb Kim rx_one(struct cq_enet_rq_desc *cqd, struct rte_mbuf *mb, struct enic *enic)
198a6ff33dSHyong Youb Kim {
208a6ff33dSHyong Youb Kim bool tnl;
218a6ff33dSHyong Youb Kim
228a6ff33dSHyong Youb Kim *(uint64_t *)&mb->rearm_data = enic->mbuf_initializer;
238a6ff33dSHyong Youb Kim mb->data_len = cqd->bytes_written_flags &
248a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
258a6ff33dSHyong Youb Kim mb->pkt_len = mb->data_len;
268a6ff33dSHyong Youb Kim tnl = enic->overlay_offload && (cqd->completed_index_flags &
278a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0;
288a6ff33dSHyong Youb Kim mb->packet_type =
298a6ff33dSHyong Youb Kim enic_cq_rx_flags_to_pkt_type((struct cq_desc *)cqd, tnl);
308a6ff33dSHyong Youb Kim enic_cq_rx_to_pkt_flags((struct cq_desc *)cqd, mb);
318a6ff33dSHyong Youb Kim /* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */
328a6ff33dSHyong Youb Kim if (tnl) {
338a6ff33dSHyong Youb Kim mb->packet_type &= ~(RTE_PTYPE_L3_MASK |
348a6ff33dSHyong Youb Kim RTE_PTYPE_L4_MASK);
358a6ff33dSHyong Youb Kim }
368a6ff33dSHyong Youb Kim return mb;
378a6ff33dSHyong Youb Kim }
388a6ff33dSHyong Youb Kim
398a6ff33dSHyong Youb Kim static uint16_t
enic_noscatter_vec_recv_pkts(void * rx_queue,struct rte_mbuf ** rx_pkts,uint16_t nb_pkts)408a6ff33dSHyong Youb Kim enic_noscatter_vec_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
418a6ff33dSHyong Youb Kim uint16_t nb_pkts)
428a6ff33dSHyong Youb Kim {
438a6ff33dSHyong Youb Kim struct rte_mbuf **rx, **rxmb;
448a6ff33dSHyong Youb Kim uint16_t cq_idx, nb_rx, max_rx;
458a6ff33dSHyong Youb Kim struct cq_enet_rq_desc *cqd;
468a6ff33dSHyong Youb Kim struct rq_enet_desc *rqd;
478a6ff33dSHyong Youb Kim struct vnic_cq *cq;
488a6ff33dSHyong Youb Kim struct vnic_rq *rq;
498a6ff33dSHyong Youb Kim struct enic *enic;
508a6ff33dSHyong Youb Kim uint8_t color;
518a6ff33dSHyong Youb Kim
528a6ff33dSHyong Youb Kim rq = rx_queue;
538a6ff33dSHyong Youb Kim enic = vnic_dev_priv(rq->vdev);
548a6ff33dSHyong Youb Kim cq = &enic->cq[enic_cq_rq(enic, rq->index)];
558a6ff33dSHyong Youb Kim cq_idx = cq->to_clean;
568a6ff33dSHyong Youb Kim
578a6ff33dSHyong Youb Kim /*
588a6ff33dSHyong Youb Kim * Fill up the reserve of free mbufs. Below, we restock the receive
598a6ff33dSHyong Youb Kim * ring with these mbufs to avoid allocation failures.
608a6ff33dSHyong Youb Kim */
618a6ff33dSHyong Youb Kim if (rq->num_free_mbufs == 0) {
628a6ff33dSHyong Youb Kim if (rte_mempool_get_bulk(rq->mp, (void **)rq->free_mbufs,
638a6ff33dSHyong Youb Kim ENIC_RX_BURST_MAX))
648a6ff33dSHyong Youb Kim return 0;
658a6ff33dSHyong Youb Kim rq->num_free_mbufs = ENIC_RX_BURST_MAX;
668a6ff33dSHyong Youb Kim }
678a6ff33dSHyong Youb Kim /* Receive until the end of the ring, at most. */
688a6ff33dSHyong Youb Kim max_rx = RTE_MIN(nb_pkts, rq->num_free_mbufs);
698a6ff33dSHyong Youb Kim max_rx = RTE_MIN(max_rx, cq->ring.desc_count - cq_idx);
708a6ff33dSHyong Youb Kim
718a6ff33dSHyong Youb Kim rxmb = rq->mbuf_ring + cq_idx;
728a6ff33dSHyong Youb Kim color = cq->last_color;
738a6ff33dSHyong Youb Kim cqd = (struct cq_enet_rq_desc *)(cq->ring.descs) + cq_idx;
748a6ff33dSHyong Youb Kim rx = rx_pkts;
758a6ff33dSHyong Youb Kim if (max_rx == 0 ||
768a6ff33dSHyong Youb Kim (cqd->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color)
778a6ff33dSHyong Youb Kim return 0;
788a6ff33dSHyong Youb Kim
798a6ff33dSHyong Youb Kim /* Step 1: Process one packet to do aligned 256-bit load below */
808a6ff33dSHyong Youb Kim if (cq_idx & 0x1) {
818a6ff33dSHyong Youb Kim if (unlikely(cqd->bytes_written_flags &
828a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {
838a6ff33dSHyong Youb Kim rte_pktmbuf_free(*rxmb++);
848a6ff33dSHyong Youb Kim rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
858a6ff33dSHyong Youb Kim } else {
868a6ff33dSHyong Youb Kim *rx++ = rx_one(cqd, *rxmb++, enic);
878a6ff33dSHyong Youb Kim }
888a6ff33dSHyong Youb Kim cqd++;
898a6ff33dSHyong Youb Kim max_rx--;
908a6ff33dSHyong Youb Kim }
918a6ff33dSHyong Youb Kim
928a6ff33dSHyong Youb Kim const __m256i mask =
938a6ff33dSHyong Youb Kim _mm256_set_epi8(/* Second descriptor */
948a6ff33dSHyong Youb Kim 0xff, /* type_color */
958a6ff33dSHyong Youb Kim (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |
968a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_IPV4 |
978a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_IPV6 |
988a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_TCP |
998a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */
1008a6ff33dSHyong Youb Kim 0, 0, /* checksum_fcoe */
1018a6ff33dSHyong Youb Kim 0xff, 0xff, /* vlan */
1028a6ff33dSHyong Youb Kim 0x3f, 0xff, /* bytes_written_flags */
1038a6ff33dSHyong Youb Kim 0xff, 0xff, 0xff, 0xff, /* rss_hash */
1048a6ff33dSHyong Youb Kim 0xff, 0xff, /* q_number_rss_type_flags */
1058a6ff33dSHyong Youb Kim 0, 0, /* completed_index_flags */
1068a6ff33dSHyong Youb Kim /* First descriptor */
1078a6ff33dSHyong Youb Kim 0xff, /* type_color */
1088a6ff33dSHyong Youb Kim (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |
1098a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_IPV4 |
1108a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_IPV6 |
1118a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_TCP |
1128a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */
1138a6ff33dSHyong Youb Kim 0, 0, /* checksum_fcoe */
1148a6ff33dSHyong Youb Kim 0xff, 0xff, /* vlan */
1158a6ff33dSHyong Youb Kim 0x3f, 0xff, /* bytes_written_flags */
1168a6ff33dSHyong Youb Kim 0xff, 0xff, 0xff, 0xff, /* rss_hash */
1178a6ff33dSHyong Youb Kim 0xff, 0xff, /* q_number_rss_type_flags */
1188a6ff33dSHyong Youb Kim 0, 0 /* completed_index_flags */
1198a6ff33dSHyong Youb Kim );
1208a6ff33dSHyong Youb Kim const __m256i shuffle_mask =
1218a6ff33dSHyong Youb Kim _mm256_set_epi8(/* Second descriptor */
1228a6ff33dSHyong Youb Kim 7, 6, 5, 4, /* rss = rss_hash */
1238a6ff33dSHyong Youb Kim 11, 10, /* vlan_tci = vlan */
1248a6ff33dSHyong Youb Kim 9, 8, /* data_len = bytes_written */
1258a6ff33dSHyong Youb Kim 0x80, 0x80, 9, 8, /* pkt_len = bytes_written */
1268a6ff33dSHyong Youb Kim 0x80, 0x80, 0x80, 0x80, /* packet_type = 0 */
1278a6ff33dSHyong Youb Kim /* First descriptor */
1288a6ff33dSHyong Youb Kim 7, 6, 5, 4, /* rss = rss_hash */
1298a6ff33dSHyong Youb Kim 11, 10, /* vlan_tci = vlan */
1308a6ff33dSHyong Youb Kim 9, 8, /* data_len = bytes_written */
1318a6ff33dSHyong Youb Kim 0x80, 0x80, 9, 8, /* pkt_len = bytes_written */
1328a6ff33dSHyong Youb Kim 0x80, 0x80, 0x80, 0x80 /* packet_type = 0 */
1338a6ff33dSHyong Youb Kim );
1348a6ff33dSHyong Youb Kim /* Used to collect 8 flags from 8 desc into one register */
1358a6ff33dSHyong Youb Kim const __m256i flags_shuffle_mask =
1368a6ff33dSHyong Youb Kim _mm256_set_epi8(/* Second descriptor */
1378a6ff33dSHyong Youb Kim 1, 3, 9, 14,
1388a6ff33dSHyong Youb Kim 1, 3, 9, 14,
1398a6ff33dSHyong Youb Kim 1, 3, 9, 14,
1408a6ff33dSHyong Youb Kim 1, 3, 9, 14,
1418a6ff33dSHyong Youb Kim /* First descriptor */
1428a6ff33dSHyong Youb Kim 1, 3, 9, 14,
1438a6ff33dSHyong Youb Kim 1, 3, 9, 14,
1448a6ff33dSHyong Youb Kim 1, 3, 9, 14,
1458a6ff33dSHyong Youb Kim /*
1468a6ff33dSHyong Youb Kim * Byte 3: upper byte of completed_index_flags
1478a6ff33dSHyong Youb Kim * bit 5 = fcoe (tunnel)
1488a6ff33dSHyong Youb Kim * Byte 2: upper byte of q_number_rss_type_flags
1498a6ff33dSHyong Youb Kim * bits 2,3,4,5 = rss type
1508a6ff33dSHyong Youb Kim * bit 6 = csum_not_calc
1518a6ff33dSHyong Youb Kim * Byte 1: upper byte of bytes_written_flags
1528a6ff33dSHyong Youb Kim * bit 6 = truncated
1538a6ff33dSHyong Youb Kim * bit 7 = vlan stripped
1548a6ff33dSHyong Youb Kim * Byte 0: flags
1558a6ff33dSHyong Youb Kim */
1568a6ff33dSHyong Youb Kim 1, 3, 9, 14
1578a6ff33dSHyong Youb Kim );
1588a6ff33dSHyong Youb Kim /* Used to collect 8 VLAN IDs from 8 desc into one register */
1598a6ff33dSHyong Youb Kim const __m256i vlan_shuffle_mask =
1608a6ff33dSHyong Youb Kim _mm256_set_epi8(/* Second descriptor */
1618a6ff33dSHyong Youb Kim 0x80, 0x80, 11, 10,
1628a6ff33dSHyong Youb Kim 0x80, 0x80, 11, 10,
1638a6ff33dSHyong Youb Kim 0x80, 0x80, 11, 10,
1648a6ff33dSHyong Youb Kim 0x80, 0x80, 11, 10,
1658a6ff33dSHyong Youb Kim /* First descriptor */
1668a6ff33dSHyong Youb Kim 0x80, 0x80, 11, 10,
1678a6ff33dSHyong Youb Kim 0x80, 0x80, 11, 10,
1688a6ff33dSHyong Youb Kim 0x80, 0x80, 11, 10,
1698a6ff33dSHyong Youb Kim 0x80, 0x80, 11, 10);
170*daa02b5cSOlivier Matz /* RTE_MBUF_F_RX_RSS_HASH is 1<<1 so fits in 8-bit integer */
1718a6ff33dSHyong Youb Kim const __m256i rss_shuffle =
1728a6ff33dSHyong Youb Kim _mm256_set_epi8(/* second 128 bits */
173*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
174*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
175*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
176*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
177*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
1788a6ff33dSHyong Youb Kim 0, /* rss_types = 0 */
1798a6ff33dSHyong Youb Kim /* first 128 bits */
180*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
181*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
182*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
183*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
184*daa02b5cSOlivier Matz RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
1858a6ff33dSHyong Youb Kim 0 /* rss_types = 0 */);
1868a6ff33dSHyong Youb Kim /*
1878a6ff33dSHyong Youb Kim * VLAN offload flags.
1888a6ff33dSHyong Youb Kim * shuffle index:
1898a6ff33dSHyong Youb Kim * vlan_stripped => bit 0
1908a6ff33dSHyong Youb Kim * vlan_id == 0 => bit 1
1918a6ff33dSHyong Youb Kim */
1928a6ff33dSHyong Youb Kim const __m256i vlan_shuffle =
1938a6ff33dSHyong Youb Kim _mm256_set_epi32(0, 0, 0, 0,
194*daa02b5cSOlivier Matz RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED, 0,
195*daa02b5cSOlivier Matz RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED, RTE_MBUF_F_RX_VLAN);
1968a6ff33dSHyong Youb Kim /* Use the same shuffle index as vlan_shuffle */
1978a6ff33dSHyong Youb Kim const __m256i vlan_ptype_shuffle =
1988a6ff33dSHyong Youb Kim _mm256_set_epi32(0, 0, 0, 0,
1998a6ff33dSHyong Youb Kim RTE_PTYPE_L2_ETHER,
2008a6ff33dSHyong Youb Kim RTE_PTYPE_L2_ETHER,
2018a6ff33dSHyong Youb Kim RTE_PTYPE_L2_ETHER,
2028a6ff33dSHyong Youb Kim RTE_PTYPE_L2_ETHER_VLAN);
2038a6ff33dSHyong Youb Kim /*
2048a6ff33dSHyong Youb Kim 	 * CKSUM flags. Shift right so they fit in 8-bit integers.
2058a6ff33dSHyong Youb Kim * shuffle index:
2068a6ff33dSHyong Youb Kim * ipv4_csum_ok => bit 3
2078a6ff33dSHyong Youb Kim * ip4 => bit 2
2088a6ff33dSHyong Youb Kim * tcp_or_udp => bit 1
2098a6ff33dSHyong Youb Kim * tcp_udp_csum_ok => bit 0
2108a6ff33dSHyong Youb Kim */
2118a6ff33dSHyong Youb Kim const __m256i csum_shuffle =
2128a6ff33dSHyong Youb Kim _mm256_set_epi8(/* second 128 bits */
2138a6ff33dSHyong Youb Kim /* 1111 ip4+ip4_ok+l4+l4_ok */
214*daa02b5cSOlivier Matz ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
2158a6ff33dSHyong Youb Kim /* 1110 ip4_ok+ip4+l4+!l4_ok */
216*daa02b5cSOlivier Matz ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1),
217*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1), /* 1101 ip4+ip4_ok */
218*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1), /* 1100 ip4_ok+ip4 */
219*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1), /* 1011 l4+l4_ok */
220*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_L4_CKSUM_BAD >> 1), /* 1010 l4+!l4_ok */
2218a6ff33dSHyong Youb Kim 0, /* 1001 */
2228a6ff33dSHyong Youb Kim 0, /* 1000 */
2238a6ff33dSHyong Youb Kim /* 0111 !ip4_ok+ip4+l4+l4_ok */
224*daa02b5cSOlivier Matz ((RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
2258a6ff33dSHyong Youb Kim /* 0110 !ip4_ok+ip4+l4+!l4_ok */
226*daa02b5cSOlivier Matz ((RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1),
227*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1), /* 0101 !ip4_ok+ip4 */
228*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1), /* 0100 !ip4_ok+ip4 */
229*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1), /* 0011 l4+l4_ok */
230*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_L4_CKSUM_BAD >> 1), /* 0010 l4+!l4_ok */
2318a6ff33dSHyong Youb Kim 0, /* 0001 */
2328a6ff33dSHyong Youb Kim 0, /* 0000 */
2338a6ff33dSHyong Youb Kim /* first 128 bits */
234*daa02b5cSOlivier Matz ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
235*daa02b5cSOlivier Matz ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1),
236*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1),
237*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1),
238*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1),
239*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_L4_CKSUM_BAD >> 1),
2408a6ff33dSHyong Youb Kim 0, 0,
241*daa02b5cSOlivier Matz ((RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
242*daa02b5cSOlivier Matz ((RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1),
243*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1),
244*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1),
245*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1),
246*daa02b5cSOlivier Matz (RTE_MBUF_F_RX_L4_CKSUM_BAD >> 1),
2478a6ff33dSHyong Youb Kim 0, 0);
2488a6ff33dSHyong Youb Kim /*
2498a6ff33dSHyong Youb Kim * Non-fragment PTYPEs.
2508a6ff33dSHyong Youb Kim * Shuffle 4-bit index:
2518a6ff33dSHyong Youb Kim * ip6 => bit 0
2528a6ff33dSHyong Youb Kim * ip4 => bit 1
2538a6ff33dSHyong Youb Kim * udp => bit 2
2548a6ff33dSHyong Youb Kim * tcp => bit 3
2558a6ff33dSHyong Youb Kim * bit
2568a6ff33dSHyong Youb Kim * 3 2 1 0
2578a6ff33dSHyong Youb Kim * -------
2588a6ff33dSHyong Youb Kim * 0 0 0 0 unknown
2598a6ff33dSHyong Youb Kim * 0 0 0 1 ip6 | nonfrag
2608a6ff33dSHyong Youb Kim * 0 0 1 0 ip4 | nonfrag
2618a6ff33dSHyong Youb Kim * 0 0 1 1 unknown
2628a6ff33dSHyong Youb Kim * 0 1 0 0 unknown
2638a6ff33dSHyong Youb Kim * 0 1 0 1 ip6 | udp
2648a6ff33dSHyong Youb Kim * 0 1 1 0 ip4 | udp
2658a6ff33dSHyong Youb Kim * 0 1 1 1 unknown
2668a6ff33dSHyong Youb Kim * 1 0 0 0 unknown
2678a6ff33dSHyong Youb Kim * 1 0 0 1 ip6 | tcp
2688a6ff33dSHyong Youb Kim * 1 0 1 0 ip4 | tcp
2698a6ff33dSHyong Youb Kim * 1 0 1 1 unknown
2708a6ff33dSHyong Youb Kim * 1 1 0 0 unknown
2718a6ff33dSHyong Youb Kim * 1 1 0 1 unknown
2728a6ff33dSHyong Youb Kim * 1 1 1 0 unknown
2738a6ff33dSHyong Youb Kim * 1 1 1 1 unknown
2748a6ff33dSHyong Youb Kim *
2758a6ff33dSHyong Youb Kim 	 * PTYPEs do not fit in 8 bits, so shift right 4.
2768a6ff33dSHyong Youb Kim */
2778a6ff33dSHyong Youb Kim const __m256i nonfrag_ptype_shuffle =
2788a6ff33dSHyong Youb Kim _mm256_set_epi8(/* second 128 bits */
2798a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN,
2808a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2818a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2828a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
2838a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
2848a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2858a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
2868a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
2878a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2888a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
2898a6ff33dSHyong Youb Kim RTE_PTYPE_L4_NONFRAG) >> 4,
2908a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
2918a6ff33dSHyong Youb Kim RTE_PTYPE_L4_NONFRAG) >> 4,
2928a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN,
2938a6ff33dSHyong Youb Kim /* first 128 bits */
2948a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN,
2958a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2968a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
2978a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
2988a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
2998a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3008a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
3018a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
3028a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3038a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3048a6ff33dSHyong Youb Kim RTE_PTYPE_L4_NONFRAG) >> 4,
3058a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3068a6ff33dSHyong Youb Kim RTE_PTYPE_L4_NONFRAG) >> 4,
3078a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN);
3088a6ff33dSHyong Youb Kim /* Fragment PTYPEs. Use the same shuffle index as above. */
3098a6ff33dSHyong Youb Kim const __m256i frag_ptype_shuffle =
3108a6ff33dSHyong Youb Kim _mm256_set_epi8(/* second 128 bits */
3118a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN,
3128a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3138a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3148a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3158a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3168a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3178a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3188a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3198a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3208a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3218a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3228a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3238a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3248a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3258a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3268a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3278a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3288a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN,
3298a6ff33dSHyong Youb Kim /* first 128 bits */
3308a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN,
3318a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3328a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3338a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3348a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3358a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3368a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3378a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3388a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3398a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3408a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3418a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3428a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3438a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
3448a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3458a6ff33dSHyong Youb Kim (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
3468a6ff33dSHyong Youb Kim RTE_PTYPE_L4_FRAG) >> 4,
3478a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN);
3488a6ff33dSHyong Youb Kim /*
3498a6ff33dSHyong Youb Kim * Tunnel PTYPEs. Use the same shuffle index as above.
3508a6ff33dSHyong Youb Kim * L4 types are not part of this table. They come from non-tunnel
3518a6ff33dSHyong Youb Kim * types above.
3528a6ff33dSHyong Youb Kim */
3538a6ff33dSHyong Youb Kim const __m256i tnl_l3_ptype_shuffle =
3548a6ff33dSHyong Youb Kim _mm256_set_epi8(/* second 128 bits */
3558a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN,
3568a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3578a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3588a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3598a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3608a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3618a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3628a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3638a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3648a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3658a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3668a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN,
3678a6ff33dSHyong Youb Kim /* first 128 bits */
3688a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN,
3698a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3708a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3718a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3728a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3738a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3748a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3758a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3768a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
3778a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
3788a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
3798a6ff33dSHyong Youb Kim RTE_PTYPE_UNKNOWN);
3808a6ff33dSHyong Youb Kim
3818a6ff33dSHyong Youb Kim const __m256i mbuf_init = _mm256_set_epi64x(0, enic->mbuf_initializer,
3828a6ff33dSHyong Youb Kim 0, enic->mbuf_initializer);
3838a6ff33dSHyong Youb Kim
3848a6ff33dSHyong Youb Kim /*
3858a6ff33dSHyong Youb Kim * --- cq desc fields --- offset
3868a6ff33dSHyong Youb Kim * completed_index_flags - 0 use: fcoe
3878a6ff33dSHyong Youb Kim * q_number_rss_type_flags - 2 use: rss types, csum_not_calc
3888a6ff33dSHyong Youb Kim * rss_hash - 4 ==> mbuf.hash.rss
3898a6ff33dSHyong Youb Kim * bytes_written_flags - 8 ==> mbuf.pkt_len,data_len
3908a6ff33dSHyong Youb Kim * use: truncated, vlan_stripped
3918a6ff33dSHyong Youb Kim * vlan - 10 ==> mbuf.vlan_tci
3928a6ff33dSHyong Youb Kim * checksum_fcoe - 12 (unused)
3938a6ff33dSHyong Youb Kim * flags - 14 use: all bits
3948a6ff33dSHyong Youb Kim * type_color - 15 (unused)
3958a6ff33dSHyong Youb Kim *
3968a6ff33dSHyong Youb Kim * --- mbuf fields --- offset
3978a6ff33dSHyong Youb Kim * rearm_data ---- 16
3988a6ff33dSHyong Youb Kim * data_off - 0 (mbuf_init) -+
3998a6ff33dSHyong Youb Kim * refcnt - 2 (mbuf_init) |
4008a6ff33dSHyong Youb Kim * nb_segs - 4 (mbuf_init) | 16B 128b
4018a6ff33dSHyong Youb Kim * port - 6 (mbuf_init) |
4028a6ff33dSHyong Youb Kim * ol_flag - 8 (from cqd) -+
4038a6ff33dSHyong Youb Kim * rx_descriptor_fields1 ---- 32
4048a6ff33dSHyong Youb Kim * packet_type - 0 (from cqd) -+
4058a6ff33dSHyong Youb Kim * pkt_len - 4 (from cqd) |
4068a6ff33dSHyong Youb Kim * data_len - 8 (from cqd) | 16B 128b
4078a6ff33dSHyong Youb Kim * vlan_tci - 10 (from cqd) |
4088a6ff33dSHyong Youb Kim * rss - 12 (from cqd) -+
4098a6ff33dSHyong Youb Kim */
4108a6ff33dSHyong Youb Kim
4118a6ff33dSHyong Youb Kim __m256i overlay_enabled =
4128a6ff33dSHyong Youb Kim _mm256_set1_epi32((uint32_t)enic->overlay_offload);
4138a6ff33dSHyong Youb Kim
4148a6ff33dSHyong Youb Kim /* Step 2: Process 8 packets per loop using SIMD */
4158a6ff33dSHyong Youb Kim while (max_rx > 7 && (((cqd + 7)->type_color &
4168a6ff33dSHyong Youb Kim CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {
4178a6ff33dSHyong Youb Kim /* Load 8 16B CQ descriptors */
4188a6ff33dSHyong Youb Kim __m256i cqd01 = _mm256_load_si256((void *)cqd);
4198a6ff33dSHyong Youb Kim __m256i cqd23 = _mm256_load_si256((void *)(cqd + 2));
4208a6ff33dSHyong Youb Kim __m256i cqd45 = _mm256_load_si256((void *)(cqd + 4));
4218a6ff33dSHyong Youb Kim __m256i cqd67 = _mm256_load_si256((void *)(cqd + 6));
4228a6ff33dSHyong Youb Kim /* Copy 8 mbuf pointers to rx_pkts */
4238a6ff33dSHyong Youb Kim _mm256_storeu_si256((void *)rx,
4248a6ff33dSHyong Youb Kim _mm256_loadu_si256((void *)rxmb));
4258a6ff33dSHyong Youb Kim _mm256_storeu_si256((void *)(rx + 4),
4268a6ff33dSHyong Youb Kim _mm256_loadu_si256((void *)(rxmb + 4)));
4278a6ff33dSHyong Youb Kim
4288a6ff33dSHyong Youb Kim /*
4298a6ff33dSHyong Youb Kim * Collect 8 flags (each 32 bits) into one register.
4308a6ff33dSHyong Youb Kim * 4 shuffles, 3 blends, 1 permute for 8 desc: 1 inst/desc
4318a6ff33dSHyong Youb Kim */
4328a6ff33dSHyong Youb Kim __m256i flags01 =
4338a6ff33dSHyong Youb Kim _mm256_shuffle_epi8(cqd01, flags_shuffle_mask);
4348a6ff33dSHyong Youb Kim /*
4358a6ff33dSHyong Youb Kim * Shuffle above produces 8 x 32-bit flags for 8 descriptors
4368a6ff33dSHyong Youb Kim * in this order: 0, 0, 0, 0, 1, 1, 1, 1
4378a6ff33dSHyong Youb Kim * The duplicates in each 128-bit lane simplifies blending
4388a6ff33dSHyong Youb Kim * below.
4398a6ff33dSHyong Youb Kim */
4408a6ff33dSHyong Youb Kim __m256i flags23 =
4418a6ff33dSHyong Youb Kim _mm256_shuffle_epi8(cqd23, flags_shuffle_mask);
4428a6ff33dSHyong Youb Kim __m256i flags45 =
4438a6ff33dSHyong Youb Kim _mm256_shuffle_epi8(cqd45, flags_shuffle_mask);
4448a6ff33dSHyong Youb Kim __m256i flags67 =
4458a6ff33dSHyong Youb Kim _mm256_shuffle_epi8(cqd67, flags_shuffle_mask);
4468a6ff33dSHyong Youb Kim /* 1st blend produces flags for desc: 0, 2, 0, 0, 1, 3, 1, 1 */
4478a6ff33dSHyong Youb Kim __m256i flags0_3 = _mm256_blend_epi32(flags01, flags23, 0x22);
4488a6ff33dSHyong Youb Kim /* 2nd blend produces flags for desc: 4, 4, 4, 6, 5, 5, 5, 7 */
4498a6ff33dSHyong Youb Kim __m256i flags4_7 = _mm256_blend_epi32(flags45, flags67, 0x88);
4508a6ff33dSHyong Youb Kim /* 3rd blend produces flags for desc: 0, 2, 4, 6, 1, 3, 5, 7 */
4518a6ff33dSHyong Youb Kim __m256i flags0_7 = _mm256_blend_epi32(flags0_3, flags4_7, 0xcc);
4528a6ff33dSHyong Youb Kim /*
4538a6ff33dSHyong Youb Kim * Swap to reorder flags in this order: 1, 3, 5, 7, 0, 2, 4, 6
4548a6ff33dSHyong Youb Kim * This order simplifies blend operations way below that
4558a6ff33dSHyong Youb Kim * produce 'rearm' data for each mbuf.
4568a6ff33dSHyong Youb Kim */
4578a6ff33dSHyong Youb Kim flags0_7 = _mm256_permute4x64_epi64(flags0_7,
4588a6ff33dSHyong Youb Kim (1 << 6) + (0 << 4) + (3 << 2) + 2);
4598a6ff33dSHyong Youb Kim
4608a6ff33dSHyong Youb Kim /*
4618a6ff33dSHyong Youb Kim * Check truncated bits and bail out early on.
4628a6ff33dSHyong Youb Kim * 6 avx inst, 1 or, 1 if-then-else for 8 desc: 1 inst/desc
4638a6ff33dSHyong Youb Kim */
4648a6ff33dSHyong Youb Kim __m256i trunc =
4658a6ff33dSHyong Youb Kim _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 17), 31);
4668a6ff33dSHyong Youb Kim trunc = _mm256_add_epi64(trunc, _mm256_permute4x64_epi64(trunc,
4678a6ff33dSHyong Youb Kim (1 << 6) + (0 << 4) + (3 << 2) + 2));
4688a6ff33dSHyong Youb Kim /* 0:63 contains 1+3+0+2 and 64:127 contains 5+7+4+6 */
4698a6ff33dSHyong Youb Kim if (_mm256_extract_epi64(trunc, 0) ||
4708a6ff33dSHyong Youb Kim _mm256_extract_epi64(trunc, 1))
4718a6ff33dSHyong Youb Kim break;
4728a6ff33dSHyong Youb Kim
4738a6ff33dSHyong Youb Kim /*
474*daa02b5cSOlivier Matz * Compute RTE_MBUF_F_RX_RSS_HASH.
4758a6ff33dSHyong Youb Kim * Use 2 shifts and 1 shuffle for 8 desc: 0.375 inst/desc
4768a6ff33dSHyong Youb Kim * RSS types in byte 0, 4, 8, 12, 16, 20, 24, 28
4778a6ff33dSHyong Youb Kim * Everything else is zero.
4788a6ff33dSHyong Youb Kim */
4798a6ff33dSHyong Youb Kim __m256i rss_types =
4808a6ff33dSHyong Youb Kim _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 10), 28);
4818a6ff33dSHyong Youb Kim /*
482*daa02b5cSOlivier Matz * RSS flags (RTE_MBUF_F_RX_RSS_HASH) are in
4838a6ff33dSHyong Youb Kim * byte 0, 4, 8, 12, 16, 20, 24, 28
4848a6ff33dSHyong Youb Kim * Everything else is zero.
4858a6ff33dSHyong Youb Kim */
4868a6ff33dSHyong Youb Kim __m256i rss_flags = _mm256_shuffle_epi8(rss_shuffle, rss_types);
4878a6ff33dSHyong Youb Kim
4888a6ff33dSHyong Youb Kim /*
4898a6ff33dSHyong Youb Kim * Compute CKSUM flags. First build the index and then
4908a6ff33dSHyong Youb Kim * use it to shuffle csum_shuffle.
4918a6ff33dSHyong Youb Kim * 20 instructions including const loads: 2.5 inst/desc
4928a6ff33dSHyong Youb Kim */
4938a6ff33dSHyong Youb Kim /*
4948a6ff33dSHyong Youb Kim * csum_not_calc (bit 22)
4958a6ff33dSHyong Youb Kim * csum_not_calc (0) => 0xffffffff
4968a6ff33dSHyong Youb Kim * csum_not_calc (1) => 0x0
4978a6ff33dSHyong Youb Kim */
4988a6ff33dSHyong Youb Kim const __m256i zero4 = _mm256_setzero_si256();
4998a6ff33dSHyong Youb Kim const __m256i mask22 = _mm256_set1_epi32(0x400000);
5008a6ff33dSHyong Youb Kim __m256i csum_not_calc = _mm256_cmpeq_epi32(zero4,
5018a6ff33dSHyong Youb Kim _mm256_and_si256(flags0_7, mask22));
5028a6ff33dSHyong Youb Kim /*
5038a6ff33dSHyong Youb Kim * (tcp|udp) && !fragment => bit 1
5048a6ff33dSHyong Youb Kim * tcp = bit 2, udp = bit 1, frag = bit 6
5058a6ff33dSHyong Youb Kim */
5068a6ff33dSHyong Youb Kim const __m256i mask1 = _mm256_set1_epi32(0x2);
5078a6ff33dSHyong Youb Kim __m256i tcp_udp =
5088a6ff33dSHyong Youb Kim _mm256_andnot_si256(_mm256_srli_epi32(flags0_7, 5),
5098a6ff33dSHyong Youb Kim _mm256_or_si256(flags0_7,
5108a6ff33dSHyong Youb Kim _mm256_srli_epi32(flags0_7, 1)));
5118a6ff33dSHyong Youb Kim tcp_udp = _mm256_and_si256(tcp_udp, mask1);
5128a6ff33dSHyong Youb Kim /* ipv4 (bit 5) => bit 2 */
5138a6ff33dSHyong Youb Kim const __m256i mask2 = _mm256_set1_epi32(0x4);
5148a6ff33dSHyong Youb Kim __m256i ipv4 = _mm256_and_si256(mask2,
5158a6ff33dSHyong Youb Kim _mm256_srli_epi32(flags0_7, 3));
5168a6ff33dSHyong Youb Kim /*
5178a6ff33dSHyong Youb Kim * ipv4_csum_ok (bit 3) => bit 3
5188a6ff33dSHyong Youb Kim * tcp_udp_csum_ok (bit 0) => bit 0
5198a6ff33dSHyong Youb Kim * 0x9
5208a6ff33dSHyong Youb Kim */
5218a6ff33dSHyong Youb Kim const __m256i mask0_3 = _mm256_set1_epi32(0x9);
5228a6ff33dSHyong Youb Kim __m256i csum_idx = _mm256_and_si256(flags0_7, mask0_3);
5238a6ff33dSHyong Youb Kim csum_idx = _mm256_and_si256(csum_not_calc,
5248a6ff33dSHyong Youb Kim _mm256_or_si256(_mm256_or_si256(csum_idx, ipv4),
5258a6ff33dSHyong Youb Kim tcp_udp));
5268a6ff33dSHyong Youb Kim __m256i csum_flags =
5278a6ff33dSHyong Youb Kim _mm256_shuffle_epi8(csum_shuffle, csum_idx);
5288a6ff33dSHyong Youb Kim /* Shift left to restore CKSUM flags. See csum_shuffle. */
5298a6ff33dSHyong Youb Kim csum_flags = _mm256_slli_epi32(csum_flags, 1);
5308a6ff33dSHyong Youb Kim /* Combine csum flags and offload flags: 0.125 inst/desc */
5318a6ff33dSHyong Youb Kim rss_flags = _mm256_or_si256(rss_flags, csum_flags);
5328a6ff33dSHyong Youb Kim
5338a6ff33dSHyong Youb Kim /*
5348a6ff33dSHyong Youb Kim * Collect 8 VLAN IDs and compute vlan_id != 0 on each.
5358a6ff33dSHyong Youb Kim * 4 shuffles, 3 blends, 1 permute, 1 cmp, 1 sub for 8 desc:
5368a6ff33dSHyong Youb Kim * 1.25 inst/desc
5378a6ff33dSHyong Youb Kim */
5388a6ff33dSHyong Youb Kim __m256i vlan01 = _mm256_shuffle_epi8(cqd01, vlan_shuffle_mask);
5398a6ff33dSHyong Youb Kim __m256i vlan23 = _mm256_shuffle_epi8(cqd23, vlan_shuffle_mask);
5408a6ff33dSHyong Youb Kim __m256i vlan45 = _mm256_shuffle_epi8(cqd45, vlan_shuffle_mask);
5418a6ff33dSHyong Youb Kim __m256i vlan67 = _mm256_shuffle_epi8(cqd67, vlan_shuffle_mask);
5428a6ff33dSHyong Youb Kim __m256i vlan0_3 = _mm256_blend_epi32(vlan01, vlan23, 0x22);
5438a6ff33dSHyong Youb Kim __m256i vlan4_7 = _mm256_blend_epi32(vlan45, vlan67, 0x88);
5448a6ff33dSHyong Youb Kim /* desc: 0, 2, 4, 6, 1, 3, 5, 7 */
5458a6ff33dSHyong Youb Kim __m256i vlan0_7 = _mm256_blend_epi32(vlan0_3, vlan4_7, 0xcc);
5468a6ff33dSHyong Youb Kim /* desc: 1, 3, 5, 7, 0, 2, 4, 6 */
5478a6ff33dSHyong Youb Kim vlan0_7 = _mm256_permute4x64_epi64(vlan0_7,
5488a6ff33dSHyong Youb Kim (1 << 6) + (0 << 4) + (3 << 2) + 2);
5498a6ff33dSHyong Youb Kim /*
5508a6ff33dSHyong Youb Kim * Compare 0 == vlan_id produces 0xffffffff (-1) if
5518a6ff33dSHyong Youb Kim * vlan 0 and 0 if vlan non-0. Then subtracting the
5528a6ff33dSHyong Youb Kim * result from 0 produces 0 - (-1) = 1 for vlan 0, and
5538a6ff33dSHyong Youb Kim * 0 - 0 = 0 for vlan non-0.
5548a6ff33dSHyong Youb Kim */
5558a6ff33dSHyong Youb Kim vlan0_7 = _mm256_cmpeq_epi32(zero4, vlan0_7);
5568a6ff33dSHyong Youb Kim /* vlan_id != 0 => 0, vlan_id == 0 => 1 */
5578a6ff33dSHyong Youb Kim vlan0_7 = _mm256_sub_epi32(zero4, vlan0_7);
5588a6ff33dSHyong Youb Kim
5598a6ff33dSHyong Youb Kim /*
560*daa02b5cSOlivier Matz * Compute RTE_MBUF_F_RX_VLAN and RTE_MBUF_F_RX_VLAN_STRIPPED.
5618a6ff33dSHyong Youb Kim * Use 3 shifts, 1 or, 1 shuffle for 8 desc: 0.625 inst/desc
5628a6ff33dSHyong Youb Kim * VLAN offload flags in byte 0, 4, 8, 12, 16, 20, 24, 28
5638a6ff33dSHyong Youb Kim * Everything else is zero.
5648a6ff33dSHyong Youb Kim */
5658a6ff33dSHyong Youb Kim __m256i vlan_idx =
5668a6ff33dSHyong Youb Kim _mm256_or_si256(/* vlan_stripped => bit 0 */
5678a6ff33dSHyong Youb Kim _mm256_srli_epi32(_mm256_slli_epi32(flags0_7,
5688a6ff33dSHyong Youb Kim 16), 31),
5698a6ff33dSHyong Youb Kim /* (vlan_id == 0) => bit 1 */
5708a6ff33dSHyong Youb Kim _mm256_slli_epi32(vlan0_7, 1));
5718a6ff33dSHyong Youb Kim /*
5728a6ff33dSHyong Youb Kim * The index captures 4 cases.
5738a6ff33dSHyong Youb Kim * stripped, id = 0 ==> 11b = 3
5748a6ff33dSHyong Youb Kim * stripped, id != 0 ==> 01b = 1
5758a6ff33dSHyong Youb Kim * not strip, id == 0 ==> 10b = 2
5768a6ff33dSHyong Youb Kim * not strip, id != 0 ==> 00b = 0
5778a6ff33dSHyong Youb Kim */
5788a6ff33dSHyong Youb Kim __m256i vlan_flags = _mm256_permutevar8x32_epi32(vlan_shuffle,
5798a6ff33dSHyong Youb Kim vlan_idx);
5808a6ff33dSHyong Youb Kim /* Combine vlan and offload flags: 0.125 inst/desc */
5818a6ff33dSHyong Youb Kim rss_flags = _mm256_or_si256(rss_flags, vlan_flags);
5828a6ff33dSHyong Youb Kim
5838a6ff33dSHyong Youb Kim /*
5848a6ff33dSHyong Youb Kim * Compute non-tunnel PTYPEs.
5858a6ff33dSHyong Youb Kim * 17 inst / 8 desc = 2.125 inst/desc
5868a6ff33dSHyong Youb Kim */
5878a6ff33dSHyong Youb Kim /* ETHER and ETHER_VLAN */
5888a6ff33dSHyong Youb Kim __m256i vlan_ptype =
5898a6ff33dSHyong Youb Kim _mm256_permutevar8x32_epi32(vlan_ptype_shuffle,
5908a6ff33dSHyong Youb Kim vlan_idx);
5918a6ff33dSHyong Youb Kim /* Build the ptype index from flags */
5928a6ff33dSHyong Youb Kim tcp_udp = _mm256_slli_epi32(flags0_7, 29);
5938a6ff33dSHyong Youb Kim tcp_udp = _mm256_slli_epi32(_mm256_srli_epi32(tcp_udp, 30), 2);
5948a6ff33dSHyong Youb Kim __m256i ip4_ip6 =
5958a6ff33dSHyong Youb Kim _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 26), 30);
5968a6ff33dSHyong Youb Kim __m256i ptype_idx = _mm256_or_si256(tcp_udp, ip4_ip6);
5978a6ff33dSHyong Youb Kim __m256i frag_bit =
5988a6ff33dSHyong Youb Kim _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 25), 31);
5998a6ff33dSHyong Youb Kim __m256i nonfrag_ptype =
6008a6ff33dSHyong Youb Kim _mm256_shuffle_epi8(nonfrag_ptype_shuffle, ptype_idx);
6018a6ff33dSHyong Youb Kim __m256i frag_ptype =
6028a6ff33dSHyong Youb Kim _mm256_shuffle_epi8(frag_ptype_shuffle, ptype_idx);
6038a6ff33dSHyong Youb Kim /*
6048a6ff33dSHyong Youb Kim * Zero out the unwanted types and combine the remaining bits.
6058a6ff33dSHyong Youb Kim * The effect is same as selecting non-frag or frag types
6068a6ff33dSHyong Youb Kim * depending on the frag bit.
6078a6ff33dSHyong Youb Kim */
6088a6ff33dSHyong Youb Kim nonfrag_ptype = _mm256_and_si256(nonfrag_ptype,
6098a6ff33dSHyong Youb Kim _mm256_cmpeq_epi32(zero4, frag_bit));
6108a6ff33dSHyong Youb Kim frag_ptype = _mm256_and_si256(frag_ptype,
6118a6ff33dSHyong Youb Kim _mm256_cmpgt_epi32(frag_bit, zero4));
6128a6ff33dSHyong Youb Kim __m256i ptype = _mm256_or_si256(nonfrag_ptype, frag_ptype);
6138a6ff33dSHyong Youb Kim ptype = _mm256_slli_epi32(ptype, 4);
6148a6ff33dSHyong Youb Kim /*
6158a6ff33dSHyong Youb Kim * Compute tunnel PTYPEs.
6168a6ff33dSHyong Youb Kim * 15 inst / 8 desc = 1.875 inst/desc
6178a6ff33dSHyong Youb Kim */
6188a6ff33dSHyong Youb Kim __m256i tnl_l3_ptype =
6198a6ff33dSHyong Youb Kim _mm256_shuffle_epi8(tnl_l3_ptype_shuffle, ptype_idx);
6208a6ff33dSHyong Youb Kim tnl_l3_ptype = _mm256_slli_epi32(tnl_l3_ptype, 16);
6218a6ff33dSHyong Youb Kim /*
6228a6ff33dSHyong Youb Kim * Shift non-tunnel L4 types to make them tunnel types.
6238a6ff33dSHyong Youb Kim * RTE_PTYPE_L4_TCP << 16 == RTE_PTYPE_INNER_L4_TCP
6248a6ff33dSHyong Youb Kim */
6258a6ff33dSHyong Youb Kim __m256i tnl_l4_ptype =
6268a6ff33dSHyong Youb Kim _mm256_slli_epi32(_mm256_and_si256(ptype,
6278a6ff33dSHyong Youb Kim _mm256_set1_epi32(RTE_PTYPE_L4_MASK)), 16);
6288a6ff33dSHyong Youb Kim __m256i tnl_ptype =
6298a6ff33dSHyong Youb Kim _mm256_or_si256(tnl_l3_ptype, tnl_l4_ptype);
6308a6ff33dSHyong Youb Kim tnl_ptype = _mm256_or_si256(tnl_ptype,
6318a6ff33dSHyong Youb Kim _mm256_set1_epi32(RTE_PTYPE_TUNNEL_GRENAT |
6328a6ff33dSHyong Youb Kim RTE_PTYPE_INNER_L2_ETHER));
6338a6ff33dSHyong Youb Kim /*
6348a6ff33dSHyong Youb Kim * Select non-tunnel or tunnel types by zeroing out the
6358a6ff33dSHyong Youb Kim * unwanted ones.
6368a6ff33dSHyong Youb Kim */
6378a6ff33dSHyong Youb Kim __m256i tnl_flags = _mm256_and_si256(overlay_enabled,
6388a6ff33dSHyong Youb Kim _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 2), 31));
6398a6ff33dSHyong Youb Kim tnl_ptype = _mm256_and_si256(tnl_ptype,
6408a6ff33dSHyong Youb Kim _mm256_sub_epi32(zero4, tnl_flags));
6418a6ff33dSHyong Youb Kim ptype = _mm256_and_si256(ptype,
6428a6ff33dSHyong Youb Kim _mm256_cmpeq_epi32(zero4, tnl_flags));
6438a6ff33dSHyong Youb Kim /*
6448a6ff33dSHyong Youb Kim * Combine types and swap to have ptypes in the same order
6458a6ff33dSHyong Youb Kim * as desc.
6468a6ff33dSHyong Youb Kim * desc: 0 2 4 6 1 3 5 7
6478a6ff33dSHyong Youb Kim * 3 inst / 8 desc = 0.375 inst/desc
6488a6ff33dSHyong Youb Kim */
6498a6ff33dSHyong Youb Kim ptype = _mm256_or_si256(ptype, tnl_ptype);
6508a6ff33dSHyong Youb Kim ptype = _mm256_or_si256(ptype, vlan_ptype);
6518a6ff33dSHyong Youb Kim ptype = _mm256_permute4x64_epi64(ptype,
6528a6ff33dSHyong Youb Kim (1 << 6) + (0 << 4) + (3 << 2) + 2);
6538a6ff33dSHyong Youb Kim
6548a6ff33dSHyong Youb Kim /*
6558a6ff33dSHyong Youb Kim * Mask packet length.
6568a6ff33dSHyong Youb Kim * Use 4 ands: 0.5 instructions/desc
6578a6ff33dSHyong Youb Kim */
6588a6ff33dSHyong Youb Kim cqd01 = _mm256_and_si256(cqd01, mask);
6598a6ff33dSHyong Youb Kim cqd23 = _mm256_and_si256(cqd23, mask);
6608a6ff33dSHyong Youb Kim cqd45 = _mm256_and_si256(cqd45, mask);
6618a6ff33dSHyong Youb Kim cqd67 = _mm256_and_si256(cqd67, mask);
6628a6ff33dSHyong Youb Kim /*
6638a6ff33dSHyong Youb Kim * Shuffle. Two 16B sets of the mbuf fields.
6648a6ff33dSHyong Youb Kim * packet_type, pkt_len, data_len, vlan_tci, rss
6658a6ff33dSHyong Youb Kim */
6668a6ff33dSHyong Youb Kim __m256i rearm01 = _mm256_shuffle_epi8(cqd01, shuffle_mask);
6678a6ff33dSHyong Youb Kim __m256i rearm23 = _mm256_shuffle_epi8(cqd23, shuffle_mask);
6688a6ff33dSHyong Youb Kim __m256i rearm45 = _mm256_shuffle_epi8(cqd45, shuffle_mask);
6698a6ff33dSHyong Youb Kim __m256i rearm67 = _mm256_shuffle_epi8(cqd67, shuffle_mask);
6708a6ff33dSHyong Youb Kim
6718a6ff33dSHyong Youb Kim /*
6728a6ff33dSHyong Youb Kim * Blend in ptypes
6738a6ff33dSHyong Youb Kim * 4 blends and 3 shuffles for 8 desc: 0.875 inst/desc
6748a6ff33dSHyong Youb Kim */
6758a6ff33dSHyong Youb Kim rearm01 = _mm256_blend_epi32(rearm01, ptype, 0x11);
6768a6ff33dSHyong Youb Kim rearm23 = _mm256_blend_epi32(rearm23,
6778a6ff33dSHyong Youb Kim _mm256_shuffle_epi32(ptype, 1), 0x11);
6788a6ff33dSHyong Youb Kim rearm45 = _mm256_blend_epi32(rearm45,
6798a6ff33dSHyong Youb Kim _mm256_shuffle_epi32(ptype, 2), 0x11);
6808a6ff33dSHyong Youb Kim rearm67 = _mm256_blend_epi32(rearm67,
6818a6ff33dSHyong Youb Kim _mm256_shuffle_epi32(ptype, 3), 0x11);
6828a6ff33dSHyong Youb Kim
6838a6ff33dSHyong Youb Kim /*
6848a6ff33dSHyong Youb Kim * Move rss_flags into ol_flags in mbuf_init.
6858a6ff33dSHyong Youb Kim * Use 1 shift and 1 blend for each desc: 2 inst/desc
6868a6ff33dSHyong Youb Kim */
6878a6ff33dSHyong Youb Kim __m256i mbuf_init4_5 = _mm256_blend_epi32(mbuf_init,
6888a6ff33dSHyong Youb Kim rss_flags, 0x44);
6898a6ff33dSHyong Youb Kim __m256i mbuf_init2_3 = _mm256_blend_epi32(mbuf_init,
6908a6ff33dSHyong Youb Kim _mm256_slli_si256(rss_flags, 4), 0x44);
6918a6ff33dSHyong Youb Kim __m256i mbuf_init0_1 = _mm256_blend_epi32(mbuf_init,
6928a6ff33dSHyong Youb Kim _mm256_slli_si256(rss_flags, 8), 0x44);
6938a6ff33dSHyong Youb Kim __m256i mbuf_init6_7 = _mm256_blend_epi32(mbuf_init,
6948a6ff33dSHyong Youb Kim _mm256_srli_si256(rss_flags, 4), 0x44);
6958a6ff33dSHyong Youb Kim
6968a6ff33dSHyong Youb Kim /*
6978a6ff33dSHyong Youb Kim * Build rearm, one per desc.
6988a6ff33dSHyong Youb Kim * 8 blends and 4 permutes: 1.5 inst/desc
6998a6ff33dSHyong Youb Kim */
7008a6ff33dSHyong Youb Kim __m256i rearm0 = _mm256_blend_epi32(rearm01,
7018a6ff33dSHyong Youb Kim mbuf_init0_1, 0xf0);
7028a6ff33dSHyong Youb Kim __m256i rearm1 = _mm256_blend_epi32(mbuf_init0_1,
7038a6ff33dSHyong Youb Kim rearm01, 0xf0);
7048a6ff33dSHyong Youb Kim __m256i rearm2 = _mm256_blend_epi32(rearm23,
7058a6ff33dSHyong Youb Kim mbuf_init2_3, 0xf0);
7068a6ff33dSHyong Youb Kim __m256i rearm3 = _mm256_blend_epi32(mbuf_init2_3,
7078a6ff33dSHyong Youb Kim rearm23, 0xf0);
7088a6ff33dSHyong Youb Kim /* Swap upper and lower 64 bits */
7098a6ff33dSHyong Youb Kim rearm0 = _mm256_permute4x64_epi64(rearm0,
7108a6ff33dSHyong Youb Kim (1 << 6) + (0 << 4) + (3 << 2) + 2);
7118a6ff33dSHyong Youb Kim rearm2 = _mm256_permute4x64_epi64(rearm2,
7128a6ff33dSHyong Youb Kim (1 << 6) + (0 << 4) + (3 << 2) + 2);
7138a6ff33dSHyong Youb Kim /* Second set of 4 descriptors */
7148a6ff33dSHyong Youb Kim __m256i rearm4 = _mm256_blend_epi32(rearm45,
7158a6ff33dSHyong Youb Kim mbuf_init4_5, 0xf0);
7168a6ff33dSHyong Youb Kim __m256i rearm5 = _mm256_blend_epi32(mbuf_init4_5,
7178a6ff33dSHyong Youb Kim rearm45, 0xf0);
7188a6ff33dSHyong Youb Kim __m256i rearm6 = _mm256_blend_epi32(rearm67,
7198a6ff33dSHyong Youb Kim mbuf_init6_7, 0xf0);
7208a6ff33dSHyong Youb Kim __m256i rearm7 = _mm256_blend_epi32(mbuf_init6_7,
7218a6ff33dSHyong Youb Kim rearm67, 0xf0);
7228a6ff33dSHyong Youb Kim rearm4 = _mm256_permute4x64_epi64(rearm4,
7238a6ff33dSHyong Youb Kim (1 << 6) + (0 << 4) + (3 << 2) + 2);
7248a6ff33dSHyong Youb Kim rearm6 = _mm256_permute4x64_epi64(rearm6,
7258a6ff33dSHyong Youb Kim (1 << 6) + (0 << 4) + (3 << 2) + 2);
7268a6ff33dSHyong Youb Kim
7278a6ff33dSHyong Youb Kim /*
7288a6ff33dSHyong Youb Kim * Write out 32B of mbuf fields.
7298a6ff33dSHyong Youb Kim * data_off - off 0 (mbuf_init)
7308a6ff33dSHyong Youb Kim * refcnt - 2 (mbuf_init)
7318a6ff33dSHyong Youb Kim * nb_segs - 4 (mbuf_init)
7328a6ff33dSHyong Youb Kim * port - 6 (mbuf_init)
7338a6ff33dSHyong Youb Kim * ol_flag - 8 (from cqd)
7348a6ff33dSHyong Youb Kim * packet_type - 16 (from cqd)
7358a6ff33dSHyong Youb Kim * pkt_len - 20 (from cqd)
7368a6ff33dSHyong Youb Kim * data_len - 24 (from cqd)
7378a6ff33dSHyong Youb Kim * vlan_tci - 26 (from cqd)
7388a6ff33dSHyong Youb Kim * rss - 28 (from cqd)
7398a6ff33dSHyong Youb Kim */
7408a6ff33dSHyong Youb Kim _mm256_storeu_si256((__m256i *)&rxmb[0]->rearm_data, rearm0);
7418a6ff33dSHyong Youb Kim _mm256_storeu_si256((__m256i *)&rxmb[1]->rearm_data, rearm1);
7428a6ff33dSHyong Youb Kim _mm256_storeu_si256((__m256i *)&rxmb[2]->rearm_data, rearm2);
7438a6ff33dSHyong Youb Kim _mm256_storeu_si256((__m256i *)&rxmb[3]->rearm_data, rearm3);
7448a6ff33dSHyong Youb Kim _mm256_storeu_si256((__m256i *)&rxmb[4]->rearm_data, rearm4);
7458a6ff33dSHyong Youb Kim _mm256_storeu_si256((__m256i *)&rxmb[5]->rearm_data, rearm5);
7468a6ff33dSHyong Youb Kim _mm256_storeu_si256((__m256i *)&rxmb[6]->rearm_data, rearm6);
7478a6ff33dSHyong Youb Kim _mm256_storeu_si256((__m256i *)&rxmb[7]->rearm_data, rearm7);
7488a6ff33dSHyong Youb Kim
7498a6ff33dSHyong Youb Kim max_rx -= 8;
7508a6ff33dSHyong Youb Kim cqd += 8;
7518a6ff33dSHyong Youb Kim rx += 8;
7528a6ff33dSHyong Youb Kim rxmb += 8;
7538a6ff33dSHyong Youb Kim }
7548a6ff33dSHyong Youb Kim
7558a6ff33dSHyong Youb Kim /*
7568a6ff33dSHyong Youb Kim * Step 3: Slow path to handle a small (<8) number of packets and
7578a6ff33dSHyong Youb Kim * occasional truncated packets.
7588a6ff33dSHyong Youb Kim */
7598a6ff33dSHyong Youb Kim while (max_rx && ((cqd->type_color &
7608a6ff33dSHyong Youb Kim CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {
7618a6ff33dSHyong Youb Kim if (unlikely(cqd->bytes_written_flags &
7628a6ff33dSHyong Youb Kim CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {
7638a6ff33dSHyong Youb Kim rte_pktmbuf_free(*rxmb++);
7648a6ff33dSHyong Youb Kim rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
7658a6ff33dSHyong Youb Kim } else {
7668a6ff33dSHyong Youb Kim *rx++ = rx_one(cqd, *rxmb++, enic);
7678a6ff33dSHyong Youb Kim }
7688a6ff33dSHyong Youb Kim cqd++;
7698a6ff33dSHyong Youb Kim max_rx--;
7708a6ff33dSHyong Youb Kim }
7718a6ff33dSHyong Youb Kim
7728a6ff33dSHyong Youb Kim /* Number of descriptors visited */
7738a6ff33dSHyong Youb Kim nb_rx = cqd - (struct cq_enet_rq_desc *)(cq->ring.descs) - cq_idx;
7748a6ff33dSHyong Youb Kim if (nb_rx == 0)
7758a6ff33dSHyong Youb Kim return 0;
7768a6ff33dSHyong Youb Kim rqd = ((struct rq_enet_desc *)rq->ring.descs) + cq_idx;
7778a6ff33dSHyong Youb Kim rxmb = rq->mbuf_ring + cq_idx;
7788a6ff33dSHyong Youb Kim cq_idx += nb_rx;
7798a6ff33dSHyong Youb Kim rq->rx_nb_hold += nb_rx;
7808a6ff33dSHyong Youb Kim if (unlikely(cq_idx == cq->ring.desc_count)) {
7818a6ff33dSHyong Youb Kim cq_idx = 0;
7828a6ff33dSHyong Youb Kim cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT;
7838a6ff33dSHyong Youb Kim }
7848a6ff33dSHyong Youb Kim cq->to_clean = cq_idx;
7858a6ff33dSHyong Youb Kim
7868a6ff33dSHyong Youb Kim /* Step 4: Restock RQ with new mbufs */
7878a6ff33dSHyong Youb Kim memcpy(rxmb, rq->free_mbufs + ENIC_RX_BURST_MAX - rq->num_free_mbufs,
7888a6ff33dSHyong Youb Kim sizeof(struct rte_mbuf *) * nb_rx);
7898a6ff33dSHyong Youb Kim rq->num_free_mbufs -= nb_rx;
7908a6ff33dSHyong Youb Kim while (nb_rx) {
7918a6ff33dSHyong Youb Kim rqd->address = (*rxmb)->buf_iova + RTE_PKTMBUF_HEADROOM;
7928a6ff33dSHyong Youb Kim nb_rx--;
7938a6ff33dSHyong Youb Kim rqd++;
7948a6ff33dSHyong Youb Kim rxmb++;
7958a6ff33dSHyong Youb Kim }
7968a6ff33dSHyong Youb Kim if (rq->rx_nb_hold > rq->rx_free_thresh) {
7978a6ff33dSHyong Youb Kim rq->posted_index = enic_ring_add(rq->ring.desc_count,
7988a6ff33dSHyong Youb Kim rq->posted_index,
7998a6ff33dSHyong Youb Kim rq->rx_nb_hold);
8008a6ff33dSHyong Youb Kim rq->rx_nb_hold = 0;
8018a6ff33dSHyong Youb Kim rte_wmb();
8028a6ff33dSHyong Youb Kim iowrite32_relaxed(rq->posted_index,
8038a6ff33dSHyong Youb Kim &rq->ctrl->posted_index);
8048a6ff33dSHyong Youb Kim }
8058a6ff33dSHyong Youb Kim
8068a6ff33dSHyong Youb Kim return rx - rx_pkts;
8078a6ff33dSHyong Youb Kim }
8088a6ff33dSHyong Youb Kim
8098a6ff33dSHyong Youb Kim bool
enic_use_vector_rx_handler(struct rte_eth_dev * eth_dev)810e92a4b41SHyong Youb Kim enic_use_vector_rx_handler(struct rte_eth_dev *eth_dev)
8118a6ff33dSHyong Youb Kim {
812e92a4b41SHyong Youb Kim struct enic *enic = pmd_priv(eth_dev);
8138a6ff33dSHyong Youb Kim
8148a6ff33dSHyong Youb Kim /* User needs to request for the avx2 handler */
8158a6ff33dSHyong Youb Kim if (!enic->enable_avx2_rx)
8168a6ff33dSHyong Youb Kim return false;
8178a6ff33dSHyong Youb Kim /* Do not support scatter Rx */
8188a6ff33dSHyong Youb Kim if (!(enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0))
8198a6ff33dSHyong Youb Kim return false;
820ac61aa64SCiara Power if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
821ac61aa64SCiara Power rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) {
822bbd8ecc0SJohn Daley ENICPMD_LOG(DEBUG, " use the non-scatter avx2 Rx handler");
8238a6ff33dSHyong Youb Kim eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
824f011fa0aSHyong Youb Kim enic->use_noscatter_vec_rx_handler = 1;
8258a6ff33dSHyong Youb Kim return true;
8268a6ff33dSHyong Youb Kim }
8278a6ff33dSHyong Youb Kim return false;
8288a6ff33dSHyong Youb Kim }
829