/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2020 Intel Corporation
 */
40eaf7fc2SJoyce Kong
50eaf7fc2SJoyce Kong #include <stdint.h>
60eaf7fc2SJoyce Kong #include <stdio.h>
70eaf7fc2SJoyce Kong #include <stdlib.h>
80eaf7fc2SJoyce Kong #include <string.h>
90eaf7fc2SJoyce Kong #include <errno.h>
100eaf7fc2SJoyce Kong
110eaf7fc2SJoyce Kong #include <rte_net.h>
120eaf7fc2SJoyce Kong
130eaf7fc2SJoyce Kong #include "virtio_logs.h"
140eaf7fc2SJoyce Kong #include "virtio_ethdev.h"
15b5ba7ee4SMaxime Coquelin #include "virtio.h"
160eaf7fc2SJoyce Kong #include "virtio_rxtx_packed.h"
170eaf7fc2SJoyce Kong #include "virtqueue.h"
180eaf7fc2SJoyce Kong
190eaf7fc2SJoyce Kong static inline int
virtqueue_enqueue_batch_packed_vec(struct virtnet_tx * txvq,struct rte_mbuf ** tx_pkts)200eaf7fc2SJoyce Kong virtqueue_enqueue_batch_packed_vec(struct virtnet_tx *txvq,
210eaf7fc2SJoyce Kong struct rte_mbuf **tx_pkts)
220eaf7fc2SJoyce Kong {
233169550fSMaxime Coquelin struct virtqueue *vq = virtnet_txq_to_vq(txvq);
240eaf7fc2SJoyce Kong uint16_t head_size = vq->hw->vtnet_hdr_size;
250eaf7fc2SJoyce Kong uint16_t idx = vq->vq_avail_idx;
260eaf7fc2SJoyce Kong struct virtio_net_hdr *hdr;
270eaf7fc2SJoyce Kong struct vq_desc_extra *dxp;
280eaf7fc2SJoyce Kong uint16_t i, cmp;
290eaf7fc2SJoyce Kong
300eaf7fc2SJoyce Kong if (vq->vq_avail_idx & PACKED_BATCH_MASK)
310eaf7fc2SJoyce Kong return -1;
320eaf7fc2SJoyce Kong
330eaf7fc2SJoyce Kong if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
340eaf7fc2SJoyce Kong return -1;
350eaf7fc2SJoyce Kong
360eaf7fc2SJoyce Kong /* Load four mbufs rearm data */
370eaf7fc2SJoyce Kong RTE_BUILD_BUG_ON(REFCNT_BITS_OFFSET >= 64);
380eaf7fc2SJoyce Kong RTE_BUILD_BUG_ON(SEG_NUM_BITS_OFFSET >= 64);
390eaf7fc2SJoyce Kong __m256i mbufs = _mm256_set_epi64x(*tx_pkts[3]->rearm_data,
400eaf7fc2SJoyce Kong *tx_pkts[2]->rearm_data,
410eaf7fc2SJoyce Kong *tx_pkts[1]->rearm_data,
420eaf7fc2SJoyce Kong *tx_pkts[0]->rearm_data);
430eaf7fc2SJoyce Kong
440eaf7fc2SJoyce Kong /* refcnt=1 and nb_segs=1 */
450eaf7fc2SJoyce Kong __m256i mbuf_ref = _mm256_set1_epi64x(DEFAULT_REARM_DATA);
460eaf7fc2SJoyce Kong __m256i head_rooms = _mm256_set1_epi16(head_size);
470eaf7fc2SJoyce Kong
480eaf7fc2SJoyce Kong /* Check refcnt and nb_segs */
490eaf7fc2SJoyce Kong const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
500eaf7fc2SJoyce Kong cmp = _mm256_mask_cmpneq_epu16_mask(mask, mbufs, mbuf_ref);
510eaf7fc2SJoyce Kong if (unlikely(cmp))
520eaf7fc2SJoyce Kong return -1;
530eaf7fc2SJoyce Kong
540eaf7fc2SJoyce Kong /* Check headroom is enough */
550eaf7fc2SJoyce Kong const __mmask16 data_mask = 0x1 | 0x1 << 4 | 0x1 << 8 | 0x1 << 12;
560eaf7fc2SJoyce Kong RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
570eaf7fc2SJoyce Kong offsetof(struct rte_mbuf, rearm_data));
580eaf7fc2SJoyce Kong cmp = _mm256_mask_cmplt_epu16_mask(data_mask, mbufs, head_rooms);
590eaf7fc2SJoyce Kong if (unlikely(cmp))
600eaf7fc2SJoyce Kong return -1;
610eaf7fc2SJoyce Kong
620eaf7fc2SJoyce Kong virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
630eaf7fc2SJoyce Kong dxp = &vq->vq_descx[idx + i];
640eaf7fc2SJoyce Kong dxp->ndescs = 1;
650eaf7fc2SJoyce Kong dxp->cookie = tx_pkts[i];
660eaf7fc2SJoyce Kong }
670eaf7fc2SJoyce Kong
680eaf7fc2SJoyce Kong virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
690eaf7fc2SJoyce Kong tx_pkts[i]->data_off -= head_size;
700eaf7fc2SJoyce Kong tx_pkts[i]->data_len += head_size;
710eaf7fc2SJoyce Kong }
720eaf7fc2SJoyce Kong
730eaf7fc2SJoyce Kong __m512i descs_base = _mm512_set_epi64(tx_pkts[3]->data_len,
74ba55c94aSMaxime Coquelin VIRTIO_MBUF_ADDR(tx_pkts[3], vq),
750eaf7fc2SJoyce Kong tx_pkts[2]->data_len,
76ba55c94aSMaxime Coquelin VIRTIO_MBUF_ADDR(tx_pkts[2], vq),
770eaf7fc2SJoyce Kong tx_pkts[1]->data_len,
78ba55c94aSMaxime Coquelin VIRTIO_MBUF_ADDR(tx_pkts[1], vq),
790eaf7fc2SJoyce Kong tx_pkts[0]->data_len,
80ba55c94aSMaxime Coquelin VIRTIO_MBUF_ADDR(tx_pkts[0], vq));
810eaf7fc2SJoyce Kong
820eaf7fc2SJoyce Kong /* id offset and data offset */
830eaf7fc2SJoyce Kong __m512i data_offsets = _mm512_set_epi64((uint64_t)3 << ID_BITS_OFFSET,
840eaf7fc2SJoyce Kong tx_pkts[3]->data_off,
850eaf7fc2SJoyce Kong (uint64_t)2 << ID_BITS_OFFSET,
860eaf7fc2SJoyce Kong tx_pkts[2]->data_off,
870eaf7fc2SJoyce Kong (uint64_t)1 << ID_BITS_OFFSET,
880eaf7fc2SJoyce Kong tx_pkts[1]->data_off,
890eaf7fc2SJoyce Kong 0, tx_pkts[0]->data_off);
900eaf7fc2SJoyce Kong
910eaf7fc2SJoyce Kong __m512i new_descs = _mm512_add_epi64(descs_base, data_offsets);
920eaf7fc2SJoyce Kong
930eaf7fc2SJoyce Kong uint64_t flags_temp = (uint64_t)idx << ID_BITS_OFFSET |
940eaf7fc2SJoyce Kong (uint64_t)vq->vq_packed.cached_flags << FLAGS_BITS_OFFSET;
950eaf7fc2SJoyce Kong
960eaf7fc2SJoyce Kong /* flags offset and guest virtual address offset */
970eaf7fc2SJoyce Kong __m128i flag_offset = _mm_set_epi64x(flags_temp, 0);
980eaf7fc2SJoyce Kong __m512i v_offset = _mm512_broadcast_i32x4(flag_offset);
990eaf7fc2SJoyce Kong __m512i v_desc = _mm512_add_epi64(new_descs, v_offset);
1000eaf7fc2SJoyce Kong
1010eaf7fc2SJoyce Kong if (!vq->hw->has_tx_offload) {
1020eaf7fc2SJoyce Kong __m128i all_mask = _mm_set1_epi16(0xFFFF);
1030eaf7fc2SJoyce Kong virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1040eaf7fc2SJoyce Kong hdr = rte_pktmbuf_mtod_offset(tx_pkts[i],
1050eaf7fc2SJoyce Kong struct virtio_net_hdr *, -head_size);
1060eaf7fc2SJoyce Kong __m128i v_hdr = _mm_loadu_si128((void *)hdr);
1070eaf7fc2SJoyce Kong if (unlikely(_mm_mask_test_epi16_mask(NET_HDR_MASK,
1080eaf7fc2SJoyce Kong v_hdr, all_mask))) {
1090eaf7fc2SJoyce Kong __m128i all_zero = _mm_setzero_si128();
1100eaf7fc2SJoyce Kong _mm_mask_storeu_epi16((void *)hdr,
1110eaf7fc2SJoyce Kong NET_HDR_MASK, all_zero);
1120eaf7fc2SJoyce Kong }
1130eaf7fc2SJoyce Kong }
1140eaf7fc2SJoyce Kong } else {
1150eaf7fc2SJoyce Kong virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1160eaf7fc2SJoyce Kong hdr = rte_pktmbuf_mtod_offset(tx_pkts[i],
1170eaf7fc2SJoyce Kong struct virtio_net_hdr *, -head_size);
11885a4fa2fSDavid Marchand virtqueue_xmit_offload(hdr, tx_pkts[i]);
1190eaf7fc2SJoyce Kong }
1200eaf7fc2SJoyce Kong }
1210eaf7fc2SJoyce Kong
1220eaf7fc2SJoyce Kong /* Enqueue Packet buffers */
1230eaf7fc2SJoyce Kong _mm512_storeu_si512((void *)&vq->vq_packed.ring.desc[idx], v_desc);
1240eaf7fc2SJoyce Kong
1250eaf7fc2SJoyce Kong virtio_update_batch_stats(&txvq->stats, tx_pkts[0]->pkt_len,
1260eaf7fc2SJoyce Kong tx_pkts[1]->pkt_len, tx_pkts[2]->pkt_len,
1270eaf7fc2SJoyce Kong tx_pkts[3]->pkt_len);
1280eaf7fc2SJoyce Kong
1290eaf7fc2SJoyce Kong vq->vq_avail_idx += PACKED_BATCH_SIZE;
1300eaf7fc2SJoyce Kong vq->vq_free_cnt -= PACKED_BATCH_SIZE;
1310eaf7fc2SJoyce Kong
1320eaf7fc2SJoyce Kong if (vq->vq_avail_idx >= vq->vq_nentries) {
1330eaf7fc2SJoyce Kong vq->vq_avail_idx -= vq->vq_nentries;
1340eaf7fc2SJoyce Kong vq->vq_packed.cached_flags ^=
1350eaf7fc2SJoyce Kong VRING_PACKED_DESC_F_AVAIL_USED;
1360eaf7fc2SJoyce Kong }
1370eaf7fc2SJoyce Kong
1380eaf7fc2SJoyce Kong return 0;
1390eaf7fc2SJoyce Kong }
1400eaf7fc2SJoyce Kong
1410eaf7fc2SJoyce Kong static inline uint16_t
virtqueue_dequeue_batch_packed_vec(struct virtnet_rx * rxvq,struct rte_mbuf ** rx_pkts)1420eaf7fc2SJoyce Kong virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq,
1430eaf7fc2SJoyce Kong struct rte_mbuf **rx_pkts)
1440eaf7fc2SJoyce Kong {
1453169550fSMaxime Coquelin struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
1460eaf7fc2SJoyce Kong struct virtio_hw *hw = vq->hw;
1470eaf7fc2SJoyce Kong uint16_t hdr_size = hw->vtnet_hdr_size;
1480eaf7fc2SJoyce Kong uint64_t addrs[PACKED_BATCH_SIZE];
1490eaf7fc2SJoyce Kong uint16_t id = vq->vq_used_cons_idx;
1500eaf7fc2SJoyce Kong uint8_t desc_stats;
1510eaf7fc2SJoyce Kong uint16_t i;
1520eaf7fc2SJoyce Kong void *desc_addr;
1530eaf7fc2SJoyce Kong
1540eaf7fc2SJoyce Kong if (id & PACKED_BATCH_MASK)
1550eaf7fc2SJoyce Kong return -1;
1560eaf7fc2SJoyce Kong
1570eaf7fc2SJoyce Kong if (unlikely((id + PACKED_BATCH_SIZE) > vq->vq_nentries))
1580eaf7fc2SJoyce Kong return -1;
1590eaf7fc2SJoyce Kong
1600eaf7fc2SJoyce Kong /* only care avail/used bits */
1610eaf7fc2SJoyce Kong #if defined(RTE_ARCH_I686)
1620eaf7fc2SJoyce Kong __m512i v_mask = _mm512_set4_epi64(PACKED_FLAGS_MASK, 0x0,
1630eaf7fc2SJoyce Kong PACKED_FLAGS_MASK, 0x0);
1640eaf7fc2SJoyce Kong #else
1650eaf7fc2SJoyce Kong __m512i v_mask = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
1660eaf7fc2SJoyce Kong #endif
1670eaf7fc2SJoyce Kong desc_addr = &vq->vq_packed.ring.desc[id];
1680eaf7fc2SJoyce Kong
1690eaf7fc2SJoyce Kong __m512i v_desc = _mm512_loadu_si512(desc_addr);
1700eaf7fc2SJoyce Kong __m512i v_flag = _mm512_and_epi64(v_desc, v_mask);
1710eaf7fc2SJoyce Kong
1720eaf7fc2SJoyce Kong __m512i v_used_flag = _mm512_setzero_si512();
1730eaf7fc2SJoyce Kong if (vq->vq_packed.used_wrap_counter)
1740eaf7fc2SJoyce Kong #if defined(RTE_ARCH_I686)
1750eaf7fc2SJoyce Kong v_used_flag = _mm512_set4_epi64(PACKED_FLAGS_MASK, 0x0,
1760eaf7fc2SJoyce Kong PACKED_FLAGS_MASK, 0x0);
1770eaf7fc2SJoyce Kong #else
1780eaf7fc2SJoyce Kong v_used_flag = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
1790eaf7fc2SJoyce Kong #endif
1800eaf7fc2SJoyce Kong
1810eaf7fc2SJoyce Kong /* Check all descs are used */
1820eaf7fc2SJoyce Kong desc_stats = _mm512_cmpneq_epu64_mask(v_flag, v_used_flag);
1830eaf7fc2SJoyce Kong if (desc_stats)
1840eaf7fc2SJoyce Kong return -1;
1850eaf7fc2SJoyce Kong
1860eaf7fc2SJoyce Kong virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1870eaf7fc2SJoyce Kong rx_pkts[i] = (struct rte_mbuf *)vq->vq_descx[id + i].cookie;
1880eaf7fc2SJoyce Kong rte_packet_prefetch(rte_pktmbuf_mtod(rx_pkts[i], void *));
1890eaf7fc2SJoyce Kong
1900eaf7fc2SJoyce Kong addrs[i] = (uintptr_t)rx_pkts[i]->rx_descriptor_fields1;
1910eaf7fc2SJoyce Kong }
1920eaf7fc2SJoyce Kong
1930eaf7fc2SJoyce Kong /*
1940eaf7fc2SJoyce Kong * load len from desc, store into mbuf pkt_len and data_len
195*7be78d02SJosh Soref * len limited by l6bit buf_len, pkt_len[16:31] can be ignored
1960eaf7fc2SJoyce Kong */
1970eaf7fc2SJoyce Kong const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
1980eaf7fc2SJoyce Kong __m512i values = _mm512_maskz_shuffle_epi32(mask, v_desc, 0xAA);
1990eaf7fc2SJoyce Kong
2000eaf7fc2SJoyce Kong /* reduce hdr_len from pkt_len and data_len */
2010eaf7fc2SJoyce Kong __m512i mbuf_len_offset = _mm512_maskz_set1_epi32(mask,
2020eaf7fc2SJoyce Kong (uint32_t)-hdr_size);
2030eaf7fc2SJoyce Kong
2040eaf7fc2SJoyce Kong __m512i v_value = _mm512_add_epi32(values, mbuf_len_offset);
2050eaf7fc2SJoyce Kong
2060eaf7fc2SJoyce Kong /* assert offset of data_len */
2070eaf7fc2SJoyce Kong RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
2080eaf7fc2SJoyce Kong offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
2090eaf7fc2SJoyce Kong
2100eaf7fc2SJoyce Kong __m512i v_index = _mm512_set_epi64(addrs[3] + 8, addrs[3],
2110eaf7fc2SJoyce Kong addrs[2] + 8, addrs[2],
2120eaf7fc2SJoyce Kong addrs[1] + 8, addrs[1],
2130eaf7fc2SJoyce Kong addrs[0] + 8, addrs[0]);
2140eaf7fc2SJoyce Kong /* batch store into mbufs */
2150eaf7fc2SJoyce Kong _mm512_i64scatter_epi64(0, v_index, v_value, 1);
2160eaf7fc2SJoyce Kong
2170eaf7fc2SJoyce Kong if (hw->has_rx_offload) {
2180eaf7fc2SJoyce Kong virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
2190eaf7fc2SJoyce Kong char *addr = (char *)rx_pkts[i]->buf_addr +
2200eaf7fc2SJoyce Kong RTE_PKTMBUF_HEADROOM - hdr_size;
2210eaf7fc2SJoyce Kong virtio_vec_rx_offload(rx_pkts[i],
2220eaf7fc2SJoyce Kong (struct virtio_net_hdr *)addr);
2230eaf7fc2SJoyce Kong }
2240eaf7fc2SJoyce Kong }
2250eaf7fc2SJoyce Kong
2260eaf7fc2SJoyce Kong virtio_update_batch_stats(&rxvq->stats, rx_pkts[0]->pkt_len,
2270eaf7fc2SJoyce Kong rx_pkts[1]->pkt_len, rx_pkts[2]->pkt_len,
2280eaf7fc2SJoyce Kong rx_pkts[3]->pkt_len);
2290eaf7fc2SJoyce Kong
2300eaf7fc2SJoyce Kong vq->vq_free_cnt += PACKED_BATCH_SIZE;
2310eaf7fc2SJoyce Kong
2320eaf7fc2SJoyce Kong vq->vq_used_cons_idx += PACKED_BATCH_SIZE;
2330eaf7fc2SJoyce Kong if (vq->vq_used_cons_idx >= vq->vq_nentries) {
2340eaf7fc2SJoyce Kong vq->vq_used_cons_idx -= vq->vq_nentries;
2350eaf7fc2SJoyce Kong vq->vq_packed.used_wrap_counter ^= 1;
2360eaf7fc2SJoyce Kong }
2370eaf7fc2SJoyce Kong
2380eaf7fc2SJoyce Kong return 0;
2390eaf7fc2SJoyce Kong }
240