/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2020 Intel Corporation
 */

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_net.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio.h"
#include "virtio_rxtx_packed.h"
#include "virtqueue.h"

static inline int
virtqueue_enqueue_batch_packed_vec(struct virtnet_tx *txvq,
				   struct rte_mbuf **tx_pkts)
{
	struct virtqueue *vq = virtnet_txq_to_vq(txvq);
	uint16_t head_size = vq->hw->vtnet_hdr_size;
	uint16_t idx = vq->vq_avail_idx;
	struct virtio_net_hdr *hdr;
	struct vq_desc_extra *dxp;
	uint16_t i, cmp;

	if (vq->vq_avail_idx & PACKED_BATCH_MASK)
		return -1;

	if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
		return -1;

	/* Load four mbufs rearm data */
	RTE_BUILD_BUG_ON(REFCNT_BITS_OFFSET >= 64);
	RTE_BUILD_BUG_ON(SEG_NUM_BITS_OFFSET >= 64);
	__m256i mbufs = _mm256_set_epi64x(*tx_pkts[3]->rearm_data,
					  *tx_pkts[2]->rearm_data,
					  *tx_pkts[1]->rearm_data,
					  *tx_pkts[0]->rearm_data);

	/* refcnt=1 and nb_segs=1 */
	__m256i mbuf_ref = _mm256_set1_epi64x(DEFAULT_REARM_DATA);
	__m256i head_rooms = _mm256_set1_epi16(head_size);

	/* Check refcnt and nb_segs */
	const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
	cmp = _mm256_mask_cmpneq_epu16_mask(mask, mbufs, mbuf_ref);
	if (unlikely(cmp))
		return -1;

	/* Check headroom is enough */
	const __mmask16 data_mask = 0x1 | 0x1 << 4 | 0x1 << 8 | 0x1 << 12;
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
		offsetof(struct rte_mbuf, rearm_data));
	cmp = _mm256_mask_cmplt_epu16_mask(data_mask, mbufs, head_rooms);
	if (unlikely(cmp))
		return -1;

	virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		dxp = &vq->vq_descx[idx + i];
		dxp->ndescs = 1;
		dxp->cookie = tx_pkts[i];
	}

	virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		tx_pkts[i]->data_off -= head_size;
		tx_pkts[i]->data_len += head_size;
	}

	__m512i descs_base = _mm512_set_epi64(tx_pkts[3]->data_len,
					      tx_pkts[3]->buf_iova,
					      tx_pkts[2]->data_len,
					      tx_pkts[2]->buf_iova,
					      tx_pkts[1]->data_len,
					      tx_pkts[1]->buf_iova,
					      tx_pkts[0]->data_len,
					      tx_pkts[0]->buf_iova);

	/* id offset and data offset */
	__m512i data_offsets = _mm512_set_epi64((uint64_t)3 << ID_BITS_OFFSET,
						tx_pkts[3]->data_off,
						(uint64_t)2 << ID_BITS_OFFSET,
						tx_pkts[2]->data_off,
						(uint64_t)1 << ID_BITS_OFFSET,
						tx_pkts[1]->data_off,
						0, tx_pkts[0]->data_off);

	__m512i new_descs = _mm512_add_epi64(descs_base, data_offsets);

	uint64_t flags_temp = (uint64_t)idx << ID_BITS_OFFSET |
		(uint64_t)vq->vq_packed.cached_flags << FLAGS_BITS_OFFSET;

	/* flags offset and guest virtual address offset */
	__m128i flag_offset = _mm_set_epi64x(flags_temp, 0);
	__m512i v_offset = _mm512_broadcast_i32x4(flag_offset);
	__m512i v_desc = _mm512_add_epi64(new_descs, v_offset);

	if (!vq->hw->has_tx_offload) {
		__m128i all_mask = _mm_set1_epi16(0xFFFF);
		virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
			hdr = rte_pktmbuf_mtod_offset(tx_pkts[i],
					struct virtio_net_hdr *, -head_size);
			__m128i v_hdr = _mm_loadu_si128((void *)hdr);
			if (unlikely(_mm_mask_test_epi16_mask(NET_HDR_MASK,
							v_hdr, all_mask))) {
				__m128i all_zero = _mm_setzero_si128();
				_mm_mask_storeu_epi16((void *)hdr,
						NET_HDR_MASK, all_zero);
			}
		}
	} else {
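		/*
		 * Tx offloads negotiated: fill the virtio-net header placed
		 * in front of each packet from the mbuf offload flags.
		 */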
		virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
			hdr = rte_pktmbuf_mtod_offset(tx_pkts[i],
					struct virtio_net_hdr *, -head_size);
			virtqueue_xmit_offload(hdr, tx_pkts[i], true);
		}
	}

	/* Enqueue Packet buffers */
	_mm512_storeu_si512((void *)&vq->vq_packed.ring.desc[idx], v_desc);

	virtio_update_batch_stats(&txvq->stats, tx_pkts[0]->pkt_len,
			tx_pkts[1]->pkt_len, tx_pkts[2]->pkt_len,
			tx_pkts[3]->pkt_len);

	vq->vq_avail_idx += PACKED_BATCH_SIZE;
	vq->vq_free_cnt -= PACKED_BATCH_SIZE;

	if (vq->vq_avail_idx >= vq->vq_nentries) {
		vq->vq_avail_idx -= vq->vq_nentries;
		vq->vq_packed.cached_flags ^=
			VRING_PACKED_DESC_F_AVAIL_USED;
	}

	return 0;
}

static inline uint16_t
virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq,
				   struct rte_mbuf **rx_pkts)
{
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint64_t addrs[PACKED_BATCH_SIZE];
	uint16_t id = vq->vq_used_cons_idx;
	uint8_t desc_stats;
	uint16_t i;
	void *desc_addr;

	if (id & PACKED_BATCH_MASK)
		return -1;

	if (unlikely((id + PACKED_BATCH_SIZE) > vq->vq_nentries))
		return -1;

	/* only care avail/used bits */
#if defined(RTE_ARCH_I686)
	__m512i v_mask = _mm512_set4_epi64(PACKED_FLAGS_MASK, 0x0,
					   PACKED_FLAGS_MASK, 0x0);
#else
	__m512i v_mask = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
#endif
	desc_addr = &vq->vq_packed.ring.desc[id];

	__m512i v_desc = _mm512_loadu_si512(desc_addr);
	__m512i v_flag = _mm512_and_epi64(v_desc, v_mask);

	__m512i v_used_flag = _mm512_setzero_si512();
	if (vq->vq_packed.used_wrap_counter)
#if defined(RTE_ARCH_I686)
		v_used_flag = _mm512_set4_epi64(PACKED_FLAGS_MASK, 0x0,
						PACKED_FLAGS_MASK, 0x0);
#else
		v_used_flag = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);
#endif

	/* Check all descs are used */
	desc_stats = _mm512_cmpneq_epu64_mask(v_flag, v_used_flag);
	if (desc_stats)
		return -1;

	virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
		rx_pkts[i] = (struct rte_mbuf *)vq->vq_descx[id + i].cookie;
		rte_packet_prefetch(rte_pktmbuf_mtod(rx_pkts[i], void *));

		addrs[i] = (uintptr_t)rx_pkts[i]->rx_descriptor_fields1;
	}

	/*
	 * load len from desc, store into mbuf pkt_len and data_len
	 * len limited by 16-bit buf_len, pkt_len[16:31] can be ignored
	 */
	const __mmask16 mask = 0x6 | 0x6 << 4 | 0x6 << 8 | 0x6 << 12;
	__m512i values = _mm512_maskz_shuffle_epi32(mask, v_desc, 0xAA);

	/* reduce hdr_len from pkt_len and data_len */
	__m512i mbuf_len_offset = _mm512_maskz_set1_epi32(mask,
			(uint32_t)-hdr_size);

	__m512i v_value = _mm512_add_epi32(values, mbuf_len_offset);

	/* assert offset of data_len */
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
		offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);

	__m512i v_index = _mm512_set_epi64(addrs[3] + 8, addrs[3],
					   addrs[2] + 8, addrs[2],
					   addrs[1] + 8, addrs[1],
					   addrs[0] + 8, addrs[0]);
	/* batch store into mbufs */
	_mm512_i64scatter_epi64(0, v_index, v_value, 1);

	if (hw->has_rx_offload) {
		virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
			char *addr = (char *)rx_pkts[i]->buf_addr +
				RTE_PKTMBUF_HEADROOM - hdr_size;
			virtio_vec_rx_offload(rx_pkts[i],
					(struct virtio_net_hdr *)addr);
		}
	}

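	/* Per-queue stats and used-ring bookkeeping for the completed batch */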
	virtio_update_batch_stats(&rxvq->stats, rx_pkts[0]->pkt_len,
			rx_pkts[1]->pkt_len, rx_pkts[2]->pkt_len,
			rx_pkts[3]->pkt_len);

	vq->vq_free_cnt += PACKED_BATCH_SIZE;

	vq->vq_used_cons_idx += PACKED_BATCH_SIZE;
	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
		vq->vq_used_cons_idx -= vq->vq_nentries;
		vq->vq_packed.used_wrap_counter ^= 1;
	}

	return 0;
}
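
/*
 * Hedged sketch of a burst Tx caller, for illustration only. It assumes a
 * single-packet fallback named virtqueue_enqueue_single_packed_vec() exists
 * elsewhere in the driver (it is not defined in this file), and it omits the
 * used-descriptor cleanup and device notification a real burst function would
 * also perform. The point it shows: the batch helper above returns -1 when a
 * full, ring-aligned batch of PACKED_BATCH_SIZE packets cannot be placed, so
 * the caller falls back to enqueuing one packet at a time and stops once the
 * ring is exhausted.
 */
static inline uint16_t
virtio_xmit_pkts_packed_vec_sketch(void *tx_queue, struct rte_mbuf **tx_pkts,
				   uint16_t nb_pkts)
{
	struct virtnet_tx *txvq = tx_queue;
	uint16_t nb_tx = 0;
	uint16_t remained = nb_pkts;

	while (remained) {
		/* Try a full four-packet batch first. */
		if (remained >= PACKED_BATCH_SIZE &&
		    !virtqueue_enqueue_batch_packed_vec(txvq,
							&tx_pkts[nb_tx])) {
			nb_tx += PACKED_BATCH_SIZE;
			remained -= PACKED_BATCH_SIZE;
			continue;
		}
		/* Fall back to one packet at a time (assumed helper). */
		if (!virtqueue_enqueue_single_packed_vec(txvq,
							 tx_pkts[nb_tx])) {
			nb_tx++;
			remained--;
			continue;
		}
		/* Ring full: stop and report how many packets were queued. */
		break;
	}

	return nb_tx;
}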