/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2020 Intel Corporation
 */

#ifndef _VIRTIO_RXTX_PACKED_H_
#define _VIRTIO_RXTX_PACKED_H_

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_net.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio.h"
#include "virtqueue.h"

#define BYTE_SIZE 8

#ifdef CC_AVX512_SUPPORT
/* flag bits offset in packed ring desc higher 64bits */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
#elif defined(RTE_ARCH_ARM)
/* flag bits offset in packed ring desc from ID */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, id)) * BYTE_SIZE)
#define FLAGS_LEN_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
#endif

#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \
	FLAGS_BITS_OFFSET)

/* reference count offset in mbuf rearm data */
#define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \
	offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)

#ifdef CC_AVX512_SUPPORT
/* segment number offset in mbuf rearm data */
#define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \
	offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)
/* default rearm data */
#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \
	1ULL << REFCNT_BITS_OFFSET)
#endif

/* id bits offset in packed ring desc higher 64bits */
#define ID_BITS_OFFSET ((offsetof(struct vring_packed_desc, id) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)

/* net hdr short size mask */
#define NET_HDR_MASK 0x3F

#ifdef RTE_ARCH_ARM
/* The cache line size differs across Arm platforms, so use a batch
 * size of four here to match the minimum cache line size and the
 * NEON register size.
 */
#define PACKED_BATCH_SIZE 4
#else
#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
	sizeof(struct vring_packed_desc))
#endif
#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)

#ifdef VIRTIO_GCC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_CLANG_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_ICC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifndef virtio_for_each_try_unroll
#define virtio_for_each_try_unroll(iter, val, size) \
	for (iter = val; iter < size; iter++)
#endif

static inline void
virtio_update_batch_stats(struct virtnet_stats *stats,
			  uint16_t pkt_len1,
			  uint16_t pkt_len2,
			  uint16_t pkt_len3,
			  uint16_t pkt_len4)
{
	stats->bytes += pkt_len1;
	stats->bytes += pkt_len2;
	stats->bytes += pkt_len3;
	stats->bytes += pkt_len4;
}
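
/*
 * Illustrative sketch only, not part of the driver: it shows how the
 * virtio_for_each_try_unroll() helper is meant to drive a loop with a
 * fixed trip count of PACKED_BATCH_SIZE, and how per-batch packet
 * lengths are then folded into the stats with
 * virtio_update_batch_stats(). The function name is hypothetical and
 * the direct pkts[0..3] indexing assumes PACKED_BATCH_SIZE == 4.
 */
static inline void
virtio_batch_stats_sketch(struct virtnet_stats *stats,
			  struct rte_mbuf **pkts)
{
	uint16_t i;

	/* The pragma-backed macro asks GCC/clang/ICC to fully unroll
	 * this loop; the fallback definition is a plain for loop.
	 */
	virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
		rte_prefetch0(pkts[i]);

	virtio_update_batch_stats(stats, pkts[0]->pkt_len, pkts[1]->pkt_len,
				  pkts[2]->pkt_len, pkts[3]->pkt_len);
}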

static inline int
virtqueue_enqueue_single_packed_vec(struct virtnet_tx *txvq,
				    struct rte_mbuf *txm)
{
	struct virtqueue *vq = virtnet_txq_to_vq(txvq);
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t slots, can_push = 0, use_indirect = 0;
	int16_t need;

	/* optimize ring usage */
	if ((virtio_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
	     virtio_with_feature(hw, VIRTIO_F_VERSION_1)) &&
	    rte_mbuf_refcnt_read(txm) == 1 && RTE_MBUF_DIRECT(txm) &&
	    txm->nb_segs == 1 && rte_pktmbuf_headroom(txm) >= hdr_size)
		can_push = 1;
	else if (virtio_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
		 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
		use_indirect = 1;

	/* How many main ring entries are needed for this Tx?
	 * indirect   => 1
	 * any_layout => number of segments
	 * default    => number of segments + 1
	 */
	slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
	need = slots - vq->vq_free_cnt;

	/* Positive value indicates it needs free vring descriptors */
	if (unlikely(need > 0)) {
		virtio_xmit_cleanup_inorder_packed(vq, need);
		need = slots - vq->vq_free_cnt;
		if (unlikely(need > 0)) {
			PMD_TX_LOG(ERR,
				   "No free tx descriptors to transmit");
			return -1;
		}
	}

	/* Enqueue packet buffers */
	virtqueue_enqueue_xmit_packed(txvq, txm, slots, use_indirect,
				      can_push, 1);

	txvq->stats.bytes += txm->pkt_len;
	return 0;
}
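
/*
 * Illustrative sketch only, with a hypothetical name: a vectorized Tx
 * burst routine would typically handle full batches with a SIMD path
 * and then fall back to virtqueue_enqueue_single_packed_vec() for the
 * remaining packets, roughly as below.
 */
static inline uint16_t
virtio_xmit_singles_sketch(struct virtnet_tx *txvq,
			   struct rte_mbuf **tx_pkts,
			   uint16_t nb_pkts)
{
	uint16_t nb_tx = 0;

	while (nb_tx < nb_pkts) {
		/* Stop at the first packet that cannot be enqueued,
		 * e.g. when no descriptors are free even after cleanup.
		 */
		if (virtqueue_enqueue_single_packed_vec(txvq,
							tx_pkts[nb_tx]) < 0)
			break;
		nb_tx++;
	}

	return nb_tx;
}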

/* Optionally fill offload information in structure */
static inline int
virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0)
		return 0;

	/* GSO is not supported in the vectorized path, skip the check */
	m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			if (rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum) < 0)
				return -1;
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
	}

	return 0;
}

/* Returns 0 on success, (uint16_t)-1 when no used descriptor is ready. */
static inline uint16_t
virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq,
				    struct rte_mbuf **rx_pkts)
{
	uint16_t used_idx, id;
	uint32_t len;
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
	struct virtio_hw *hw = vq->hw;
	uint32_t hdr_size = hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	struct vring_packed_desc *desc;
	struct rte_mbuf *cookie;

	desc = vq->vq_packed.ring.desc;
	used_idx = vq->vq_used_cons_idx;
	if (!desc_is_used(&desc[used_idx], vq))
		return -1;

	len = desc[used_idx].len;
	id = desc[used_idx].id;
	cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
	if (unlikely(cookie == NULL)) {
		PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
			vq->vq_used_cons_idx);
		return -1;
	}
	rte_prefetch0(cookie);
	rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));

	cookie->data_off = RTE_PKTMBUF_HEADROOM;
	cookie->ol_flags = 0;
	cookie->pkt_len = (uint32_t)(len - hdr_size);
	cookie->data_len = (uint32_t)(len - hdr_size);

	hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
					RTE_PKTMBUF_HEADROOM - hdr_size);
	if (hw->has_rx_offload)
		virtio_vec_rx_offload(cookie, hdr);

	*rx_pkts = cookie;

	rxvq->stats.bytes += cookie->pkt_len;

	vq->vq_free_cnt++;
	vq->vq_used_cons_idx++;
	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
		vq->vq_used_cons_idx -= vq->vq_nentries;
		vq->vq_packed.used_wrap_counter ^= 1;
	}

	return 0;
}

static inline void
virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq,
			      struct rte_mbuf **cookie,
			      uint16_t num)
{
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
	struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
	uint16_t flags = vq->vq_packed.cached_flags;
	struct virtio_hw *hw = vq->hw;
	struct vq_desc_extra *dxp;
	uint16_t idx, i;
	uint16_t batch_num, total_num = 0;
	uint16_t head_idx = vq->vq_avail_idx;
	uint16_t head_flag = vq->vq_packed.cached_flags;
	uint64_t addr;

	do {
		idx = vq->vq_avail_idx;

		batch_num = PACKED_BATCH_SIZE;
		if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
			batch_num = vq->vq_nentries - idx;
		if (unlikely((total_num + batch_num) > num))
			batch_num = num - total_num;

		virtio_for_each_try_unroll(i, 0, batch_num) {
			dxp = &vq->vq_descx[idx + i];
			dxp->cookie = (void *)cookie[total_num + i];

			addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) +
				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
			start_dp[idx + i].addr = addr;
			start_dp[idx + i].len = cookie[total_num + i]->buf_len
				- RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
			if (total_num || i) {
				virtqueue_store_flags_packed(&start_dp[idx + i],
						flags, hw->weak_barriers);
			}
		}

		vq->vq_avail_idx += batch_num;
		if (vq->vq_avail_idx >= vq->vq_nentries) {
			vq->vq_avail_idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
			flags = vq->vq_packed.cached_flags;
		}
		total_num += batch_num;
	} while (total_num < num);

	/* The head descriptor's flags are written last so the device does
	 * not see a partially refilled chain of descriptors.
	 */
	virtqueue_store_flags_packed(&start_dp[head_idx], head_flag,
				     hw->weak_barriers);
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
}
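
/*
 * Illustrative sketch only, with a hypothetical name: the scalar tail
 * of a vectorized Rx burst drains used descriptors one by one with
 * virtqueue_dequeue_single_packed_vec() and then returns the same
 * number of fresh, caller-allocated mbufs to the ring through
 * virtio_recv_refill_packed_vec(), roughly as below.
 */
static inline uint16_t
virtio_recv_singles_sketch(struct virtnet_rx *rxvq,
			   struct rte_mbuf **rx_pkts,
			   uint16_t nb_pkts,
			   struct rte_mbuf **new_mbufs)
{
	uint16_t nb_rx = 0;

	while (nb_rx < nb_pkts) {
		/* Non-zero return means no used descriptor is ready. */
		if (virtqueue_dequeue_single_packed_vec(rxvq,
							&rx_pkts[nb_rx]))
			break;
		nb_rx++;
	}

	/* Refill exactly as many descriptors as were consumed. */
	if (nb_rx)
		virtio_recv_refill_packed_vec(rxvq, new_mbufs, nb_rx);

	return nb_rx;
}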

#endif /* _VIRTIO_RXTX_PACKED_H_ */