/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2020 Intel Corporation
 */

#ifndef _VIRTIO_RXTX_PACKED_H_
#define _VIRTIO_RXTX_PACKED_H_

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_net.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio.h"
#include "virtqueue.h"

#define BYTE_SIZE 8

#ifdef CC_AVX512_SUPPORT
/* flag bits offset in packed ring desc higher 64 bits */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
#elif defined(RTE_ARCH_ARM)
/* flag bits offset in packed ring desc from ID */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, id)) * BYTE_SIZE)
#define FLAGS_LEN_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
#endif

#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \
	FLAGS_BITS_OFFSET)

/* reference count offset in mbuf rearm data */
#define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \
	offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)

#ifdef CC_AVX512_SUPPORT
/* segment number offset in mbuf rearm data */
#define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \
	offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)
/* default rearm data */
#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \
	1ULL << REFCNT_BITS_OFFSET)
#endif

/* id bits offset in packed ring desc higher 64 bits */
#define ID_BITS_OFFSET ((offsetof(struct vring_packed_desc, id) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)

/* net hdr short size mask */
#define NET_HDR_MASK 0x3F

#ifdef RTE_ARCH_ARM
/* The cache line size differs across Arm platforms, so use a batch
 * size of four here to match the minimum cache line size and to fit
 * the NEON register size.
 */
#define PACKED_BATCH_SIZE 4
#else
#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
	sizeof(struct vring_packed_desc))
#endif
#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)
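/*
 * Illustrative sketch only (not used by the driver): the AVX512 batch paths
 * load the upper 64 bits of each descriptor (len | id | flags) as one lane
 * and compare its AVAIL/USED bits against PACKED_FLAGS_MASK, expecting both
 * bits set when the used wrap counter is 1 and both clear when it is 0. A
 * scalar equivalent of that test, assuming the layout implied by the offsets
 * above and ignoring the memory ordering the real code adds, could look
 * like this:
 */
#ifdef CC_AVX512_SUPPORT
static inline int
virtio_desc_hi_is_used(const struct vring_packed_desc *desc,
		int used_wrap_counter)
{
	uint64_t hi;
	uint64_t expect = used_wrap_counter ? PACKED_FLAGS_MASK : 0;

	/* upper 64 bits of the descriptor: len(32) | id(16) | flags(16) */
	memcpy(&hi, &desc->len, sizeof(hi));
	return (hi & PACKED_FLAGS_MASK) == expect;
}
#endif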
#ifdef VIRTIO_GCC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_CLANG_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_ICC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifndef virtio_for_each_try_unroll
#define virtio_for_each_try_unroll(iter, val, size) \
	for (iter = val; iter < size; iter++)
#endif

static inline void
virtio_update_batch_stats(struct virtnet_stats *stats,
			  uint16_t pkt_len1,
			  uint16_t pkt_len2,
			  uint16_t pkt_len3,
			  uint16_t pkt_len4)
{
	stats->bytes += pkt_len1;
	stats->bytes += pkt_len2;
	stats->bytes += pkt_len3;
	stats->bytes += pkt_len4;
}

static inline int
virtqueue_enqueue_single_packed_vec(struct virtnet_tx *txvq,
				    struct rte_mbuf *txm)
{
	struct virtqueue *vq = virtnet_txq_to_vq(txvq);
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t slots, can_push = 0, use_indirect = 0;
	int16_t need;

	/* optimize ring usage */
	if ((virtio_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
	     virtio_with_feature(hw, VIRTIO_F_VERSION_1)) &&
	    rte_mbuf_refcnt_read(txm) == 1 && RTE_MBUF_DIRECT(txm) &&
	    txm->nb_segs == 1 && rte_pktmbuf_headroom(txm) >= hdr_size)
		can_push = 1;
	else if (virtio_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
		 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
		use_indirect = 1;

	/* How many main ring entries are needed for this Tx?
	 * indirect   => 1
	 * any_layout => number of segments
	 * default    => number of segments + 1
	 */
	slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
	need = slots - vq->vq_free_cnt;

	/* A positive value means free descriptors are lacking;
	 * reclaim used ones.
	 */
	if (unlikely(need > 0)) {
		virtio_xmit_cleanup_inorder_packed(vq, need);
		need = slots - vq->vq_free_cnt;
		if (unlikely(need > 0)) {
			PMD_TX_LOG(ERR,
				   "No free tx descriptors to transmit");
			return -1;
		}
	}

	/* Enqueue packet buffers */
	virtqueue_enqueue_xmit_packed(txvq, txm, slots, use_indirect,
				      can_push, 1);

	txvq->stats.bytes += txm->pkt_len;
	return 0;
}
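/*
 * Illustrative helper (not part of the driver): a minimal burst loop built
 * only on the scalar fallback above. The real burst Tx entry point
 * (virtio_xmit_pkts_packed_vec()) additionally tries the SIMD batch helper
 * virtqueue_enqueue_batch_packed_vec(), defined in the arch-specific
 * companion headers, in steps of PACKED_BATCH_SIZE before falling back to
 * the single-packet path.
 */
static inline uint16_t
virtio_xmit_burst_single_sketch(struct virtnet_tx *txvq,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_tx;

	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		/* stop at the first packet that cannot be enqueued */
		if (virtqueue_enqueue_single_packed_vec(txvq, tx_pkts[nb_tx]))
			break;
	}

	return nb_tx;
}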
/* Optionally fill offload information into the mbuf */
static inline int
virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0)
		return 0;

	/* GSO is not supported in the vec path, skip the check */
	m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			if (rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum) < 0)
				return -1;
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
	}

	return 0;
}

static inline uint16_t
virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq,
				    struct rte_mbuf **rx_pkts)
{
	uint16_t used_idx, id;
	uint32_t len;
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
	struct virtio_hw *hw = vq->hw;
	uint32_t hdr_size = hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	struct vring_packed_desc *desc;
	struct rte_mbuf *cookie;

	desc = vq->vq_packed.ring.desc;
	used_idx = vq->vq_used_cons_idx;
	if (!desc_is_used(&desc[used_idx], vq))
		return -1;

	len = desc[used_idx].len;
	id = desc[used_idx].id;
	cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
	if (unlikely(cookie == NULL)) {
		PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
			vq->vq_used_cons_idx);
		return -1;
	}
	rte_prefetch0(cookie);
	rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));

	cookie->data_off = RTE_PKTMBUF_HEADROOM;
	cookie->ol_flags = 0;
	cookie->pkt_len = (uint32_t)(len - hdr_size);
	cookie->data_len = (uint32_t)(len - hdr_size);

	hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
					RTE_PKTMBUF_HEADROOM - hdr_size);
	if (hw->has_rx_offload)
		virtio_vec_rx_offload(cookie, hdr);

	*rx_pkts = cookie;

	rxvq->stats.bytes += cookie->pkt_len;

	vq->vq_free_cnt++;
	vq->vq_used_cons_idx++;
	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
		vq->vq_used_cons_idx -= vq->vq_nentries;
		vq->vq_packed.used_wrap_counter ^= 1;
	}

	return 0;
}
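/*
 * Illustrative helper (not part of the driver): a minimal receive loop built
 * only on the scalar dequeue above. The real burst Rx entry point
 * (virtio_recv_pkts_packed_vec()) first tries the SIMD batch helper
 * virtqueue_dequeue_batch_packed_vec() in steps of PACKED_BATCH_SIZE and
 * then refills the ring with virtio_recv_refill_packed_vec() below.
 */
static inline uint16_t
virtio_recv_burst_single_sketch(struct virtnet_rx *rxvq,
		struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;

	for (nb_rx = 0; nb_rx < nb_pkts; nb_rx++) {
		/* a non-zero return means no used descriptor is available */
		if (virtqueue_dequeue_single_packed_vec(rxvq, &rx_pkts[nb_rx]))
			break;
	}

	return nb_rx;
}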
static inline void
virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq,
			      struct rte_mbuf **cookie,
			      uint16_t num)
{
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
	struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
	uint16_t flags = vq->vq_packed.cached_flags;
	struct virtio_hw *hw = vq->hw;
	struct vq_desc_extra *dxp;
	uint16_t idx, i;
	uint16_t batch_num, total_num = 0;
	uint16_t head_idx = vq->vq_avail_idx;
	uint16_t head_flag = vq->vq_packed.cached_flags;
	uint64_t addr;

	do {
		idx = vq->vq_avail_idx;

		batch_num = PACKED_BATCH_SIZE;
		if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
			batch_num = vq->vq_nentries - idx;
		if (unlikely((total_num + batch_num) > num))
			batch_num = num - total_num;

		virtio_for_each_try_unroll(i, 0, batch_num) {
			dxp = &vq->vq_descx[idx + i];
			dxp->cookie = (void *)cookie[total_num + i];

			addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) +
				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
			start_dp[idx + i].addr = addr;
			start_dp[idx + i].len = cookie[total_num + i]->buf_len
				- RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
			if (total_num || i) {
				virtqueue_store_flags_packed(&start_dp[idx + i],
						flags, hw->weak_barriers);
			}
		}

		vq->vq_avail_idx += batch_num;
		if (vq->vq_avail_idx >= vq->vq_nentries) {
			vq->vq_avail_idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
			flags = vq->vq_packed.cached_flags;
		}
		total_num += batch_num;
	} while (total_num < num);

	virtqueue_store_flags_packed(&start_dp[head_idx], head_flag,
			hw->weak_barriers);
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
}

#endif /* _VIRTIO_RXTX_PACKED_H_ */
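/*
 * Usage sketch for virtio_recv_refill_packed_vec() (illustrative only, with
 * hypothetical local variables): after a burst has been received, the Rx
 * path bulk-allocates fresh mbufs and hands them back to the ring, e.g.:
 *
 *	struct rte_mbuf *new_pkts[PACKED_BATCH_SIZE];
 *
 *	if (likely(!rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,
 *			PACKED_BATCH_SIZE)))
 *		virtio_recv_refill_packed_vec(rxvq, new_pkts,
 *				PACKED_BATCH_SIZE);
 *
 * The refill routine writes the head descriptor's flags last, via
 * virtqueue_store_flags_packed() with the barrier selected by
 * hw->weak_barriers, so the device never observes a partially initialized
 * descriptor chain.
 */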