/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2020 Intel Corporation
 */

#ifndef _VIRTIO_RXTX_PACKED_H_
#define _VIRTIO_RXTX_PACKED_H_

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#include <rte_net.h>

#include "virtio_logs.h"
#include "virtio_ethdev.h"
#include "virtio.h"
#include "virtqueue.h"

#define BYTE_SIZE 8

#ifdef CC_AVX512_SUPPORT
/* flag bits offset in packed ring desc higher 64bits */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
#elif defined(RTE_ARCH_ARM)
/* flag bits offset in packed ring desc from ID */
#define FLAGS_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, id)) * BYTE_SIZE)
#define FLAGS_LEN_BITS_OFFSET ((offsetof(struct vring_packed_desc, flags) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
#endif

#define PACKED_FLAGS_MASK ((0ULL | VRING_PACKED_DESC_F_AVAIL_USED) << \
	FLAGS_BITS_OFFSET)

/* reference count offset in mbuf rearm data */
#define REFCNT_BITS_OFFSET ((offsetof(struct rte_mbuf, refcnt) - \
	offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)

#ifdef CC_AVX512_SUPPORT
/* segment number offset in mbuf rearm data */
#define SEG_NUM_BITS_OFFSET ((offsetof(struct rte_mbuf, nb_segs) - \
	offsetof(struct rte_mbuf, rearm_data)) * BYTE_SIZE)
/* default rearm data */
#define DEFAULT_REARM_DATA (1ULL << SEG_NUM_BITS_OFFSET | \
	1ULL << REFCNT_BITS_OFFSET)
#endif

/* id bits offset in packed ring desc higher 64bits */
#define ID_BITS_OFFSET ((offsetof(struct vring_packed_desc, id) - \
	offsetof(struct vring_packed_desc, len)) * BYTE_SIZE)
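
/*
 * Layout note: struct vring_packed_desc is { addr (8B), len (4B),
 * id (2B), flags (2B) }.  The *_BITS_OFFSET values above are expressed
 * in bits so they can be used directly as shift amounts on the 64-bit
 * lanes loaded by the vector paths; PACKED_FLAGS_MASK selects the
 * AVAIL/USED bits within such a lane.
 */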

/* net hdr short size mask */
#define NET_HDR_MASK 0x3F

#ifdef RTE_ARCH_ARM
/* Cache line sizes differ across Arm platforms, so use a batch size of
 * four to match the minimum cache line size and accommodate the NEON
 * register size.
 */
#define PACKED_BATCH_SIZE 4
#else
#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \
	sizeof(struct vring_packed_desc))
#endif
#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)

#ifdef VIRTIO_GCC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("GCC unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_CLANG_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifdef VIRTIO_ICC_UNROLL_PRAGMA
#define virtio_for_each_try_unroll(iter, val, size) _Pragma("unroll 4") \
	for (iter = val; iter < size; iter++)
#endif

#ifndef virtio_for_each_try_unroll
#define virtio_for_each_try_unroll(iter, val, size) \
	for (iter = val; iter < size; iter++)
#endif
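
/*
 * Usage sketch (illustrative only, not taken from the driver): the
 * pragma variants ask the compiler to fully unroll the four iterations
 * of a batch, the fallback is a plain loop.
 *
 *	uint16_t i;
 *
 *	virtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
 *		rx_pkts[i] = (struct rte_mbuf *)vq->vq_descx[id + i].cookie;
 */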

static inline void
virtio_update_batch_stats(struct virtnet_stats *stats,
			  uint16_t pkt_len1,
			  uint16_t pkt_len2,
			  uint16_t pkt_len3,
			  uint16_t pkt_len4)
{
	stats->bytes += pkt_len1;
	stats->bytes += pkt_len2;
	stats->bytes += pkt_len3;
	stats->bytes += pkt_len4;
}

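/*
 * Enqueue a single mbuf on the packed ring; this is the scalar tail of
 * the vectorized Tx path.  Returns 0 on success, -1 when not enough
 * free descriptors can be reclaimed.
 */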
static inline int
virtqueue_enqueue_single_packed_vec(struct virtnet_tx *txvq,
				    struct rte_mbuf *txm)
{
	struct virtqueue *vq = virtnet_txq_to_vq(txvq);
	struct virtio_hw *hw = vq->hw;
	uint16_t hdr_size = hw->vtnet_hdr_size;
	uint16_t slots, can_push = 0, use_indirect = 0;
	int16_t need;

	/* optimize ring usage */
	if ((virtio_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
	     virtio_with_feature(hw, VIRTIO_F_VERSION_1)) &&
	     rte_mbuf_refcnt_read(txm) == 1 && RTE_MBUF_DIRECT(txm) &&
	     txm->nb_segs == 1 && rte_pktmbuf_headroom(txm) >= hdr_size)
		can_push = 1;
	else if (virtio_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
		 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
		use_indirect = 1;

	/* How many main ring entries are needed for this Tx?
	 * indirect   => 1
	 * any_layout => number of segments
	 * default    => number of segments + 1
	 */
	slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
	need = slots - vq->vq_free_cnt;

	/* A positive value indicates that vring descriptors need to be freed */
	if (unlikely(need > 0)) {
		virtio_xmit_cleanup_inorder_packed(vq, need);
		need = slots - vq->vq_free_cnt;
		if (unlikely(need > 0)) {
			PMD_TX_LOG(ERR,
				   "No free tx descriptors to transmit");
			return -1;
		}
	}

	/* Enqueue packet buffers */
	virtqueue_enqueue_xmit_packed(txvq, txm, slots, use_indirect,
				can_push, 1);

	txvq->stats.bytes += txm->pkt_len;
	return 0;
}

/* Optionally fill Rx offload information from the virtio net header */
static inline int
virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t hdrlen, ptype;
	int l4_supported = 0;

	/* nothing to do */
	if (hdr->flags == 0)
		return 0;

	/* GSO is not supported in the vec path, skip the check */
	m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN;

	ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
	m->packet_type = ptype;
	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
	    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
		l4_supported = 1;

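	/* The host handed over a packet with a partial checksum: either
	 * flag it as unverified or complete the checksum in software.
	 */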
	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
		if (hdr->csum_start <= hdrlen && l4_supported) {
			m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE;
		} else {
			/* Unknown proto or tunnel, do sw cksum. We can assume
			 * the cksum field is in the first segment since the
			 * buffers we provided to the host are large enough.
			 * In case of SCTP, this will be wrong since it's a CRC
			 * but there's nothing we can do.
			 */
			uint16_t csum = 0, off;

			if (rte_raw_cksum_mbuf(m, hdr->csum_start,
				rte_pktmbuf_pkt_len(m) - hdr->csum_start,
				&csum) < 0)
				return -1;
			if (likely(csum != 0xffff))
				csum = ~csum;
			off = hdr->csum_offset + hdr->csum_start;
			if (rte_pktmbuf_data_len(m) >= off + 1)
				*rte_pktmbuf_mtod_offset(m, uint16_t *,
					off) = csum;
		}
	} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {
		m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
	}

	return 0;
}

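/*
 * Receive a single packet from the packed ring; this is the scalar tail
 * of the vectorized Rx path.  Returns 0 on success, -1 when the next
 * descriptor has not been used yet or carries no mbuf cookie.
 */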
static inline uint16_t
virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq,
				    struct rte_mbuf **rx_pkts)
{
	uint16_t used_idx, id;
	uint32_t len;
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
	struct virtio_hw *hw = vq->hw;
	uint32_t hdr_size = hw->vtnet_hdr_size;
	struct virtio_net_hdr *hdr;
	struct vring_packed_desc *desc;
	struct rte_mbuf *cookie;

	desc = vq->vq_packed.ring.desc;
	used_idx = vq->vq_used_cons_idx;
	if (!desc_is_used(&desc[used_idx], vq))
		return -1;

	len = desc[used_idx].len;
	id = desc[used_idx].id;
	cookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;
	if (unlikely(cookie == NULL)) {
		PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u",
				vq->vq_used_cons_idx);
		return -1;
	}
	rte_prefetch0(cookie);
	rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));

	cookie->data_off = RTE_PKTMBUF_HEADROOM;
	cookie->ol_flags = 0;
	cookie->pkt_len = (uint32_t)(len - hdr_size);
	cookie->data_len = (uint32_t)(len - hdr_size);

	hdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +
					RTE_PKTMBUF_HEADROOM - hdr_size);
	if (hw->has_rx_offload)
		virtio_vec_rx_offload(cookie, hdr);

	*rx_pkts = cookie;

	rxvq->stats.bytes += cookie->pkt_len;

	vq->vq_free_cnt++;
	vq->vq_used_cons_idx++;
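	/* Wrap the used index and toggle the wrap counter at the end of
	 * the ring.
	 */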
	if (vq->vq_used_cons_idx >= vq->vq_nentries) {
		vq->vq_used_cons_idx -= vq->vq_nentries;
		vq->vq_packed.used_wrap_counter ^= 1;
	}

	return 0;
}

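/*
 * Refill the packed ring with 'num' receive mbufs, one batch at a time.
 * The head descriptor's flags are written last so the device never sees
 * a partially initialized chain.
 */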
static inline void
virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq,
			      struct rte_mbuf **cookie,
			      uint16_t num)
{
	struct virtqueue *vq = virtnet_rxq_to_vq(rxvq);
	struct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;
	uint16_t flags = vq->vq_packed.cached_flags;
	struct virtio_hw *hw = vq->hw;
	struct vq_desc_extra *dxp;
	uint16_t idx, i;
	uint16_t batch_num, total_num = 0;
	uint16_t head_idx = vq->vq_avail_idx;
	uint16_t head_flag = vq->vq_packed.cached_flags;
	uint64_t addr;

	do {
		idx = vq->vq_avail_idx;

		batch_num = PACKED_BATCH_SIZE;
		if (unlikely((idx + PACKED_BATCH_SIZE) > vq->vq_nentries))
			batch_num = vq->vq_nentries - idx;
		if (unlikely((total_num + batch_num) > num))
			batch_num = num - total_num;

		virtio_for_each_try_unroll(i, 0, batch_num) {
			dxp = &vq->vq_descx[idx + i];
			dxp->cookie = (void *)cookie[total_num + i];

			addr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) +
				RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;
			start_dp[idx + i].addr = addr;
			start_dp[idx + i].len = cookie[total_num + i]->buf_len
				- RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
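			/* Flags for every descriptor except the head are
			 * stored immediately; the head is deferred until all
			 * descriptors are filled in.
			 */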
			if (total_num || i) {
				virtqueue_store_flags_packed(&start_dp[idx + i],
						flags, hw->weak_barriers);
			}
		}

		vq->vq_avail_idx += batch_num;
		if (vq->vq_avail_idx >= vq->vq_nentries) {
			vq->vq_avail_idx -= vq->vq_nentries;
			vq->vq_packed.cached_flags ^=
				VRING_PACKED_DESC_F_AVAIL_USED;
			flags = vq->vq_packed.cached_flags;
		}
		total_num += batch_num;
	} while (total_num < num);

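	/* Expose the head descriptor last so the device only ever sees
	 * fully initialized descriptors.
	 */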
	virtqueue_store_flags_packed(&start_dp[head_idx], head_flag,
				hw->weak_barriers);
	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);
}

#endif /* _VIRTIO_RXTX_PACKED_H_ */