xref: /dpdk/drivers/net/virtio/virtqueue.h (revision 665b49c51639a10c553433bc2bcd85c7331c631e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #ifndef _VIRTQUEUE_H_
6 #define _VIRTQUEUE_H_
7 
8 #include <stdint.h>
9 
10 #include <rte_atomic.h>
11 #include <rte_memory.h>
12 #include <rte_mempool.h>
13 #include <rte_net.h>
14 
15 #include "virtio.h"
16 #include "virtio_ring.h"
17 #include "virtio_logs.h"
18 #include "virtio_rxtx.h"
19 #include "virtio_cvq.h"
20 
21 struct rte_mbuf;
22 
23 #define DEFAULT_TX_FREE_THRESH 32
24 #define DEFAULT_RX_FREE_THRESH 32
25 
26 #define VIRTIO_MBUF_BURST_SZ 64
27 /*
28  * Per virtio_ring.h in Linux.
29  *     For virtio_pci on SMP, we don't need to order with respect to MMIO
30  *     accesses through relaxed memory I/O windows, so thread_fence is
31  *     sufficient.
32  *
33  *     For using virtio to talk to real devices (e.g. vDPA), we do need real
34  *     barriers.
35  */
36 static inline void
37 virtio_mb(uint8_t weak_barriers)
38 {
39 	if (weak_barriers)
40 		rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
41 	else
42 		rte_mb();
43 }
44 
45 static inline void
46 virtio_rmb(uint8_t weak_barriers)
47 {
48 	if (weak_barriers)
49 		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
50 	else
51 		rte_io_rmb();
52 }
53 
54 static inline void
55 virtio_wmb(uint8_t weak_barriers)
56 {
57 	if (weak_barriers)
58 		rte_atomic_thread_fence(__ATOMIC_RELEASE);
59 	else
60 		rte_io_wmb();
61 }
62 
63 static inline uint16_t
64 virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
65 			      uint8_t weak_barriers)
66 {
67 	uint16_t flags;
68 
69 	if (weak_barriers) {
70 /* x86 prefers rte_io_rmb over __atomic_load_n as it reports
71  * better performance (~1.5%), which comes from the branch saved by the compiler.
72  * The if and else branches are identical on all platforms except Arm.
73  */
74 #ifdef RTE_ARCH_ARM
75 		flags = __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE);
76 #else
77 		flags = dp->flags;
78 		rte_io_rmb();
79 #endif
80 	} else {
81 		flags = dp->flags;
82 		rte_io_rmb();
83 	}
84 
85 	return flags;
86 }
87 
88 static inline void
89 virtqueue_store_flags_packed(struct vring_packed_desc *dp,
90 			      uint16_t flags, uint8_t weak_barriers)
91 {
92 	if (weak_barriers) {
93 /* x86 prefers rte_io_wmb over __atomic_store_n as it reports
94  * better performance (~1.5%), which comes from the branch saved by the compiler.
95  * The if and else branches are identical on all platforms except Arm.
96  */
97 #ifdef RTE_ARCH_ARM
98 		__atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
99 #else
100 		rte_io_wmb();
101 		dp->flags = flags;
102 #endif
103 	} else {
104 		rte_io_wmb();
105 		dp->flags = flags;
106 	}
107 }
108 
109 #ifdef RTE_PMD_PACKET_PREFETCH
110 #define rte_packet_prefetch(p)  rte_prefetch1(p)
111 #else
112 #define rte_packet_prefetch(p)  do {} while (0)
113 #endif
114 
115 #define VIRTQUEUE_MAX_NAME_SZ 32
116 
117 /**
118  * Return the IOVA (or virtual address in case of virtio-user) of mbuf
119  * data buffer.
120  *
121  * The address is first cast to the word size (sizeof(uintptr_t))
122  * before being cast to uint64_t. This makes it work with different
123  * combinations of word size (64-bit and 32-bit) and virtio device
124  * (virtio-pci and virtio-user).
125  */
126 #define VIRTIO_MBUF_ADDR(mb, vq) \
127 	((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset)))
128 
129 /**
130  * Return the physical address (or virtual address in case of
131  * virtio-user) of mbuf data buffer, taking care of mbuf data offset
132  */
133 #define VIRTIO_MBUF_DATA_DMA_ADDR(mb, vq) \
134 	(VIRTIO_MBUF_ADDR(mb, vq) + (mb)->data_off)
135 
136 #define VTNET_SQ_RQ_QUEUE_IDX 0
137 #define VTNET_SQ_TQ_QUEUE_IDX 1
138 #define VTNET_SQ_CQ_QUEUE_IDX 2
139 
140 enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
141 /**
142  * The maximum virtqueue size is 2^15. Use that value as the
143  * end-of-descriptor-chain terminator, since it will never be a valid index
144  * in the descriptor table. This is used to verify we are correctly
145  * handling vq_free_cnt.
146  */
147 #define VQ_RING_DESC_CHAIN_END 32768
148 
149 #define VIRTIO_NET_OK     0
150 #define VIRTIO_NET_ERR    1
151 
152 struct vq_desc_extra {
153 	void *cookie;
154 	uint16_t ndescs;
155 	uint16_t next;
156 };
157 
158 #define virtnet_rxq_to_vq(rxvq) container_of(rxvq, struct virtqueue, rxq)
159 #define virtnet_txq_to_vq(txvq) container_of(txvq, struct virtqueue, txq)
160 #define virtnet_cq_to_vq(cvq) container_of(cvq, struct virtqueue, cq)
161 
162 struct virtqueue {
163 	struct virtio_hw  *hw; /**< virtio_hw structure pointer. */
164 	union {
165 		struct {
166 			/** vring keeping desc, used and avail */
167 			struct vring ring;
168 		} vq_split;
169 
170 		struct {
171 			/** vring keeping descs and events */
172 			struct vring_packed ring;
173 			bool used_wrap_counter;
174 			uint16_t cached_flags; /**< cached flags for descs */
175 			uint16_t event_flags_shadow;
176 		} vq_packed;
177 	};
178 
179 	uint16_t vq_used_cons_idx; /**< last consumed descriptor */
180 	uint16_t vq_nentries;  /**< vring desc numbers */
181 	uint16_t vq_free_cnt;  /**< num of desc available */
182 	uint16_t vq_avail_idx; /**< sync until needed */
183 	uint16_t vq_free_thresh; /**< free threshold */
184 
185 	/**
186 	 * Head of the free chain in the descriptor table. If
187 	 * there are no free descriptors, this will be set to
188 	 * VQ_RING_DESC_CHAIN_END.
189 	 */
190 	uint16_t  vq_desc_head_idx;
191 	uint16_t  vq_desc_tail_idx;
192 	uint16_t  vq_queue_index;   /**< PCI queue index */
193 
194 	void *vq_ring_virt_mem;  /**< linear address of vring*/
195 	unsigned int vq_ring_size;
196 	uint16_t mbuf_addr_offset;
197 
198 	union {
199 		struct virtnet_rx rxq;
200 		struct virtnet_tx txq;
201 		struct virtnet_ctl cq;
202 	};
203 
204 	const struct rte_memzone *mz; /**< mem zone to populate ring. */
205 	rte_iova_t vq_ring_mem; /**< physical address of vring,
206 	                         * or virtual address for virtio_user. */
207 
208 	uint16_t  *notify_addr;
209 	struct vq_desc_extra vq_descx[];
210 };
211 
212 /* If multiqueue is provided by host, then we support it. */
213 #define VIRTIO_NET_CTRL_MQ   4
214 
215 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
216 #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG          1
217 
218 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
219 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
220 
221 /**
222  * This is the first element of the scatter-gather list.  If you don't
223  * specify GSO or CSUM features, you can simply ignore the header.
224  */
225 struct virtio_net_hdr {
226 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
227 #define VIRTIO_NET_HDR_F_DATA_VALID 2    /**< Checksum is valid */
228 	uint8_t flags;
229 #define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
230 #define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
231 #define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
232 #define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
233 #define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
234 	uint8_t gso_type;
235 	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
236 	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
237 	uint16_t csum_start;  /**< Position to start checksumming from */
238 	uint16_t csum_offset; /**< Offset after that to place checksum */
239 };
240 
241 /**
242  * This is the version of the header to use when the MRG_RXBUF
243  * feature has been negotiated.
244  */
245 struct virtio_net_hdr_mrg_rxbuf {
246 	struct   virtio_net_hdr hdr;
247 	uint16_t num_buffers; /**< Number of merged rx buffers */
248 };
249 
250 /* Region reserved to allow for transmit header and indirect ring */
251 #define VIRTIO_MAX_TX_INDIRECT 8
252 struct virtio_tx_region {
253 	struct virtio_net_hdr_mrg_rxbuf tx_hdr;
254 	union {
255 		struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT];
256 		struct vring_packed_desc
257 			tx_packed_indir[VIRTIO_MAX_TX_INDIRECT];
258 	} __rte_aligned(16);
259 };
260 
261 static inline int
262 desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq)
263 {
264 	uint16_t used, avail, flags;
265 
266 	flags = virtqueue_fetch_flags_packed(desc, vq->hw->weak_barriers);
267 	used = !!(flags & VRING_PACKED_DESC_F_USED);
268 	avail = !!(flags & VRING_PACKED_DESC_F_AVAIL);
269 
270 	return avail == used && used == vq->vq_packed.used_wrap_counter;
271 }
272 
273 static inline void
274 vring_desc_init_packed(struct virtqueue *vq, int n)
275 {
276 	int i;
277 	for (i = 0; i < n - 1; i++) {
278 		vq->vq_packed.ring.desc[i].id = i;
279 		vq->vq_descx[i].next = i + 1;
280 	}
281 	vq->vq_packed.ring.desc[i].id = i;
282 	vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END;
283 }
284 
285 /* Chain all the descriptors in the ring with an END */
286 static inline void
287 vring_desc_init_split(struct vring_desc *dp, uint16_t n)
288 {
289 	uint16_t i;
290 
291 	for (i = 0; i < n - 1; i++)
292 		dp[i].next = (uint16_t)(i + 1);
293 	dp[i].next = VQ_RING_DESC_CHAIN_END;
294 }
295 
296 static inline void
297 vring_desc_init_indirect_packed(struct vring_packed_desc *dp, int n)
298 {
299 	int i;
300 	for (i = 0; i < n; i++) {
301 		dp[i].id = (uint16_t)i;
302 		dp[i].flags = VRING_DESC_F_WRITE;
303 	}
304 }
305 
306 /**
307  * Tell the backend not to interrupt us. Implementation for packed virtqueues.
308  */
309 static inline void
310 virtqueue_disable_intr_packed(struct virtqueue *vq)
311 {
312 	if (vq->vq_packed.event_flags_shadow != RING_EVENT_FLAGS_DISABLE) {
313 		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_DISABLE;
314 		vq->vq_packed.ring.driver->desc_event_flags =
315 			vq->vq_packed.event_flags_shadow;
316 	}
317 }
318 
319 /**
320  * Tell the backend not to interrupt us. Implementation for split virtqueues.
321  */
322 static inline void
323 virtqueue_disable_intr_split(struct virtqueue *vq)
324 {
325 	vq->vq_split.ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
326 }
327 
328 /**
329  * Tell the backend not to interrupt us.
330  */
331 static inline void
332 virtqueue_disable_intr(struct virtqueue *vq)
333 {
334 	if (virtio_with_packed_queue(vq->hw))
335 		virtqueue_disable_intr_packed(vq);
336 	else
337 		virtqueue_disable_intr_split(vq);
338 }
339 
340 /**
341  * Tell the backend to interrupt us. Implementation for packed virtqueues.
342  */
343 static inline void
344 virtqueue_enable_intr_packed(struct virtqueue *vq)
345 {
346 	if (vq->vq_packed.event_flags_shadow == RING_EVENT_FLAGS_DISABLE) {
347 		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_ENABLE;
348 		vq->vq_packed.ring.driver->desc_event_flags =
349 			vq->vq_packed.event_flags_shadow;
350 	}
351 }
352 
353 /**
354  * Tell the backend to interrupt us. Implementation for split virtqueues.
355  */
356 static inline void
357 virtqueue_enable_intr_split(struct virtqueue *vq)
358 {
359 	vq->vq_split.ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
360 }
361 
362 /**
363  * Tell the backend to interrupt us.
364  */
365 static inline void
366 virtqueue_enable_intr(struct virtqueue *vq)
367 {
368 	if (virtio_with_packed_queue(vq->hw))
369 		virtqueue_enable_intr_packed(vq);
370 	else
371 		virtqueue_enable_intr_split(vq);
372 }
373 
374 /**
375  *  Get all mbufs to be freed.
376  */
377 struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq);
378 
379 /* Flush the elements in the used ring. */
380 void virtqueue_rxvq_flush(struct virtqueue *vq);
381 
382 int virtqueue_rxvq_reset_packed(struct virtqueue *vq);
383 
384 int virtqueue_txvq_reset_packed(struct virtqueue *vq);
385 
386 void virtqueue_txq_indirect_headers_init(struct virtqueue *vq);
387 
388 struct virtqueue *virtqueue_alloc(struct virtio_hw *hw, uint16_t index,
389 		uint16_t num, int type, int node, const char *name);
390 
391 void virtqueue_free(struct virtqueue *vq);
392 
393 static inline int
394 virtqueue_full(const struct virtqueue *vq)
395 {
396 	return vq->vq_free_cnt == 0;
397 }
398 
399 static inline int
400 virtio_get_queue_type(struct virtio_hw *hw, uint16_t vq_idx)
401 {
402 	if (vq_idx == hw->max_queue_pairs * 2)
403 		return VTNET_CQ;
404 	else if (vq_idx % 2 == 0)
405 		return VTNET_RQ;
406 	else
407 		return VTNET_TQ;
408 }
409 
410 /* virtqueue_nused has a load-acquire or rte_io_rmb inside */
411 static inline uint16_t
412 virtqueue_nused(const struct virtqueue *vq)
413 {
414 	uint16_t idx;
415 
416 	if (vq->hw->weak_barriers) {
417 	/**
418 	 * x86 prefers rte_smp_rmb over __atomic_load_n as it
419 	 * reports slightly better performance, which comes from the
420 	 * branch saved by the compiler.
421 	 * The if and else branches are identical, with the smp and io
422 	 * barriers both defined as compiler barriers on x86.
423 	 */
424 #ifdef RTE_ARCH_X86_64
425 		idx = vq->vq_split.ring.used->idx;
426 		rte_smp_rmb();
427 #else
428 		idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx,
429 				__ATOMIC_ACQUIRE);
430 #endif
431 	} else {
432 		idx = vq->vq_split.ring.used->idx;
433 		rte_io_rmb();
434 	}
435 	return idx - vq->vq_used_cons_idx;
436 }
437 
438 void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
439 void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx);
440 void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
441 			  uint16_t num);
442 
443 static inline void
444 vq_update_avail_idx(struct virtqueue *vq)
445 {
446 	if (vq->hw->weak_barriers) {
447 	/* x86 prefers rte_smp_wmb over __atomic_store_n as
448 	 * it reports slightly better performance, which comes from
449 	 * the branch saved by the compiler.
450 	 * The if and else branches are identical, with the smp and
451 	 * io barriers both defined as compiler barriers on x86.
452 	 */
453 #ifdef RTE_ARCH_X86_64
454 		rte_smp_wmb();
455 		vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
456 #else
457 		__atomic_store_n(&vq->vq_split.ring.avail->idx,
458 				 vq->vq_avail_idx, __ATOMIC_RELEASE);
459 #endif
460 	} else {
461 		rte_io_wmb();
462 		vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
463 	}
464 }
465 
466 static inline void
467 vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
468 {
469 	uint16_t avail_idx;
470 	/*
471 	 * Place the head of the descriptor chain into the next slot and make
472 	 * it usable to the host. The chain is made available now rather than
473 	 * deferring to virtqueue_notify() in the hopes that if the host is
474 	 * currently running on another CPU, we can keep it processing the new
475 	 * descriptor.
476 	 */
477 	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
478 	if (unlikely(vq->vq_split.ring.avail->ring[avail_idx] != desc_idx))
479 		vq->vq_split.ring.avail->ring[avail_idx] = desc_idx;
480 	vq->vq_avail_idx++;
481 }
482 
483 static inline int
484 virtqueue_kick_prepare(struct virtqueue *vq)
485 {
486 	/*
487 	 * Ensure updated avail->idx is visible to vhost before reading
488 	 * the used->flags.
489 	 */
490 	virtio_mb(vq->hw->weak_barriers);
491 	return !(vq->vq_split.ring.used->flags & VRING_USED_F_NO_NOTIFY);
492 }
493 
494 static inline int
495 virtqueue_kick_prepare_packed(struct virtqueue *vq)
496 {
497 	uint16_t flags;
498 
499 	/*
500 	 * Ensure updated data is visible to vhost before reading the flags.
501 	 */
502 	virtio_mb(vq->hw->weak_barriers);
503 	flags = vq->vq_packed.ring.device->desc_event_flags;
504 
505 	return flags != RING_EVENT_FLAGS_DISABLE;
506 }
507 
508 /*
509  * virtqueue_kick_prepare*() or the virtio_wmb() should be called
510  * before this function to be sure that all the data is visible to vhost.
511  */
512 static inline void
513 virtqueue_notify(struct virtqueue *vq)
514 {
515 	VIRTIO_OPS(vq->hw)->notify_queue(vq->hw, vq);
516 }
517 
518 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
519 #define VIRTQUEUE_DUMP(vq) do { \
520 	uint16_t used_idx, nused; \
521 	used_idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx, \
522 				   __ATOMIC_RELAXED); \
523 	nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
524 	if (virtio_with_packed_queue((vq)->hw)) { \
525 		PMD_INIT_LOG(DEBUG, \
526 		"VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \
527 		" cached_flags=0x%x; used_wrap_counter=%d", \
528 		(vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \
529 		(vq)->vq_avail_idx, (vq)->vq_packed.cached_flags, \
530 		(vq)->vq_packed.used_wrap_counter); \
531 		break; \
532 	} \
533 	PMD_INIT_LOG(DEBUG, \
534 	  "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
535 	  " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
536 	  " avail.flags=0x%x; used.flags=0x%x", \
537 	  (vq)->vq_nentries, (vq)->vq_free_cnt, nused, (vq)->vq_desc_head_idx, \
538 	  (vq)->vq_split.ring.avail->idx, (vq)->vq_used_cons_idx, \
539 	  __atomic_load_n(&(vq)->vq_split.ring.used->idx, __ATOMIC_RELAXED), \
540 	  (vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \
541 } while (0)
542 #else
543 #define VIRTQUEUE_DUMP(vq) do { } while (0)
544 #endif
545 
546 /* avoid the write operation when not necessary, to lessen cache issues */
547 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
548 	typeof(var) *const var_ = &(var);	\
549 	typeof(val)  const val_ = (val);	\
550 	if (*var_ != val_)			\
551 		*var_ = val_;			\
552 } while (0)
553 
554 #define virtqueue_clear_net_hdr(hdr) do {		\
555 	typeof(hdr) hdr_ = (hdr);			\
556 	ASSIGN_UNLESS_EQUAL((hdr_)->csum_start, 0);	\
557 	ASSIGN_UNLESS_EQUAL((hdr_)->csum_offset, 0);	\
558 	ASSIGN_UNLESS_EQUAL((hdr_)->flags, 0);		\
559 	ASSIGN_UNLESS_EQUAL((hdr_)->gso_type, 0);	\
560 	ASSIGN_UNLESS_EQUAL((hdr_)->gso_size, 0);	\
561 	ASSIGN_UNLESS_EQUAL((hdr_)->hdr_len, 0);	\
562 } while (0)
563 
564 static inline void
565 virtqueue_xmit_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *cookie)
566 {
567 	uint64_t csum_l4 = cookie->ol_flags & RTE_MBUF_F_TX_L4_MASK;
568 	uint16_t o_l23_len = (cookie->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) ?
569 			     cookie->outer_l2_len + cookie->outer_l3_len : 0;
570 
571 	if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
572 		csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;
573 
574 	switch (csum_l4) {
575 	case RTE_MBUF_F_TX_UDP_CKSUM:
576 		hdr->csum_start = o_l23_len + cookie->l2_len + cookie->l3_len;
577 		hdr->csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
578 		hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
579 		break;
580 
581 	case RTE_MBUF_F_TX_TCP_CKSUM:
582 		hdr->csum_start = o_l23_len + cookie->l2_len + cookie->l3_len;
583 		hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
584 		hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
585 		break;
586 
587 	default:
588 		ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
589 		ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
590 		ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
591 		break;
592 	}
593 
594 	/* TCP Segmentation Offload */
595 	if (cookie->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
596 		hdr->gso_type = (cookie->ol_flags & RTE_MBUF_F_TX_IPV6) ?
597 			VIRTIO_NET_HDR_GSO_TCPV6 :
598 			VIRTIO_NET_HDR_GSO_TCPV4;
599 		hdr->gso_size = cookie->tso_segsz;
600 		hdr->hdr_len = o_l23_len + cookie->l2_len + cookie->l3_len +
601 			       cookie->l4_len;
602 	} else {
603 		ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
604 		ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
605 		ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
606 	}
607 }
608 
609 static inline void
610 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
611 			      uint16_t needed, int use_indirect, int can_push,
612 			      int in_order)
613 {
614 	struct virtio_tx_region *txr = txvq->hdr_mz->addr;
615 	struct vq_desc_extra *dxp;
616 	struct virtqueue *vq = virtnet_txq_to_vq(txvq);
617 	struct vring_packed_desc *start_dp, *head_dp;
618 	uint16_t idx, id, head_idx, head_flags;
619 	int16_t head_size = vq->hw->vtnet_hdr_size;
620 	struct virtio_net_hdr *hdr;
621 	uint16_t prev;
622 	bool prepend_header = false;
623 	uint16_t seg_num = cookie->nb_segs;
624 
625 	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
626 
627 	dxp = &vq->vq_descx[id];
628 	dxp->ndescs = needed;
629 	dxp->cookie = cookie;
630 
631 	head_idx = vq->vq_avail_idx;
632 	idx = head_idx;
633 	prev = head_idx;
634 	start_dp = vq->vq_packed.ring.desc;
635 
636 	head_dp = &vq->vq_packed.ring.desc[idx];
637 	head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
638 	head_flags |= vq->vq_packed.cached_flags;
639 
640 	if (can_push) {
641 		/* prepend cannot fail, checked by caller */
642 		hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
643 					      -head_size);
644 		prepend_header = true;
645 
646 		/* if offload disabled, it is not zeroed below, do it now */
647 		if (!vq->hw->has_tx_offload)
648 			virtqueue_clear_net_hdr(hdr);
649 	} else if (use_indirect) {
650 		/* setup tx ring slot to point to indirect
651 		 * descriptor list stored in reserved region.
652 		 *
653 		 * the first slot in indirect ring is already preset
654 		 * to point to the header in reserved region
655 		 */
656 		start_dp[idx].addr = txvq->hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_packed_indir, txr);
657 		start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_packed_desc);
658 		/* The packed descriptor id needs to be restored in the in-order case. */
659 		if (in_order)
660 			start_dp[idx].id = idx;
661 		/* reset flags for indirect desc */
662 		head_flags = VRING_DESC_F_INDIRECT;
663 		head_flags |= vq->vq_packed.cached_flags;
664 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
665 
666 		/* loop below will fill in rest of the indirect elements */
667 		start_dp = txr[idx].tx_packed_indir;
668 		idx = 1;
669 	} else {
670 		/* setup first tx ring slot to point to header
671 		 * stored in reserved region.
672 		 */
673 		start_dp[idx].addr = txvq->hdr_mem + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
674 		start_dp[idx].len = vq->hw->vtnet_hdr_size;
675 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
676 		idx++;
677 		if (idx >= vq->vq_nentries) {
678 			idx -= vq->vq_nentries;
679 			vq->vq_packed.cached_flags ^=
680 				VRING_PACKED_DESC_F_AVAIL_USED;
681 		}
682 	}
683 
684 	if (vq->hw->has_tx_offload)
685 		virtqueue_xmit_offload(hdr, cookie);
686 
687 	do {
688 		uint16_t flags;
689 
690 		start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
691 		start_dp[idx].len  = cookie->data_len;
692 		if (prepend_header) {
693 			start_dp[idx].addr -= head_size;
694 			start_dp[idx].len += head_size;
695 			prepend_header = false;
696 		}
697 
698 		if (likely(idx != head_idx)) {
699 			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
700 			flags |= vq->vq_packed.cached_flags;
701 			start_dp[idx].flags = flags;
702 		}
703 		prev = idx;
704 		idx++;
705 		if (idx >= vq->vq_nentries) {
706 			idx -= vq->vq_nentries;
707 			vq->vq_packed.cached_flags ^=
708 				VRING_PACKED_DESC_F_AVAIL_USED;
709 		}
710 	} while ((cookie = cookie->next) != NULL);
711 
712 	start_dp[prev].id = id;
713 
714 	if (use_indirect) {
715 		idx = head_idx;
716 		if (++idx >= vq->vq_nentries) {
717 			idx -= vq->vq_nentries;
718 			vq->vq_packed.cached_flags ^=
719 				VRING_PACKED_DESC_F_AVAIL_USED;
720 		}
721 	}
722 
723 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
724 	vq->vq_avail_idx = idx;
725 
726 	if (!in_order) {
727 		vq->vq_desc_head_idx = dxp->next;
728 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
729 			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
730 	}
731 
732 	virtqueue_store_flags_packed(head_dp, head_flags,
733 				     vq->hw->weak_barriers);
734 }
735 
736 static void
737 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
738 {
739 	struct vq_desc_extra *dxp;
740 
741 	dxp = &vq->vq_descx[id];
742 	vq->vq_free_cnt += dxp->ndescs;
743 
744 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
745 		vq->vq_desc_head_idx = id;
746 	else
747 		vq->vq_descx[vq->vq_desc_tail_idx].next = id;
748 
749 	vq->vq_desc_tail_idx = id;
750 	dxp->next = VQ_RING_DESC_CHAIN_END;
751 }
752 
753 static void
754 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, uint16_t num)
755 {
756 	uint16_t used_idx, id, curr_id, free_cnt = 0;
757 	uint16_t size = vq->vq_nentries;
758 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
759 	struct vq_desc_extra *dxp;
760 	int nb = num;
761 
762 	used_idx = vq->vq_used_cons_idx;
763 	/* desc_is_used has a load-acquire or rte_io_rmb inside
764 	 * to safely check for used descriptors in the virtqueue.
765 	 */
766 	while (nb > 0 && desc_is_used(&desc[used_idx], vq)) {
767 		id = desc[used_idx].id;
768 		do {
769 			curr_id = used_idx;
770 			dxp = &vq->vq_descx[used_idx];
771 			used_idx += dxp->ndescs;
772 			free_cnt += dxp->ndescs;
773 			nb -= dxp->ndescs;
774 			if (used_idx >= size) {
775 				used_idx -= size;
776 				vq->vq_packed.used_wrap_counter ^= 1;
777 			}
778 			if (dxp->cookie != NULL) {
779 				rte_pktmbuf_free(dxp->cookie);
780 				dxp->cookie = NULL;
781 			}
782 		} while (curr_id != id);
783 	}
784 	vq->vq_used_cons_idx = used_idx;
785 	vq->vq_free_cnt += free_cnt;
786 }
787 
788 static void
789 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, uint16_t num)
790 {
791 	uint16_t used_idx, id;
792 	uint16_t size = vq->vq_nentries;
793 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
794 	struct vq_desc_extra *dxp;
795 
796 	used_idx = vq->vq_used_cons_idx;
797 	/* desc_is_used has a load-acquire or rte_io_rmb inside
798 	 * to safely check for used descriptors in the virtqueue.
799 	 */
800 	while (num-- && desc_is_used(&desc[used_idx], vq)) {
801 		id = desc[used_idx].id;
802 		dxp = &vq->vq_descx[id];
803 		vq->vq_used_cons_idx += dxp->ndescs;
804 		if (vq->vq_used_cons_idx >= size) {
805 			vq->vq_used_cons_idx -= size;
806 			vq->vq_packed.used_wrap_counter ^= 1;
807 		}
808 		vq_ring_free_id_packed(vq, id);
809 		if (dxp->cookie != NULL) {
810 			rte_pktmbuf_free(dxp->cookie);
811 			dxp->cookie = NULL;
812 		}
813 		used_idx = vq->vq_used_cons_idx;
814 	}
815 }
816 
817 /* Cleanup from completed transmits. */
818 static inline void
819 virtio_xmit_cleanup_packed(struct virtqueue *vq, uint16_t num, int in_order)
820 {
821 	if (in_order)
822 		virtio_xmit_cleanup_inorder_packed(vq, num);
823 	else
824 		virtio_xmit_cleanup_normal_packed(vq, num);
825 }
826 
827 static inline void
828 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
829 {
830 	uint16_t i, used_idx, desc_idx;
831 	for (i = 0; i < num; i++) {
832 		struct vring_used_elem *uep;
833 		struct vq_desc_extra *dxp;
834 
835 		used_idx = (uint16_t)(vq->vq_used_cons_idx &
836 				(vq->vq_nentries - 1));
837 		uep = &vq->vq_split.ring.used->ring[used_idx];
838 
839 		desc_idx = (uint16_t)uep->id;
840 		dxp = &vq->vq_descx[desc_idx];
841 		vq->vq_used_cons_idx++;
842 		vq_ring_free_chain(vq, desc_idx);
843 
844 		if (dxp->cookie != NULL) {
845 			rte_pktmbuf_free(dxp->cookie);
846 			dxp->cookie = NULL;
847 		}
848 	}
849 }
850 
851 /* Cleanup from completed inorder transmits. */
852 static __rte_always_inline void
853 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
854 {
855 	uint16_t i, idx = vq->vq_used_cons_idx;
856 	int16_t free_cnt = 0;
857 	struct vq_desc_extra *dxp = NULL;
858 
859 	if (unlikely(num == 0))
860 		return;
861 
862 	for (i = 0; i < num; i++) {
863 		dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
864 		free_cnt += dxp->ndescs;
865 		if (dxp->cookie != NULL) {
866 			rte_pktmbuf_free(dxp->cookie);
867 			dxp->cookie = NULL;
868 		}
869 	}
870 
871 	vq->vq_free_cnt += free_cnt;
872 	vq->vq_used_cons_idx = idx;
873 }
874 #endif /* _VIRTQUEUE_H_ */
875