1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #ifndef _VIRTQUEUE_H_
6 #define _VIRTQUEUE_H_
7 
8 #include <stdint.h>
9 
10 #include <rte_atomic.h>
11 #include <rte_memory.h>
12 #include <rte_mempool.h>
13 #include <rte_net.h>
14 
15 #include "virtio.h"
16 #include "virtio_ring.h"
17 #include "virtio_logs.h"
18 #include "virtio_rxtx.h"
19 
20 struct rte_mbuf;
21 
22 #define DEFAULT_TX_FREE_THRESH 32
23 #define DEFAULT_RX_FREE_THRESH 32
24 
25 #define VIRTIO_MBUF_BURST_SZ 64
26 /*
27  * Per virtio_ring.h in Linux.
28  *     For virtio_pci on SMP, we don't need to order with respect to MMIO
29  *     accesses through relaxed memory I/O windows, so thread_fence is
30  *     sufficient.
31  *
32  *     When using virtio to talk to real devices (e.g. vDPA) we do need real
33  *     barriers.
34  */
35 static inline void
36 virtio_mb(uint8_t weak_barriers)
37 {
38 	if (weak_barriers)
39 		rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
40 	else
41 		rte_mb();
42 }
43 
44 static inline void
45 virtio_rmb(uint8_t weak_barriers)
46 {
47 	if (weak_barriers)
48 		rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
49 	else
50 		rte_io_rmb();
51 }
52 
53 static inline void
54 virtio_wmb(uint8_t weak_barriers)
55 {
56 	if (weak_barriers)
57 		rte_atomic_thread_fence(__ATOMIC_RELEASE);
58 	else
59 		rte_io_wmb();
60 }
61 
62 static inline uint16_t
63 virtqueue_fetch_flags_packed(struct vring_packed_desc *dp,
64 			      uint8_t weak_barriers)
65 {
66 	uint16_t flags;
67 
68 	if (weak_barriers) {
69 /* x86 prefers using rte_io_rmb over __atomic_load_n as it reports
70  * better performance (~1.5%), which comes from the saved branch by the compiler.
71  * The if and else branches are identical on all platforms except Arm.
72  */
73 #ifdef RTE_ARCH_ARM
74 		flags = __atomic_load_n(&dp->flags, __ATOMIC_ACQUIRE);
75 #else
76 		flags = dp->flags;
77 		rte_io_rmb();
78 #endif
79 	} else {
80 		flags = dp->flags;
81 		rte_io_rmb();
82 	}
83 
84 	return flags;
85 }
86 
87 static inline void
88 virtqueue_store_flags_packed(struct vring_packed_desc *dp,
89 			      uint16_t flags, uint8_t weak_barriers)
90 {
91 	if (weak_barriers) {
92 /* x86 prefers using rte_io_wmb over __atomic_store_n as it reports
93  * better performance (~1.5%), which comes from the saved branch by the compiler.
94  * The if and else branches are identical on all platforms except Arm.
95  */
96 #ifdef RTE_ARCH_ARM
97 		__atomic_store_n(&dp->flags, flags, __ATOMIC_RELEASE);
98 #else
99 		rte_io_wmb();
100 		dp->flags = flags;
101 #endif
102 	} else {
103 		rte_io_wmb();
104 		dp->flags = flags;
105 	}
106 }
107 
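/*
 * Example (illustrative sketch, not part of the driver API): publishing a
 * single packed descriptor with the helper above.  The payload fields are
 * written first and the flags last, so that virtqueue_store_flags_packed()
 * is the release point that hands the descriptor over to the device.  The
 * function name and parameters are assumptions of the example only.
 */
static inline void
example_publish_packed_desc(struct vring_packed_desc *dp, uint64_t addr,
			    uint32_t len, uint16_t id, uint16_t flags,
			    uint8_t weak_barriers)
{
	dp->addr = addr;	/* buffer IOVA */
	dp->len = len;		/* buffer length */
	dp->id = id;		/* completion identifier */
	/* Store the flags last, with release semantics on weak memory models. */
	virtqueue_store_flags_packed(dp, flags, weak_barriers);
}
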
108 #ifdef RTE_PMD_PACKET_PREFETCH
109 #define rte_packet_prefetch(p)  rte_prefetch1(p)
110 #else
111 #define rte_packet_prefetch(p)  do {} while(0)
112 #endif
113 
114 #define VIRTQUEUE_MAX_NAME_SZ 32
115 
116 #define VTNET_SQ_RQ_QUEUE_IDX 0
117 #define VTNET_SQ_TQ_QUEUE_IDX 1
118 #define VTNET_SQ_CQ_QUEUE_IDX 2
119 
120 enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 };
121 /**
122  * The maximum virtqueue size is 2^15. Use that value as the
123  * end-of-descriptor-chain terminator since it will never be a valid index
124  * in the descriptor table. This is used to verify we are correctly
125  * handling vq_free_cnt.
126  */
127 #define VQ_RING_DESC_CHAIN_END 32768
128 
129 /**
130  * Control the RX mode, i.e. promiscuous, allmulti, etc.
131  * All commands require an "out" sg entry containing a 1 byte
132  * state value, zero = disable, non-zero = enable.  Commands
133  * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
134  * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
135  */
136 #define VIRTIO_NET_CTRL_RX              0
137 #define VIRTIO_NET_CTRL_RX_PROMISC      0
138 #define VIRTIO_NET_CTRL_RX_ALLMULTI     1
139 #define VIRTIO_NET_CTRL_RX_ALLUNI       2
140 #define VIRTIO_NET_CTRL_RX_NOMULTI      3
141 #define VIRTIO_NET_CTRL_RX_NOUNI        4
142 #define VIRTIO_NET_CTRL_RX_NOBCAST      5
143 
144 /**
145  * Control the MAC
146  *
147  * The MAC filter table is managed by the hypervisor; the guest should
148  * assume the size is infinite.  Filtering should be considered
149  * non-perfect, i.e. depending on hypervisor resources, the guest may
150  * receive packets from sources not specified in the filter list.
151  *
152  * In addition to the class/cmd header, the TABLE_SET command requires
153  * two out scatterlists.  Each contains a 4 byte count of entries followed
154  * by a concatenated byte stream of the ETH_ALEN MAC addresses.  The
155  * first sg list contains unicast addresses, the second is for multicast.
156  * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
157  * is available.
158  *
159  * The ADDR_SET command requires one out scatterlist containing a
160  * 6-byte MAC address. This functionality is present if the
161  * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
162  */
163 struct virtio_net_ctrl_mac {
164 	uint32_t entries;
165 	uint8_t macs[][RTE_ETHER_ADDR_LEN];
166 } __rte_packed;
167 
168 #define VIRTIO_NET_CTRL_MAC    1
169 #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
170 #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
171 
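/*
 * Example (illustrative sketch, not part of the driver API): laying out the
 * unicast scatterlist of a MAC_TABLE_SET command in a caller-provided
 * buffer, as described above (4-byte count followed by the concatenated
 * addresses).  Byte-order conversion, where the transport requires it, is
 * omitted; the function name and parameters are assumptions of the example.
 */
static inline int
example_fill_mac_table(struct virtio_net_ctrl_mac *tbl, uint32_t buf_len,
		       const uint8_t (*addrs)[RTE_ETHER_ADDR_LEN],
		       uint32_t nb_addrs)
{
	uint32_t i, j;
	uint32_t need = sizeof(tbl->entries) + nb_addrs * RTE_ETHER_ADDR_LEN;

	if (buf_len < need)
		return -1;	/* buffer too small for the table */

	tbl->entries = nb_addrs;
	for (i = 0; i < nb_addrs; i++)
		for (j = 0; j < RTE_ETHER_ADDR_LEN; j++)
			tbl->macs[i][j] = addrs[i][j];
	return 0;
}
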
172 /**
173  * Control VLAN filtering
174  *
175  * The VLAN filter table is controlled via a simple ADD/DEL interface.
176  * VLAN IDs not added may be filtered by the hypervisor.  Del is the
177  * opposite of add.  Both commands expect an out entry containing a
178  * 2-byte VLAN ID.  VLAN filtering is available with the
179  * VIRTIO_NET_F_CTRL_VLAN feature bit.
180  */
181 #define VIRTIO_NET_CTRL_VLAN     2
182 #define VIRTIO_NET_CTRL_VLAN_ADD 0
183 #define VIRTIO_NET_CTRL_VLAN_DEL 1
184 
185 /*
186  * Control link announce acknowledgement
187  *
188  * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
189  * the driver has received the notification; the device clears the
190  * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
191  * this command.
192  */
193 #define VIRTIO_NET_CTRL_ANNOUNCE     3
194 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
195 
196 struct virtio_net_ctrl_hdr {
197 	uint8_t class;
198 	uint8_t cmd;
199 } __rte_packed;
200 
201 typedef uint8_t virtio_net_ctrl_ack;
202 
203 #define VIRTIO_NET_OK     0
204 #define VIRTIO_NET_ERR    1
205 
206 #define VIRTIO_MAX_CTRL_DATA 2048
207 
208 struct virtio_pmd_ctrl {
209 	struct virtio_net_ctrl_hdr hdr;
210 	virtio_net_ctrl_ack status;
211 	uint8_t data[VIRTIO_MAX_CTRL_DATA];
212 };
213 
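/*
 * Example (illustrative sketch, not part of the driver API): filling a
 * control command that toggles promiscuous mode, matching the
 * VIRTIO_NET_CTRL_RX description above (class/cmd header plus a one-byte
 * on/off state).  Queueing the command on the control virtqueue is not
 * shown; the function name is an assumption of the example.
 */
static inline void
example_build_promisc_cmd(struct virtio_pmd_ctrl *ctrl, uint8_t enable)
{
	ctrl->hdr.class = VIRTIO_NET_CTRL_RX;
	ctrl->hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
	ctrl->data[0] = !!enable;	/* zero = disable, non-zero = enable */
	ctrl->status = VIRTIO_NET_ERR;	/* overwritten by the device */
}
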
214 struct vq_desc_extra {
215 	void *cookie;
216 	uint16_t ndescs;
217 	uint16_t next;
218 };
219 
220 struct virtqueue {
221 	struct virtio_hw  *hw; /**< virtio_hw structure pointer. */
222 	union {
223 		struct {
224 			/** vring keeping desc, used and avail */
225 			struct vring ring;
226 		} vq_split;
227 
228 		struct {
229 			/** vring keeping descs and events */
230 			struct vring_packed ring;
231 			bool used_wrap_counter;
232 			uint16_t cached_flags; /**< cached flags for descs */
233 			uint16_t event_flags_shadow;
234 		} vq_packed;
235 	};
236 
237 	uint16_t vq_used_cons_idx; /**< last consumed descriptor */
238 	uint16_t vq_nentries;  /**< vring desc numbers */
239 	uint16_t vq_free_cnt;  /**< num of desc available */
240 	uint16_t vq_avail_idx; /**< sync until needed */
241 	uint16_t vq_free_thresh; /**< free threshold */
242 
243 	void *vq_ring_virt_mem;  /**< linear address of vring */
244 	unsigned int vq_ring_size;
245 
246 	union {
247 		struct virtnet_rx rxq;
248 		struct virtnet_tx txq;
249 		struct virtnet_ctl cq;
250 	};
251 
252 	rte_iova_t vq_ring_mem; /**< physical address of vring,
253 	                         * or virtual address for virtio_user. */
254 
255 	/**
256 	 * Head of the free chain in the descriptor table. If
257 	 * there are no free descriptors, this will be set to
258 	 * VQ_RING_DESC_CHAIN_END.
259 	 */
260 	uint16_t  vq_desc_head_idx;
261 	uint16_t  vq_desc_tail_idx;
262 	uint16_t  vq_queue_index;
263 	uint16_t offset; /**< relative offset to obtain addr in mbuf */
264 	uint16_t  *notify_addr;
265 	struct rte_mbuf **sw_ring;  /**< RX software ring. */
266 	struct vq_desc_extra vq_descx[0];
267 };
268 
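/*
 * Example (illustrative sketch): vq_descx[] is a trailing array, so a
 * virtqueue must be allocated with room for one vq_desc_extra entry per
 * ring descriptor.  Only the size computation is shown here; the driver
 * itself allocates this region with the DPDK memory allocator.
 */
static inline size_t
example_virtqueue_alloc_size(uint16_t nb_desc)
{
	return sizeof(struct virtqueue) +
		(size_t)nb_desc * sizeof(struct vq_desc_extra);
}
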
269 /* If multiqueue is provided by the host, then we support it. */
270 #define VIRTIO_NET_CTRL_MQ   4
271 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
272 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
273 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
274 
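/*
 * Example (illustrative sketch, not part of the driver API): the
 * VQ_PAIRS_SET command carries a 16-bit count of queue pairs as its
 * payload.  The little-endian byte layout below follows the virtio
 * specification; the function name is an assumption of the example.
 */
static inline void
example_build_mq_cmd(struct virtio_pmd_ctrl *ctrl, uint16_t nb_queue_pairs)
{
	ctrl->hdr.class = VIRTIO_NET_CTRL_MQ;
	ctrl->hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
	ctrl->data[0] = (uint8_t)(nb_queue_pairs & 0xff);
	ctrl->data[1] = (uint8_t)(nb_queue_pairs >> 8);
}
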
275 /**
276  * This is the first element of the scatter-gather list.  If you don't
277  * specify GSO or CSUM features, you can simply ignore the header.
278  */
279 struct virtio_net_hdr {
280 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start, csum_offset */
281 #define VIRTIO_NET_HDR_F_DATA_VALID 2    /**< Checksum is valid */
282 	uint8_t flags;
283 #define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
284 #define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
285 #define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
286 #define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
287 #define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
288 	uint8_t gso_type;
289 	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
290 	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
291 	uint16_t csum_start;  /**< Position to start checksumming from */
292 	uint16_t csum_offset; /**< Offset after that to place checksum */
293 };
294 
295 /**
296  * This is the version of the header to use when the MRG_RXBUF
297  * feature has been negotiated.
298  */
299 struct virtio_net_hdr_mrg_rxbuf {
300 	struct   virtio_net_hdr hdr;
301 	uint16_t num_buffers; /**< Number of merged rx buffers */
302 };
303 
304 /* Region reserved to allow for transmit header and indirect ring */
305 #define VIRTIO_MAX_TX_INDIRECT 8
306 struct virtio_tx_region {
307 	struct virtio_net_hdr_mrg_rxbuf tx_hdr;
308 	union {
309 		struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT];
310 		struct vring_packed_desc
311 			tx_packed_indir[VIRTIO_MAX_TX_INDIRECT];
312 	} __rte_aligned(16);
313 };
314 
315 static inline int
316 desc_is_used(struct vring_packed_desc *desc, struct virtqueue *vq)
317 {
318 	uint16_t used, avail, flags;
319 
320 	flags = virtqueue_fetch_flags_packed(desc, vq->hw->weak_barriers);
321 	used = !!(flags & VRING_PACKED_DESC_F_USED);
322 	avail = !!(flags & VRING_PACKED_DESC_F_AVAIL);
323 
324 	return avail == used && used == vq->vq_packed.used_wrap_counter;
325 }
326 
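/*
 * Example (illustrative sketch, not part of the driver API): peeking at
 * the id of the next used element on a packed ring without consuming it.
 * Returns -1 when the descriptor at the consumer index has not been used
 * yet; the function name is an assumption of the example.
 */
static inline int
example_peek_used_id_packed(struct virtqueue *vq)
{
	struct vring_packed_desc *desc =
		&vq->vq_packed.ring.desc[vq->vq_used_cons_idx];

	if (!desc_is_used(desc, vq))
		return -1;
	return desc->id;
}
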
327 static inline void
328 vring_desc_init_packed(struct virtqueue *vq, int n)
329 {
330 	int i;
331 	for (i = 0; i < n - 1; i++) {
332 		vq->vq_packed.ring.desc[i].id = i;
333 		vq->vq_descx[i].next = i + 1;
334 	}
335 	vq->vq_packed.ring.desc[i].id = i;
336 	vq->vq_descx[i].next = VQ_RING_DESC_CHAIN_END;
337 }
338 
339 /* Chain all the descriptors in the ring with an END */
340 static inline void
341 vring_desc_init_split(struct vring_desc *dp, uint16_t n)
342 {
343 	uint16_t i;
344 
345 	for (i = 0; i < n - 1; i++)
346 		dp[i].next = (uint16_t)(i + 1);
347 	dp[i].next = VQ_RING_DESC_CHAIN_END;
348 }
349 
350 static inline void
351 vring_desc_init_indirect_packed(struct vring_packed_desc *dp, int n)
352 {
353 	int i;
354 	for (i = 0; i < n; i++) {
355 		dp[i].id = (uint16_t)i;
356 		dp[i].flags = VRING_DESC_F_WRITE;
357 	}
358 }
359 
360 /**
361  * Tell the backend not to interrupt us. Implementation for packed virtqueues.
362  */
363 static inline void
364 virtqueue_disable_intr_packed(struct virtqueue *vq)
365 {
366 	if (vq->vq_packed.event_flags_shadow != RING_EVENT_FLAGS_DISABLE) {
367 		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_DISABLE;
368 		vq->vq_packed.ring.driver->desc_event_flags =
369 			vq->vq_packed.event_flags_shadow;
370 	}
371 }
372 
373 /**
374  * Tell the backend not to interrupt us. Implementation for split virtqueues.
375  */
376 static inline void
377 virtqueue_disable_intr_split(struct virtqueue *vq)
378 {
379 	vq->vq_split.ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
380 }
381 
382 /**
383  * Tell the backend not to interrupt us.
384  */
385 static inline void
386 virtqueue_disable_intr(struct virtqueue *vq)
387 {
388 	if (virtio_with_packed_queue(vq->hw))
389 		virtqueue_disable_intr_packed(vq);
390 	else
391 		virtqueue_disable_intr_split(vq);
392 }
393 
394 /**
395  * Tell the backend to interrupt us. Implementation for packed virtqueues.
396  */
397 static inline void
398 virtqueue_enable_intr_packed(struct virtqueue *vq)
399 {
400 	if (vq->vq_packed.event_flags_shadow == RING_EVENT_FLAGS_DISABLE) {
401 		vq->vq_packed.event_flags_shadow = RING_EVENT_FLAGS_ENABLE;
402 		vq->vq_packed.ring.driver->desc_event_flags =
403 			vq->vq_packed.event_flags_shadow;
404 	}
405 }
406 
407 /**
408  * Tell the backend to interrupt us. Implementation for split virtqueues.
409  */
410 static inline void
411 virtqueue_enable_intr_split(struct virtqueue *vq)
412 {
413 	vq->vq_split.ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT);
414 }
415 
416 /**
417  * Tell the backend to interrupt us.
418  */
419 static inline void
420 virtqueue_enable_intr(struct virtqueue *vq)
421 {
422 	if (virtio_with_packed_queue(vq->hw))
423 		virtqueue_enable_intr_packed(vq);
424 	else
425 		virtqueue_enable_intr_split(vq);
426 }
427 
428 /**
429  *  Dump virtqueue internal structures, for debugging purposes only.
430  */
431 void virtqueue_dump(struct virtqueue *vq);
432 /**
433  *  Get all mbufs to be freed.
434  */
435 struct rte_mbuf *virtqueue_detach_unused(struct virtqueue *vq);
436 
437 /* Flush the elements in the used ring. */
438 void virtqueue_rxvq_flush(struct virtqueue *vq);
439 
440 int virtqueue_rxvq_reset_packed(struct virtqueue *vq);
441 
442 int virtqueue_txvq_reset_packed(struct virtqueue *vq);
443 
444 static inline int
445 virtqueue_full(const struct virtqueue *vq)
446 {
447 	return vq->vq_free_cnt == 0;
448 }
449 
450 static inline int
451 virtio_get_queue_type(struct virtio_hw *hw, uint16_t vq_idx)
452 {
453 	if (vq_idx == hw->max_queue_pairs * 2)
454 		return VTNET_CQ;
455 	else if (vq_idx % 2 == 0)
456 		return VTNET_RQ;
457 	else
458 		return VTNET_TQ;
459 }
460 
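/*
 * Example (illustrative sketch): the inverse of the mapping used by
 * virtio_get_queue_type() - RX queue i lives at virtqueue 2*i and TX
 * queue i at 2*i + 1; the function names are assumptions of the example.
 */
static inline uint16_t
example_rxq_to_vq_idx(uint16_t rxq_id)
{
	return rxq_id * 2;	/* even indexes are receive queues */
}

static inline uint16_t
example_txq_to_vq_idx(uint16_t txq_id)
{
	return txq_id * 2 + 1;	/* odd indexes are transmit queues */
}
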
461 /* virtqueue_nused has a load-acquire or rte_io_rmb inside. */
462 static inline uint16_t
463 virtqueue_nused(const struct virtqueue *vq)
464 {
465 	uint16_t idx;
466 
467 	if (vq->hw->weak_barriers) {
468 	/**
469 	 * x86 prefers using rte_smp_rmb over __atomic_load_n as it
470 	 * reports slightly better performance, which comes from the saved
471 	 * branch by the compiler.
472 	 * The if and else branches are identical with the smp and io
473 	 * barriers both defined as compiler barriers on x86.
474 	 */
475 #ifdef RTE_ARCH_X86_64
476 		idx = vq->vq_split.ring.used->idx;
477 		rte_smp_rmb();
478 #else
479 		idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx,
480 				__ATOMIC_ACQUIRE);
481 #endif
482 	} else {
483 		idx = vq->vq_split.ring.used->idx;
484 		rte_io_rmb();
485 	}
486 	return idx - vq->vq_used_cons_idx;
487 }
488 
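/*
 * Example (illustrative sketch): a consumer typically bounds its burst by
 * the number of descriptors the device has already returned, relying on
 * the barrier inside virtqueue_nused(); the function name and burst_size
 * parameter are assumptions of the example.
 */
static inline uint16_t
example_rx_burst_budget(const struct virtqueue *vq, uint16_t burst_size)
{
	uint16_t nb_used = virtqueue_nused(vq);

	return RTE_MIN(nb_used, burst_size);
}
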
489 void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
490 void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t used_idx);
491 void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
492 			  uint16_t num);
493 
494 static inline void
495 vq_update_avail_idx(struct virtqueue *vq)
496 {
497 	if (vq->hw->weak_barriers) {
498 	/* x86 prefers using rte_smp_wmb over __atomic_store_n as
499 	 * it reports slightly better performance, which comes from the
500 	 * saved branch by the compiler.
501 	 * The if and else branches are identical with the smp and
502 	 * io barriers both defined as compiler barriers on x86.
503 	 */
504 #ifdef RTE_ARCH_X86_64
505 		rte_smp_wmb();
506 		vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
507 #else
508 		__atomic_store_n(&vq->vq_split.ring.avail->idx,
509 				 vq->vq_avail_idx, __ATOMIC_RELEASE);
510 #endif
511 	} else {
512 		rte_io_wmb();
513 		vq->vq_split.ring.avail->idx = vq->vq_avail_idx;
514 	}
515 }
516 
517 static inline void
518 vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx)
519 {
520 	uint16_t avail_idx;
521 	/*
522 	 * Place the head of the descriptor chain into the next slot and make
523 	 * it usable to the host. The chain is made available now rather than
524 	 * deferring to virtqueue_notify() in the hopes that if the host is
525 	 * currently running on another CPU, we can keep it processing the new
526 	 * descriptor.
527 	 */
528 	avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1));
529 	if (unlikely(vq->vq_split.ring.avail->ring[avail_idx] != desc_idx))
530 		vq->vq_split.ring.avail->ring[avail_idx] = desc_idx;
531 	vq->vq_avail_idx++;
532 }
533 
534 static inline int
535 virtqueue_kick_prepare(struct virtqueue *vq)
536 {
537 	/*
538 	 * Ensure updated avail->idx is visible to vhost before reading
539 	 * the used->flags.
540 	 */
541 	virtio_mb(vq->hw->weak_barriers);
542 	return !(vq->vq_split.ring.used->flags & VRING_USED_F_NO_NOTIFY);
543 }
544 
545 static inline int
546 virtqueue_kick_prepare_packed(struct virtqueue *vq)
547 {
548 	uint16_t flags;
549 
550 	/*
551 	 * Ensure updated data is visible to vhost before reading the flags.
552 	 */
553 	virtio_mb(vq->hw->weak_barriers);
554 	flags = vq->vq_packed.ring.device->desc_event_flags;
555 
556 	return flags != RING_EVENT_FLAGS_DISABLE;
557 }
558 
559 /*
560  * virtqueue_kick_prepare*() or the virtio_wmb() should be called
561  * before this function to ensure that all the data is visible to vhost.
562  */
563 static inline void
564 virtqueue_notify(struct virtqueue *vq)
565 {
566 	VIRTIO_OPS(vq->hw)->notify_queue(vq->hw, vq);
567 }
568 
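/*
 * Example (illustrative sketch, not part of the driver API): the usual
 * split-ring submission sequence built from the helpers above.  Filling
 * the descriptor chain for desc_idx is assumed to have been done already
 * by the caller; the function name is an assumption of the example.
 */
static inline void
example_submit_and_kick_split(struct virtqueue *vq, uint16_t desc_idx)
{
	/* Expose the head of the chain in the avail ring... */
	vq_update_avail_ring(vq, desc_idx);
	/* ...then publish the new avail index with the proper barrier. */
	vq_update_avail_idx(vq);
	/* Notify only if the device has not suppressed notifications. */
	if (virtqueue_kick_prepare(vq))
		virtqueue_notify(vq);
}
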
569 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
570 #define VIRTQUEUE_DUMP(vq) do { \
571 	uint16_t used_idx, nused; \
572 	used_idx = __atomic_load_n(&(vq)->vq_split.ring.used->idx, \
573 				   __ATOMIC_RELAXED); \
574 	nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \
575 	if (virtio_with_packed_queue((vq)->hw)) { \
576 		PMD_INIT_LOG(DEBUG, \
577 		"VQ: - size=%d; free=%d; used_cons_idx=%d; avail_idx=%d;" \
578 		" cached_flags=0x%x; used_wrap_counter=%d", \
579 		(vq)->vq_nentries, (vq)->vq_free_cnt, (vq)->vq_used_cons_idx, \
580 		(vq)->vq_avail_idx, (vq)->vq_packed.cached_flags, \
581 		(vq)->vq_packed.used_wrap_counter); \
582 		break; \
583 	} \
584 	PMD_INIT_LOG(DEBUG, \
585 	  "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \
586 	  " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \
587 	  " avail.flags=0x%x; used.flags=0x%x", \
588 	  (vq)->vq_nentries, (vq)->vq_free_cnt, nused, (vq)->vq_desc_head_idx, \
589 	  (vq)->vq_split.ring.avail->idx, (vq)->vq_used_cons_idx, \
590 	  __atomic_load_n(&(vq)->vq_split.ring.used->idx, __ATOMIC_RELAXED), \
591 	  (vq)->vq_split.ring.avail->flags, (vq)->vq_split.ring.used->flags); \
592 } while (0)
593 #else
594 #define VIRTQUEUE_DUMP(vq) do { } while (0)
595 #endif
596 
597 /* Avoid the write operation when it is not needed, to lessen cache issues. */
598 #define ASSIGN_UNLESS_EQUAL(var, val) do {	\
599 	typeof(var) *const var_ = &(var);	\
600 	typeof(val)  const val_ = (val);	\
601 	if (*var_ != val_)			\
602 		*var_ = val_;			\
603 } while (0)
604 
605 #define virtqueue_clear_net_hdr(hdr) do {		\
606 	typeof(hdr) hdr_ = (hdr);			\
607 	ASSIGN_UNLESS_EQUAL((hdr_)->csum_start, 0);	\
608 	ASSIGN_UNLESS_EQUAL((hdr_)->csum_offset, 0);	\
609 	ASSIGN_UNLESS_EQUAL((hdr_)->flags, 0);		\
610 	ASSIGN_UNLESS_EQUAL((hdr_)->gso_type, 0);	\
611 	ASSIGN_UNLESS_EQUAL((hdr_)->gso_size, 0);	\
612 	ASSIGN_UNLESS_EQUAL((hdr_)->hdr_len, 0);	\
613 } while (0)
614 
615 static inline void
616 virtqueue_xmit_offload(struct virtio_net_hdr *hdr,
617 			struct rte_mbuf *cookie,
618 			uint8_t offload)
619 {
620 	if (offload) {
621 		if (cookie->ol_flags & PKT_TX_TCP_SEG)
622 			cookie->ol_flags |= PKT_TX_TCP_CKSUM;
623 
624 		switch (cookie->ol_flags & PKT_TX_L4_MASK) {
625 		case PKT_TX_UDP_CKSUM:
626 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
627 			hdr->csum_offset = offsetof(struct rte_udp_hdr,
628 				dgram_cksum);
629 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
630 			break;
631 
632 		case PKT_TX_TCP_CKSUM:
633 			hdr->csum_start = cookie->l2_len + cookie->l3_len;
634 			hdr->csum_offset = offsetof(struct rte_tcp_hdr, cksum);
635 			hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
636 			break;
637 
638 		default:
639 			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
640 			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
641 			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
642 			break;
643 		}
644 
645 		/* TCP Segmentation Offload */
646 		if (cookie->ol_flags & PKT_TX_TCP_SEG) {
647 			hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ?
648 				VIRTIO_NET_HDR_GSO_TCPV6 :
649 				VIRTIO_NET_HDR_GSO_TCPV4;
650 			hdr->gso_size = cookie->tso_segsz;
651 			hdr->hdr_len =
652 				cookie->l2_len +
653 				cookie->l3_len +
654 				cookie->l4_len;
655 		} else {
656 			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
657 			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
658 			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
659 		}
660 	}
661 }
662 
663 static inline void
664 virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
665 			      uint16_t needed, int use_indirect, int can_push,
666 			      int in_order)
667 {
668 	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
669 	struct vq_desc_extra *dxp;
670 	struct virtqueue *vq = txvq->vq;
671 	struct vring_packed_desc *start_dp, *head_dp;
672 	uint16_t idx, id, head_idx, head_flags;
673 	int16_t head_size = vq->hw->vtnet_hdr_size;
674 	struct virtio_net_hdr *hdr;
675 	uint16_t prev;
676 	bool prepend_header = false;
677 	uint16_t seg_num = cookie->nb_segs;
678 
679 	id = in_order ? vq->vq_avail_idx : vq->vq_desc_head_idx;
680 
681 	dxp = &vq->vq_descx[id];
682 	dxp->ndescs = needed;
683 	dxp->cookie = cookie;
684 
685 	head_idx = vq->vq_avail_idx;
686 	idx = head_idx;
687 	prev = head_idx;
688 	start_dp = vq->vq_packed.ring.desc;
689 
690 	head_dp = &vq->vq_packed.ring.desc[idx];
691 	head_flags = cookie->next ? VRING_DESC_F_NEXT : 0;
692 	head_flags |= vq->vq_packed.cached_flags;
693 
694 	if (can_push) {
695 		/* prepend cannot fail, checked by caller */
696 		hdr = rte_pktmbuf_mtod_offset(cookie, struct virtio_net_hdr *,
697 					      -head_size);
698 		prepend_header = true;
699 
700 		/* if offload is disabled, the header is not zeroed below, so do it now */
701 		if (!vq->hw->has_tx_offload)
702 			virtqueue_clear_net_hdr(hdr);
703 	} else if (use_indirect) {
704 		/* setup tx ring slot to point to indirect
705 		 * descriptor list stored in reserved region.
706 		 *
707 		 * the first slot in indirect ring is already preset
708 		 * to point to the header in reserved region
709 		 */
710 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
711 			RTE_PTR_DIFF(&txr[idx].tx_packed_indir, txr);
712 		start_dp[idx].len   = (seg_num + 1) *
713 			sizeof(struct vring_packed_desc);
714 		/* reset flags for indirect desc */
715 		head_flags = VRING_DESC_F_INDIRECT;
716 		head_flags |= vq->vq_packed.cached_flags;
717 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
718 
719 		/* the loop below will fill in the rest of the indirect elements */
720 		start_dp = txr[idx].tx_packed_indir;
721 		idx = 1;
722 	} else {
723 		/* setup first tx ring slot to point to header
724 		 * stored in reserved region.
725 		 */
726 		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
727 			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
728 		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
729 		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
730 		idx++;
731 		if (idx >= vq->vq_nentries) {
732 			idx -= vq->vq_nentries;
733 			vq->vq_packed.cached_flags ^=
734 				VRING_PACKED_DESC_F_AVAIL_USED;
735 		}
736 	}
737 
738 	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
739 
740 	do {
741 		uint16_t flags;
742 
743 		start_dp[idx].addr = rte_mbuf_data_iova(cookie);
744 		start_dp[idx].len  = cookie->data_len;
745 		if (prepend_header) {
746 			start_dp[idx].addr -= head_size;
747 			start_dp[idx].len += head_size;
748 			prepend_header = false;
749 		}
750 
751 		if (likely(idx != head_idx)) {
752 			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
753 			flags |= vq->vq_packed.cached_flags;
754 			start_dp[idx].flags = flags;
755 		}
756 		prev = idx;
757 		idx++;
758 		if (idx >= vq->vq_nentries) {
759 			idx -= vq->vq_nentries;
760 			vq->vq_packed.cached_flags ^=
761 				VRING_PACKED_DESC_F_AVAIL_USED;
762 		}
763 	} while ((cookie = cookie->next) != NULL);
764 
765 	start_dp[prev].id = id;
766 
767 	if (use_indirect) {
768 		idx = head_idx;
769 		if (++idx >= vq->vq_nentries) {
770 			idx -= vq->vq_nentries;
771 			vq->vq_packed.cached_flags ^=
772 				VRING_PACKED_DESC_F_AVAIL_USED;
773 		}
774 	}
775 
776 	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
777 	vq->vq_avail_idx = idx;
778 
779 	if (!in_order) {
780 		vq->vq_desc_head_idx = dxp->next;
781 		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
782 			vq->vq_desc_tail_idx = VQ_RING_DESC_CHAIN_END;
783 	}
784 
785 	virtqueue_store_flags_packed(head_dp, head_flags,
786 				     vq->hw->weak_barriers);
787 }
788 
789 static void
790 vq_ring_free_id_packed(struct virtqueue *vq, uint16_t id)
791 {
792 	struct vq_desc_extra *dxp;
793 
794 	dxp = &vq->vq_descx[id];
795 	vq->vq_free_cnt += dxp->ndescs;
796 
797 	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END)
798 		vq->vq_desc_head_idx = id;
799 	else
800 		vq->vq_descx[vq->vq_desc_tail_idx].next = id;
801 
802 	vq->vq_desc_tail_idx = id;
803 	dxp->next = VQ_RING_DESC_CHAIN_END;
804 }
805 
806 static void
807 virtio_xmit_cleanup_inorder_packed(struct virtqueue *vq, int num)
808 {
809 	uint16_t used_idx, id, curr_id, free_cnt = 0;
810 	uint16_t size = vq->vq_nentries;
811 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
812 	struct vq_desc_extra *dxp;
813 
814 	used_idx = vq->vq_used_cons_idx;
815 	/* desc_is_used has a load-acquire or rte_io_rmb inside
816 	 * and is used here to wait for used descriptors in the virtqueue.
817 	 */
818 	while (num > 0 && desc_is_used(&desc[used_idx], vq)) {
819 		id = desc[used_idx].id;
820 		do {
821 			curr_id = used_idx;
822 			dxp = &vq->vq_descx[used_idx];
823 			used_idx += dxp->ndescs;
824 			free_cnt += dxp->ndescs;
825 			num -= dxp->ndescs;
826 			if (used_idx >= size) {
827 				used_idx -= size;
828 				vq->vq_packed.used_wrap_counter ^= 1;
829 			}
830 			if (dxp->cookie != NULL) {
831 				rte_pktmbuf_free(dxp->cookie);
832 				dxp->cookie = NULL;
833 			}
834 		} while (curr_id != id);
835 	}
836 	vq->vq_used_cons_idx = used_idx;
837 	vq->vq_free_cnt += free_cnt;
838 }
839 
840 static void
841 virtio_xmit_cleanup_normal_packed(struct virtqueue *vq, int num)
842 {
843 	uint16_t used_idx, id;
844 	uint16_t size = vq->vq_nentries;
845 	struct vring_packed_desc *desc = vq->vq_packed.ring.desc;
846 	struct vq_desc_extra *dxp;
847 
848 	used_idx = vq->vq_used_cons_idx;
849 	/* desc_is_used has a load-acquire or rte_io_rmb inside
850 	 * and is used here to wait for used descriptors in the virtqueue.
851 	 */
852 	while (num-- && desc_is_used(&desc[used_idx], vq)) {
853 		id = desc[used_idx].id;
854 		dxp = &vq->vq_descx[id];
855 		vq->vq_used_cons_idx += dxp->ndescs;
856 		if (vq->vq_used_cons_idx >= size) {
857 			vq->vq_used_cons_idx -= size;
858 			vq->vq_packed.used_wrap_counter ^= 1;
859 		}
860 		vq_ring_free_id_packed(vq, id);
861 		if (dxp->cookie != NULL) {
862 			rte_pktmbuf_free(dxp->cookie);
863 			dxp->cookie = NULL;
864 		}
865 		used_idx = vq->vq_used_cons_idx;
866 	}
867 }
868 
869 /* Cleanup from completed transmits. */
870 static inline void
871 virtio_xmit_cleanup_packed(struct virtqueue *vq, int num, int in_order)
872 {
873 	if (in_order)
874 		virtio_xmit_cleanup_inorder_packed(vq, num);
875 	else
876 		virtio_xmit_cleanup_normal_packed(vq, num);
877 }
878 
879 static inline void
880 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
881 {
882 	uint16_t i, used_idx, desc_idx;
883 	for (i = 0; i < num; i++) {
884 		struct vring_used_elem *uep;
885 		struct vq_desc_extra *dxp;
886 
887 		used_idx = (uint16_t)(vq->vq_used_cons_idx &
888 				(vq->vq_nentries - 1));
889 		uep = &vq->vq_split.ring.used->ring[used_idx];
890 
891 		desc_idx = (uint16_t)uep->id;
892 		dxp = &vq->vq_descx[desc_idx];
893 		vq->vq_used_cons_idx++;
894 		vq_ring_free_chain(vq, desc_idx);
895 
896 		if (dxp->cookie != NULL) {
897 			rte_pktmbuf_free(dxp->cookie);
898 			dxp->cookie = NULL;
899 		}
900 	}
901 }
902 
903 /* Cleanup from completed inorder transmits. */
904 static __rte_always_inline void
905 virtio_xmit_cleanup_inorder(struct virtqueue *vq, uint16_t num)
906 {
907 	uint16_t i, idx = vq->vq_used_cons_idx;
908 	int16_t free_cnt = 0;
909 	struct vq_desc_extra *dxp = NULL;
910 
911 	if (unlikely(num == 0))
912 		return;
913 
914 	for (i = 0; i < num; i++) {
915 		dxp = &vq->vq_descx[idx++ & (vq->vq_nentries - 1)];
916 		free_cnt += dxp->ndescs;
917 		if (dxp->cookie != NULL) {
918 			rte_pktmbuf_free(dxp->cookie);
919 			dxp->cookie = NULL;
920 		}
921 	}
922 
923 	vq->vq_free_cnt += free_cnt;
924 	vq->vq_used_cons_idx = idx;
925 }
926 #endif /* _VIRTQUEUE_H_ */
927