xref: /dpdk/drivers/net/intel/common/tx.h (revision d3bb1c9e0a16145eacc75deadc96726b351495ec)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2024 Intel Corporation
3  */
4 
5 #ifndef _COMMON_INTEL_TX_H_
6 #define _COMMON_INTEL_TX_H_
7 
8 #include <stdint.h>
9 #include <rte_mbuf.h>
10 #include <rte_ethdev.h>
11 
12 /* forward declaration of the common intel (ci) queue structure */
13 struct ci_tx_queue;
14 
/**
 * Software-ring bookkeeping entry paired with one descriptor of a TX
 * queue's TX ring.
 */
struct ci_tx_entry {
	/* mbuf attached to this TX descriptor, or NULL when there is none */
	struct rte_mbuf *mbuf;
	/* ring index of the descriptor that follows this one */
	uint16_t next_id;
	/* ring index of the last descriptor of a scattered packet */
	uint16_t last_id;
};
23 
/**
 * Software-ring bookkeeping entry for the vector Tx path: only the mbuf
 * pointer is tracked for each TX ring descriptor.
 */
struct ci_tx_entry_vec {
	/* mbuf attached to this TX descriptor, or NULL when there is none */
	struct rte_mbuf *mbuf;
};
30 
/*
 * Pointer to a function that takes a TX queue and returns nothing.
 * NOTE(review): going by the name, this is a per-queue "release mbufs"
 * hook; nothing in this header calls it - confirm against the drivers.
 */
typedef void (*ice_tx_release_mbufs_t)(struct ci_tx_queue *txq);
32 
33 struct ci_tx_queue {
34 	union { /* TX ring virtual address */
35 		volatile struct i40e_tx_desc *i40e_tx_ring;
36 		volatile struct iavf_tx_desc *iavf_tx_ring;
37 		volatile struct ice_tx_desc *ice_tx_ring;
38 		volatile union ixgbe_adv_tx_desc *ixgbe_tx_ring;
39 	};
40 	volatile uint8_t *qtx_tail;               /* register address of tail */
41 	union {
42 		struct ci_tx_entry *sw_ring; /* virtual address of SW ring */
43 		struct ci_tx_entry_vec *sw_ring_vec;
44 	};
45 	uint16_t nb_tx_desc;           /* number of TX descriptors */
46 	uint16_t tx_tail; /* current value of tail register */
47 	uint16_t nb_tx_used; /* number of TX desc used since RS bit set */
48 	/* index to last TX descriptor to have been cleaned */
49 	uint16_t last_desc_cleaned;
50 	/* Total number of TX descriptors ready to be allocated. */
51 	uint16_t nb_tx_free;
52 	/* Start freeing TX buffers if there are less free descriptors than
53 	 * this value.
54 	 */
55 	uint16_t tx_free_thresh;
56 	/* Number of TX descriptors to use before RS bit is set. */
57 	uint16_t tx_rs_thresh;
58 	uint16_t port_id;  /* Device port identifier. */
59 	uint16_t queue_id; /* TX queue index. */
60 	uint16_t reg_idx;
61 	uint16_t tx_next_dd;
62 	uint16_t tx_next_rs;
63 	uint64_t offloads;
64 	uint64_t mbuf_errors;
65 	rte_iova_t tx_ring_dma;        /* TX ring DMA address */
66 	bool tx_deferred_start; /* don't start this queue in dev start */
67 	bool q_set;             /* indicate if tx queue has been configured */
68 	bool vector_tx;         /* port is using vector TX */
69 	union {                  /* the VSI this queue belongs to */
70 		struct i40e_vsi *i40e_vsi;
71 		struct iavf_vsi *iavf_vsi;
72 		struct ice_vsi *ice_vsi;
73 	};
74 	const struct rte_memzone *mz;
75 
76 	union {
77 		struct { /* ICE driver specific values */
78 			uint32_t q_teid; /* TX schedule node id. */
79 		};
80 		struct { /* I40E driver specific values */
81 			uint8_t dcb_tc;
82 		};
83 		struct { /* iavf driver specific values */
84 			uint16_t ipsec_crypto_pkt_md_offset;
85 			uint8_t rel_mbufs_type;
86 #define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(0)
87 #define IAVF_TX_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(1)
88 			uint8_t vlan_flag;
89 			uint8_t tc;
90 			bool use_ctx;  /* with ctx info, each pkt needs two descriptors */
91 		};
92 		struct { /* ixgbe specific values */
93 			const struct ixgbe_txq_ops *ops;
94 			struct ixgbe_advctx_info *ctx_cache;
95 			uint32_t ctx_curr;
96 			uint8_t pthresh;   /**< Prefetch threshold register. */
97 			uint8_t hthresh;   /**< Host threshold register. */
98 			uint8_t wthresh;   /**< Write-back threshold reg. */
99 			uint8_t using_ipsec;  /**< indicates that IPsec TX feature is in use */
100 		};
101 	};
102 };
103 
104 static __rte_always_inline void
105 ci_tx_backlog_entry(struct ci_tx_entry *txep, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
106 {
107 	for (uint16_t i = 0; i < (int)nb_pkts; ++i)
108 		txep[i].mbuf = tx_pkts[i];
109 }
110 
111 static __rte_always_inline void
112 ci_tx_backlog_entry_vec(struct ci_tx_entry_vec *txep, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
113 {
114 	for (uint16_t i = 0; i < nb_pkts; ++i)
115 		txep[i].mbuf = tx_pkts[i];
116 }
117 
/* Capacity of the on-stack mbuf batch array used by ci_tx_free_bufs_vec(). */
#define IETH_VPMD_TX_MAX_FREE_BUF 64
119 
/*
 * Predicate called with a TX queue and a descriptor index.
 * ci_tx_free_bufs_vec() treats a zero result as "not done" and frees
 * nothing in that case; any non-zero result lets the free proceed.
 */
typedef int (*ci_desc_done_fn)(struct ci_tx_queue *txq, uint16_t idx);
121 
122 static __rte_always_inline int
123 ci_tx_free_bufs_vec(struct ci_tx_queue *txq, ci_desc_done_fn desc_done, bool ctx_descs)
124 {
125 	int nb_free = 0;
126 	struct rte_mbuf *free[IETH_VPMD_TX_MAX_FREE_BUF];
127 	struct rte_mbuf *m;
128 
129 	/* check DD bits on threshold descriptor */
130 	if (!desc_done(txq, txq->tx_next_dd))
131 		return 0;
132 
133 	const uint32_t n = txq->tx_rs_thresh >> ctx_descs;
134 
135 	/* first buffer to free from S/W ring is at index
136 	 * tx_next_dd - (tx_rs_thresh - 1)
137 	 */
138 	struct ci_tx_entry_vec *txep = txq->sw_ring_vec;
139 	txep += (txq->tx_next_dd >> ctx_descs) - (n - 1);
140 
141 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) {
142 		struct rte_mempool *mp = txep[0].mbuf->pool;
143 		void **cache_objs;
144 		struct rte_mempool_cache *cache = rte_mempool_default_cache(mp, rte_lcore_id());
145 
146 		if (!cache || cache->len == 0)
147 			goto normal;
148 
149 		cache_objs = &cache->objs[cache->len];
150 
151 		if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
152 			rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
153 			goto done;
154 		}
155 
156 		/* The cache follows the following algorithm
157 		 *   1. Add the objects to the cache
158 		 *   2. Anything greater than the cache min value (if it
159 		 *   crosses the cache flush threshold) is flushed to the ring.
160 		 */
161 		/* Add elements back into the cache */
162 		uint32_t copied = 0;
163 		/* n is multiple of 32 */
164 		while (copied < n) {
165 			memcpy(&cache_objs[copied], &txep[copied], 32 * sizeof(void *));
166 			copied += 32;
167 		}
168 		cache->len += n;
169 
170 		if (cache->len >= cache->flushthresh) {
171 			rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
172 					cache->len - cache->size);
173 			cache->len = cache->size;
174 		}
175 		goto done;
176 	}
177 
178 normal:
179 	m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
180 	if (likely(m)) {
181 		free[0] = m;
182 		nb_free = 1;
183 		for (uint32_t i = 1; i < n; i++) {
184 			m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
185 			if (likely(m)) {
186 				if (likely(m->pool == free[0]->pool)) {
187 					free[nb_free++] = m;
188 				} else {
189 					rte_mempool_put_bulk(free[0]->pool, (void *)free, nb_free);
190 					free[0] = m;
191 					nb_free = 1;
192 				}
193 			}
194 		}
195 		rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
196 	} else {
197 		for (uint32_t i = 1; i < n; i++) {
198 			m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
199 			if (m)
200 				rte_mempool_put(m->pool, m);
201 		}
202 	}
203 
204 done:
205 	/* buffers were freed, update counters */
206 	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
207 	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
208 	if (txq->tx_next_dd >= txq->nb_tx_desc)
209 		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
210 
211 	return txq->tx_rs_thresh;
212 }
213 
214 static inline void
215 ci_txq_release_all_mbufs(struct ci_tx_queue *txq, bool use_ctx)
216 {
217 	if (unlikely(!txq || !txq->sw_ring))
218 		return;
219 
220 	if (!txq->vector_tx) {
221 		for (uint16_t i = 0; i < txq->nb_tx_desc; i++) {
222 			if (txq->sw_ring[i].mbuf != NULL) {
223 				rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
224 				txq->sw_ring[i].mbuf = NULL;
225 			}
226 		}
227 		return;
228 	}
229 
230 	/**
231 	 *  vPMD tx will not set sw_ring's mbuf to NULL after free,
232 	 *  so determining buffers to free is a little more complex.
233 	 */
234 	const uint16_t start = (txq->tx_next_dd - txq->tx_rs_thresh + 1) >> use_ctx;
235 	const uint16_t nb_desc = txq->nb_tx_desc >> use_ctx;
236 	const uint16_t end = txq->tx_tail >> use_ctx;
237 
238 	uint16_t i = start;
239 	if (end < i) {
240 		for (; i < nb_desc; i++)
241 			rte_pktmbuf_free_seg(txq->sw_ring_vec[i].mbuf);
242 		i = 0;
243 	}
244 	for (; i < end; i++)
245 		rte_pktmbuf_free_seg(txq->sw_ring_vec[i].mbuf);
246 	memset(txq->sw_ring_vec, 0, sizeof(txq->sw_ring_vec[0]) * nb_desc);
247 }
248 
249 #endif /* _COMMON_INTEL_TX_H_ */
250