/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_tx.h"

#define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx},
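
/*
 * For illustration, an entry such as
 *
 *   MLX5_TXOFF_INFO(full_empw, MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW)
 *
 * expands to the initializer
 *
 *   {mlx5_tx_burst_full_empw, MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW},
 *
 * i.e. each entry pairs a template-generated burst routine with the
 * offload set it was compiled for (the offload arguments shown here are
 * only an example, not an actual entry of the table below).
 */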

/**
 * Move QP from error state to running state and initialize indexes.
 *
 * @param txq_ctrl
 *   Pointer to TX queue control structure.
 *
 * @return
 *   0 on success, else -1.
 */
static int
tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_mp_arg_queue_state_modify sm = {
			.is_wq = 0,
			.queue_id = txq_ctrl->txq.idx,
	};

	if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm))
		return -1;
	txq_ctrl->txq.wqe_ci = 0;
	txq_ctrl->txq.wqe_pi = 0;
	txq_ctrl->txq.elts_comp = 0;
	return 0;
}

/*
 * Return 1 if the error CQE has already been marked as seen,
 * otherwise mark it and return 0.
 */
static int
check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
{
	static const uint8_t magic[] = "seen";
	int ret = 1;
	unsigned int i;

	for (i = 0; i < sizeof(magic); ++i)
		if (!ret || err_cqe->rsvd1[i] != magic[i]) {
			ret = 0;
			err_cqe->rsvd1[i] = magic[i];
		}
	return ret;
}

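/*
 * Illustration of the marking protocol above (not driver code): the
 * first call on a fresh error CQE finds rsvd1[] not matching the "seen"
 * magic, stamps it and returns 0, so the caller handles the error once;
 * any further call on the same CQE returns 1:
 *
 *   if (!check_err_cqe_seen(err_cqe))
 *       ... first encounter: dump debug info, count the errors ...
 *
 * The magic is written into a reserved CQE area that the driver
 * repurposes while it owns the CQE.
 */
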
/**
 * Handle error CQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param err_cqe
 *   Pointer to the error CQE.
 *
 * @return
 *   Negative value if queue recovery failed, otherwise zero
 *   (the error completion entry was handled successfully).
 */
static int
mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
			 volatile struct mlx5_err_cqe *err_cqe)
{
	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
		struct mlx5_txq_ctrl *txq_ctrl =
				container_of(txq, struct mlx5_txq_ctrl, txq);
		uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter);
		int seen = check_err_cqe_seen(err_cqe);

		if (!seen && txq_ctrl->dump_file_n <
		    txq_ctrl->priv->config.max_dump_files_num) {
			MKSTR(err_str, "Unexpected CQE error syndrome "
			      "0x%02x CQN = %u SQN = %u wqe_counter = %u "
			      "wq_ci = %u cq_ci = %u", err_cqe->syndrome,
			      txq->cqe_s, txq->qp_num_8s >> 8,
			      rte_be_to_cpu_16(err_cqe->wqe_counter),
			      txq->wqe_ci, txq->cq_ci);
			MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u",
			      PORT_ID(txq_ctrl->priv), txq->idx,
			      txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc());
			mlx5_dump_debug_information(name, NULL, err_str, 0);
			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
						    (const void *)((uintptr_t)
						    txq->cqes),
						    sizeof(*err_cqe) *
						    (1 << txq->cqe_n));
			mlx5_dump_debug_information(name, "MLX5 Error SQ:",
						    (const void *)((uintptr_t)
						    txq->wqes),
						    MLX5_WQE_SIZE *
						    (1 << txq->wqe_n));
			txq_ctrl->dump_file_n++;
		}
		if (!seen)
			/*
			 * Count errors in WQE units.
			 * Later this can be improved to count error packets,
			 * for example, by parsing the SQ to find how many
			 * packets should be counted for each WQE.
			 */
			txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
						new_wqe_pi) & wqe_m;
		if (tx_recover_qp(txq_ctrl)) {
			/* Recovering failed - retry later on the same WQE. */
			return -1;
		}
		/* Release all the remaining buffers. */
		txq_free_elts(txq_ctrl);
	}
	return 0;
}

/**
 * Dummy DPDK callback for TX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
removed_tx_burst(void *dpdk_txq __rte_unused,
		 struct rte_mbuf **pkts __rte_unused,
		 uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

/**
 * Update completion queue consuming index via doorbell
 * and flush the completed data buffers.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param last_cqe
 *   Valid CQE pointer; if not NULL, update txq->wqe_pi and flush the buffers.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
		   volatile struct mlx5_cqe *last_cqe,
		   unsigned int olx __rte_unused)
{
	if (likely(last_cqe != NULL)) {
		uint16_t tail;

		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
		tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
		if (likely(tail != txq->elts_tail)) {
			mlx5_tx_free_elts(txq, tail, olx);
			MLX5_ASSERT(tail == txq->elts_tail);
		}
	}
}

/**
 * Manage TX completions. This routine checks the CQ for
 * arrived CQEs, deduces the last accomplished WQE in SQ,
 * updates SQ producing index and frees all completed mbufs.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * NOTE: not inlined intentionally, it makes the tx_burst
 * routine smaller, simpler and faster - from experiments.
 */
void
mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
			  unsigned int olx __rte_unused)
{
	unsigned int count = MLX5_TX_COMP_MAX_CQE;
	volatile struct mlx5_cqe *last_cqe = NULL;
	bool ring_doorbell = false;
	int ret;

	do {
		volatile struct mlx5_cqe *cqe;

		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
				/* No new CQEs in completion queue. */
				MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN);
				break;
			}
			/*
			 * Some error occurred, try to restart.
			 * There is no barrier after the WQE related
			 * doorbell is written, make sure all writes are
			 * completed here, before we might perform the
			 * SQ reset.
			 */
			rte_wmb();
			ret = mlx5_tx_error_cqe_handle
				(txq, (volatile struct mlx5_err_cqe *)cqe);
			if (unlikely(ret < 0)) {
				/*
				 * Some error occurred on queue error
				 * handling, we do not advance the index
				 * here, allowing a retry on the next call.
				 */
				return;
			}
			/*
			 * We are going to fetch all entries with
			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
			 * The send queue is supposed to be empty.
			 */
			ring_doorbell = true;
			++txq->cq_ci;
			txq->cq_pi = txq->cq_ci;
			last_cqe = NULL;
			continue;
		}
		/* Normal transmit completion. */
		MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
#ifdef RTE_LIBRTE_MLX5_DEBUG
		MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
			    cqe->wqe_counter);
#endif
		ring_doorbell = true;
		++txq->cq_ci;
		last_cqe = cqe;
		/*
		 * We have to restrict the amount of processed CQEs
		 * in one tx_burst routine call. The CQ may be large
		 * and many CQEs may be updated by the NIC in one
		 * transaction. Freeing buffers is time consuming,
		 * multiple iterations may introduce significant latency.
		 */
		if (likely(--count == 0))
			break;
	} while (true);
	if (likely(ring_doorbell)) {
		/* Ring doorbell to notify hardware. */
		rte_compiler_barrier();
		*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
		mlx5_tx_comp_flush(txq, last_cqe, olx);
	}
}

/**
 * DPDK callback to check the status of a Tx descriptor.
 *
 * @param tx_queue
 *   The Tx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Tx descriptor.
 */
int
mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
{
	struct mlx5_txq_data *__rte_restrict txq = tx_queue;
	uint16_t used;

	mlx5_tx_handle_completion(txq, 0);
	used = txq->elts_head - txq->elts_tail;
	if (offset < used)
		return RTE_ETH_TX_DESC_FULL;
	return RTE_ETH_TX_DESC_DONE;
}
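
/*
 * Applications reach the callback above through the generic ethdev API;
 * a minimal polling sketch (hypothetical application code, port_id,
 * queue_id and offset chosen by the caller):
 *
 *   int status = rte_eth_tx_descriptor_status(port_id, queue_id, offset);
 *
 *   if (status == RTE_ETH_TX_DESC_DONE)
 *       ... descriptor transmitted, its slot can be reused ...
 *   else if (status == RTE_ETH_TX_DESC_FULL)
 *       ... descriptor still in the ring, transmission pending ...
 */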

/*
 * Array of declared and compiled Tx burst functions and the corresponding
 * supported offload sets. The array is used to select the Tx burst
 * function for a specified offload set at Tx queue configuration time.
 */
const struct {
	eth_tx_burst_t func;
	unsigned int olx;
} txoff_func[] = {
MLX5_TXOFF_INFO(full_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_empw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(md_empw,
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mt_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtsc_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sc_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sci_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(scv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sciv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(i_empw,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(v_empw,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(iv_empw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_nompw,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts_nompwi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_noi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_ts,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mdi_ts,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none,
		MLX5_TXOFF_CONFIG_NONE)

MLX5_TXOFF_INFO(md,
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mt,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtsc,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mti,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtiv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sc,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sci,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(scv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sciv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(i,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(v,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(iv,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)
};

/**
 * Configure the Tx function to use. The routine checks the configured
 * Tx offloads for the device and selects the appropriate Tx burst
 * routine. There are multiple Tx burst routines compiled from the same
 * template in the most optimal way for the dedicated Tx offloads set.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Pointer to selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	unsigned int diff = 0, olx = 0, i, m;

	MLX5_ASSERT(priv);
	if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
		/* We should support Multi-Segment Packets. */
		olx |= MLX5_TXOFF_CONFIG_MULTI;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
			   DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			   DEV_TX_OFFLOAD_GRE_TNL_TSO |
			   DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO)) {
		/* We should support TCP Send Offload. */
		olx |= MLX5_TXOFF_CONFIG_TSO;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support Software Parser for Tunnels. */
		olx |= MLX5_TXOFF_CONFIG_SWP;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support IP/TCP/UDP Checksums. */
		olx |= MLX5_TXOFF_CONFIG_CSUM;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) {
		/* We should support VLAN insertion. */
		olx |= MLX5_TXOFF_CONFIG_VLAN;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP &&
	    rte_mbuf_dynflag_lookup
			(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 &&
	    rte_mbuf_dynfield_lookup
			(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) {
		/* Offload configured, dynamic entities registered. */
		olx |= MLX5_TXOFF_CONFIG_TXPP;
	}
	if (priv->txqs_n && (*priv->txqs)[0]) {
		struct mlx5_txq_data *txd = (*priv->txqs)[0];

		if (txd->inlen_send) {
			/*
			 * Check the data inline requirements. Data inline
			 * is enabled on a per-device basis, so checking
			 * the first Tx queue only is sufficient.
			 *
			 * If the device does not support VLAN insertion
			 * in the WQE and some queues are requested to
			 * perform VLAN insertion offload, then inlining
			 * must be enabled.
			 */
			olx |= MLX5_TXOFF_CONFIG_INLINE;
		}
	}
	if (config->mps == MLX5_MPW_ENHANCED &&
	    config->txq_inline_min <= 0) {
		/*
		 * The NIC supports Enhanced Multi-Packet Write
		 * and does not require minimal inline data.
		 */
		olx |= MLX5_TXOFF_CONFIG_EMPW;
	}
	if (rte_flow_dynf_metadata_avail()) {
		/* We should support Flow metadata. */
		olx |= MLX5_TXOFF_CONFIG_METADATA;
	}
	if (config->mps == MLX5_MPW) {
		/*
		 * The NIC supports Legacy Multi-Packet Write.
		 * MLX5_TXOFF_CONFIG_MPW controls the descriptor building
		 * method in combination with MLX5_TXOFF_CONFIG_EMPW.
		 */
		if (!(olx & (MLX5_TXOFF_CONFIG_TSO |
			     MLX5_TXOFF_CONFIG_SWP |
			     MLX5_TXOFF_CONFIG_VLAN |
			     MLX5_TXOFF_CONFIG_METADATA)))
			olx |= MLX5_TXOFF_CONFIG_EMPW |
			       MLX5_TXOFF_CONFIG_MPW;
	}
	/*
	 * Scan the routines table to find the minimal
	 * satisfying routine with the requested offloads.
	 */
	m = RTE_DIM(txoff_func);
	for (i = 0; i < RTE_DIM(txoff_func); i++) {
		unsigned int tmp;

		tmp = txoff_func[i].olx;
		if (tmp == olx) {
			/* Meets requested offloads exactly. */
			m = i;
			break;
		}
		if ((tmp & olx) != olx) {
			/* Does not meet requested offloads at all. */
			continue;
		}
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW)
			/* Do not enable legacy MPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW)
			/* Do not enable eMPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE)
			/* Do not enable inlining if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP)
			/* Do not enable scheduling if not configured. */
			continue;
		/*
		 * Some routine meets the requirements.
		 * Check whether it has the minimal amount
		 * of not-requested offloads.
		 */
		tmp = __builtin_popcountl(tmp & ~olx);
		if (m >= RTE_DIM(txoff_func) || tmp < diff) {
			/* First or better match, save and continue. */
			m = i;
			diff = tmp;
			continue;
		}
		if (tmp == diff) {
			tmp = txoff_func[i].olx ^ txoff_func[m].olx;
			if (__builtin_ffsl(txoff_func[i].olx & tmp) <
			    __builtin_ffsl(txoff_func[m].olx & tmp)) {
				/* Lighter not requested offload. */
				m = i;
			}
		}
	}
	if (m >= RTE_DIM(txoff_func)) {
		DRV_LOG(DEBUG, "port %u has no selected Tx function"
			       " for requested offloads %04X",
				dev->data->port_id, olx);
		return NULL;
	}
	DRV_LOG(DEBUG, "port %u has selected Tx function"
		       " supporting offloads %04X/%04X",
			dev->data->port_id, olx, txoff_func[m].olx);
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI)
		DRV_LOG(DEBUG, "\tMULTI (multi segment)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO)
		DRV_LOG(DEBUG, "\tTSO   (TCP send offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP)
		DRV_LOG(DEBUG, "\tSWP   (software parser)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM)
		DRV_LOG(DEBUG, "\tCSUM  (checksum offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE)
		DRV_LOG(DEBUG, "\tINLIN (inline data)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN)
		DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA)
		DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP)
		DRV_LOG(DEBUG, "\tTXPP  (tx Scheduling)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) {
		if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW)
			DRV_LOG(DEBUG, "\tMPW   (Legacy MPW)");
		else
			DRV_LOG(DEBUG, "\tEMPW  (Enhanced MPW)");
	}
	return txoff_func[m].func;
}
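
/*
 * A worked example of the selection logic above (illustrative only):
 * suppose TSO is requested, flow metadata is available and eMPW is
 * supported, so the mask becomes
 *
 *   olx = MLX5_TXOFF_CONFIG_TSO | MLX5_TXOFF_CONFIG_METADATA |
 *         MLX5_TXOFF_CONFIG_EMPW;
 *
 * No table entry matches exactly. Candidates with extra INLINE or TXPP
 * bits (mti_empw, full_ts, ...) are rejected by the exact-match checks
 * on those flags, mtsc_empw and mtv_empw carry three and two extra
 * offloads respectively, and mt_empw carries only one (MULTI), so the
 * scan selects mlx5_tx_burst_mt_empw.
 */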

/**
 * DPDK callback to get the TX queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param qinfo
 *   Pointer to the TX queue information structure.
 *
 * @return
 *   None.
 */
void
mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		  struct rte_eth_txq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);

	if (!txq)
		return;
	qinfo->nb_desc = txq->elts_s;
	qinfo->conf.tx_thresh.pthresh = 0;
	qinfo->conf.tx_thresh.hthresh = 0;
	qinfo->conf.tx_thresh.wthresh = 0;
	qinfo->conf.tx_rs_thresh = 0;
	qinfo->conf.tx_free_thresh = 0;
	qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}

/**
 * DPDK callback to get the TX packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 on success, -EINVAL on failure.
 */
int
mlx5_tx_burst_mode_get(struct rte_eth_dev *dev,
		       uint16_t tx_queue_id,
		       struct rte_eth_burst_mode *mode)
{
	eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
	unsigned int i, olx;

	for (i = 0; i < RTE_DIM(txoff_func); i++) {
		if (pkt_burst == txoff_func[i].func) {
			olx = txoff_func[i].olx;
			snprintf(mode->info, sizeof(mode->info),
				 "%s%s%s%s%s%s%s%s%s%s",
				 (olx & MLX5_TXOFF_CONFIG_EMPW) ?
				 ((olx & MLX5_TXOFF_CONFIG_MPW) ?
				 "Legacy MPW" : "Enhanced MPW") : "No MPW",
				 (olx & MLX5_TXOFF_CONFIG_MULTI) ?
				 " + MULTI" : "",
				 (olx & MLX5_TXOFF_CONFIG_TSO) ?
				 " + TSO" : "",
				 (olx & MLX5_TXOFF_CONFIG_SWP) ?
				 " + SWP" : "",
				 (olx & MLX5_TXOFF_CONFIG_CSUM) ?
				 " + CSUM" : "",
				 (olx & MLX5_TXOFF_CONFIG_INLINE) ?
				 " + INLINE" : "",
				 (olx & MLX5_TXOFF_CONFIG_VLAN) ?
				 " + VLAN" : "",
				 (olx & MLX5_TXOFF_CONFIG_METADATA) ?
				 " + METADATA" : "",
				 (olx & MLX5_TXOFF_CONFIG_TXPP) ?
				 " + TXPP" : "",
				 (txq && txq->fast_free) ?
				 " + Fast Free" : "");
			return 0;
		}
	}
	return -EINVAL;
}
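
/*
 * A sketch of how an application might query the burst mode string built
 * above (hypothetical application code, port_id and queue_id chosen by
 * the caller):
 *
 *   struct rte_eth_burst_mode mode;
 *
 *   if (rte_eth_tx_burst_mode_get(port_id, queue_id, &mode) == 0)
 *       printf("Tx burst mode: %s\n", mode.info);
 *
 * For a queue served by mlx5_tx_burst_mt_empw, for instance, the info
 * string would typically read "Enhanced MPW + MULTI + TSO + METADATA".
 */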
781