xref: /dpdk/drivers/net/mlx5/mlx5_tx.c (revision 3cddeba0ca38b00c7dc646277484d08a4cb2d862)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2021 6WIND S.A.
3  * Copyright 2021 Mellanox Technologies, Ltd
4  */
5 
6 #include <stdint.h>
7 #include <string.h>
8 #include <stdlib.h>
9 
10 #include <rte_mbuf.h>
11 #include <rte_mempool.h>
12 #include <rte_prefetch.h>
13 #include <rte_common.h>
14 #include <rte_branch_prediction.h>
15 #include <rte_ether.h>
16 #include <rte_cycles.h>
17 #include <rte_flow.h>
18 
19 #include <mlx5_prm.h>
20 #include <mlx5_common.h>
21 
22 #include "mlx5_autoconf.h"
23 #include "mlx5_defs.h"
24 #include "mlx5.h"
25 #include "mlx5_utils.h"
26 #include "mlx5_rxtx.h"
27 #include "mlx5_tx.h"
28 
29 #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx},
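/*
 * For illustration: MLX5_TXOFF_INFO() builds one entry of the txoff_func[]
 * table defined further below from a Tx burst routine name suffix and its
 * supported offload flags. For instance, the entry
 *
 *   MLX5_TXOFF_INFO(md_empw,
 *                   MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
 *
 * expands to
 *
 *   {mlx5_tx_burst_md_empw,
 *    MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW},
 */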
30 
31 /**
32  * Move QP from error state to running state and initialize indexes.
33  *
34  * @param txq_ctrl
35  *   Pointer to TX queue control structure.
36  *
37  * @return
38  *   0 on success, else -1.
39  */
40 static int
41 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
42 {
43 	struct mlx5_mp_arg_queue_state_modify sm = {
44 			.is_wq = 0,
45 			.queue_id = txq_ctrl->txq.idx,
46 	};
47 
48 	if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm))
49 		return -1;
50 	txq_ctrl->txq.wqe_ci = 0;
51 	txq_ctrl->txq.wqe_pi = 0;
52 	txq_ctrl->txq.elts_comp = 0;
53 	return 0;
54 }
55 
56 /* Return 1 if the error CQE is already signed (seen); otherwise sign it and return 0. */
57 static int
58 check_err_cqe_seen(volatile struct mlx5_error_cqe *err_cqe)
59 {
60 	static const uint8_t magic[] = "seen";
61 	int ret = 1;
62 	unsigned int i;
63 
64 	for (i = 0; i < sizeof(magic); ++i)
65 		if (!ret || err_cqe->rsvd1[i] != magic[i]) {
66 			ret = 0;
67 			err_cqe->rsvd1[i] = magic[i];
68 		}
69 	return ret;
70 }
71 
72 /**
73  * Handle error CQE.
74  *
75  * @param txq
76  *   Pointer to TX queue structure.
77  * @param error_cqe
78  *   Pointer to the error CQE.
79  *
80  * @return
81  *   Negative value if queue recovery failed, otherwise 0
82  *   when the error completion entry has been handled successfully.
83  */
84 static int
85 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
86 			 volatile struct mlx5_error_cqe *err_cqe)
87 {
88 	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
89 		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
90 		struct mlx5_txq_ctrl *txq_ctrl =
91 				container_of(txq, struct mlx5_txq_ctrl, txq);
92 		uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter);
93 		int seen = check_err_cqe_seen(err_cqe);
94 
95 		if (!seen && txq_ctrl->dump_file_n <
96 		    txq_ctrl->priv->config.max_dump_files_num) {
97 			MKSTR(err_str, "Unexpected CQE error syndrome "
98 			      "0x%02x CQN = %u SQN = %u wqe_counter = %u "
99 			      "wq_ci = %u cq_ci = %u", err_cqe->syndrome,
100 			      txq->cqe_s, txq->qp_num_8s >> 8,
101 			      rte_be_to_cpu_16(err_cqe->wqe_counter),
102 			      txq->wqe_ci, txq->cq_ci);
103 			MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u",
104 			      PORT_ID(txq_ctrl->priv), txq->idx,
105 			      txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc());
106 			mlx5_dump_debug_information(name, NULL, err_str, 0);
107 			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
108 						    (const void *)((uintptr_t)
109 						    txq->cqes),
110 						    sizeof(struct mlx5_error_cqe) *
111 						    (1 << txq->cqe_n));
112 			mlx5_dump_debug_information(name, "MLX5 Error SQ:",
113 						    (const void *)((uintptr_t)
114 						    txq->wqes),
115 						    MLX5_WQE_SIZE *
116 						    (1 << txq->wqe_n));
117 			txq_ctrl->dump_file_n++;
118 		}
119 		if (!seen)
120 			/*
121 			 * Count errors in WQE units.
122 			 * Later it can be improved to count error packets,
123 			 * for example, by parsing the SQ to find how many
124 			 * packets should be counted for each WQE.
125 			 */
126 			txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
127 						new_wqe_pi) & wqe_m;
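		/*
		 * For example, with wqe_n = 8 (256 WQEs, wqe_m = 255),
		 * wqe_ci = 10 and an error CQE reporting wqe_counter = 250,
		 * the expression above adds ((10 - 250) & 255) = 16 errored
		 * WQEs to the oerrors counter.
		 */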
128 		if (tx_recover_qp(txq_ctrl)) {
129 			/* Recovering failed - retry later on the same WQE. */
130 			return -1;
131 		}
132 		/* Release all the remaining buffers. */
133 		txq_free_elts(txq_ctrl);
134 	}
135 	return 0;
136 }
137 
138 /**
139  * Update the completion queue consumer index via doorbell
140  * and flush the completed data buffers.
141  *
142  * @param txq
143  *   Pointer to TX queue structure.
144  * @param last_cqe
145  *   Valid CQE pointer; if not NULL, update txq->wqe_pi and flush the buffers.
146  * @param olx
147  *   Configured Tx offloads mask. It is fully defined at
148  *   compile time and may be used for optimization.
149  */
150 static __rte_always_inline void
151 mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
152 		   volatile struct mlx5_cqe *last_cqe,
153 		   unsigned int olx __rte_unused)
154 {
155 	if (likely(last_cqe != NULL)) {
156 		uint16_t tail;
157 
158 		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
159 		tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
160 		if (likely(tail != txq->elts_tail)) {
161 			mlx5_tx_free_elts(txq, tail, olx);
162 			MLX5_ASSERT(tail == txq->elts_tail);
163 		}
164 	}
165 }
166 
167 /**
168  * Manage TX completions. This routine checks the CQ for newly
169  * arrived CQEs, deduces the last completed WQE in the SQ,
170  * updates the SQ producer index and frees all completed mbufs.
171  *
172  * @param txq
173  *   Pointer to TX queue structure.
174  * @param olx
175  *   Configured Tx offloads mask. It is fully defined at
176  *   compile time and may be used for optimization.
177  *
178  * NOTE: not inlined intentionally, it makes the tx_burst
179  * routine smaller, simpler and faster - from experiments.
180  */
181 void
182 mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
183 			  unsigned int olx __rte_unused)
184 {
185 	unsigned int count = MLX5_TX_COMP_MAX_CQE;
186 	volatile struct mlx5_cqe *last_cqe = NULL;
187 	bool ring_doorbell = false;
188 	int ret;
189 
190 	do {
191 		volatile struct mlx5_cqe *cqe;
192 
193 		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
194 		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
195 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
196 			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
197 				/* No new CQEs in completion queue. */
198 				MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN);
199 				break;
200 			}
201 			/*
202 			 * Some error occurred, try to restart.
203 			 * There is no barrier after the WQE-related doorbell
204 			 * is written, so make sure all writes are completed
205 			 * here before we might perform the SQ reset.
206 			 */
207 			rte_wmb();
208 			ret = mlx5_tx_error_cqe_handle
209 				(txq, (volatile struct mlx5_error_cqe *)cqe);
210 			if (unlikely(ret < 0)) {
211 				/*
212 				 * Some error occurred during queue error
213 				 * handling; we do not advance the index
214 				 * here, allowing a retry on the next call.
215 				 */
216 				return;
217 			}
218 			/*
219 			 * We are going to fetch all entries with
220 			 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
221 			 * The send queue is supposed to be empty.
222 			 */
223 			ring_doorbell = true;
224 			++txq->cq_ci;
225 			txq->cq_pi = txq->cq_ci;
226 			last_cqe = NULL;
227 			continue;
228 		}
229 		/* Normal transmit completion. */
230 		MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
231 #ifdef RTE_LIBRTE_MLX5_DEBUG
232 		MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
233 			    cqe->wqe_counter);
234 #endif
235 		if (__rte_trace_point_fp_is_enabled()) {
236 			uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);
237 			uint16_t wqe_id = rte_be_to_cpu_16(cqe->wqe_counter);
238 
239 			if (txq->rt_timestamp)
240 				ts = mlx5_txpp_convert_rx_ts(NULL, ts);
241 			rte_pmd_mlx5_trace_tx_complete(txq->port_id, txq->idx,
242 						       wqe_id, ts);
243 		}
244 		ring_doorbell = true;
245 		++txq->cq_ci;
246 		last_cqe = cqe;
247 		/*
248 		 * We have to restrict the number of processed CQEs
249 		 * in one tx_burst routine call. The CQ may be large
250 		 * and many CQEs may be updated by the NIC in one
251 		 * transaction. Freeing buffers is time consuming;
252 		 * multiple iterations may introduce significant latency.
253 		 */
254 		if (likely(--count == 0))
255 			break;
256 	} while (true);
257 	if (likely(ring_doorbell)) {
258 		/* Ring doorbell to notify hardware. */
259 		rte_compiler_barrier();
260 		*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
261 		mlx5_tx_comp_flush(txq, last_cqe, olx);
262 	}
263 }
264 
265 /**
266  * DPDK callback to check the status of a Tx descriptor.
267  *
268  * @param tx_queue
269  *   The Tx queue.
270  * @param[in] offset
271  *   The index of the descriptor in the ring.
272  *
273  * @return
274  *   The status of the Tx descriptor.
275  */
276 int
277 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
278 {
279 	struct mlx5_txq_data *__rte_restrict txq = tx_queue;
280 	uint16_t used;
281 
282 	mlx5_tx_handle_completion(txq, 0);
283 	used = txq->elts_head - txq->elts_tail;
284 	if (offset < used)
285 		return RTE_ETH_TX_DESC_FULL;
286 	return RTE_ETH_TX_DESC_DONE;
287 }
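/*
 * A minimal usage sketch (not part of the original source; the port and
 * queue numbers are placeholders): applications reach this callback
 * through the generic ethdev API.
 *
 *   // Check whether the descriptor at ring offset 32 has completed.
 *   int status = rte_eth_tx_descriptor_status(0, 0, 32);
 *
 *   if (status == RTE_ETH_TX_DESC_DONE)
 *       ;  // descriptor is done and can be reused
 *   else if (status == RTE_ETH_TX_DESC_FULL)
 *       ;  // descriptor is still in use by the ring
 */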
288 
289 /*
290  * Array of declared and compiled Tx burst functions and the corresponding
291  * supported offloads sets. The array is used to select the Tx burst
292  * function for the specified offloads set at Tx queue configuration time.
293  */
294 const struct {
295 	eth_tx_burst_t func;
296 	unsigned int olx;
297 } txoff_func[] = {
298 MLX5_TXOFF_INFO(full_empw,
299 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
300 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
301 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
302 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
303 
304 MLX5_TXOFF_INFO(none_empw,
305 		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW)
306 
307 MLX5_TXOFF_INFO(md_empw,
308 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
309 
310 MLX5_TXOFF_INFO(mt_empw,
311 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
312 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
313 
314 MLX5_TXOFF_INFO(mtsc_empw,
315 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
316 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
317 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
318 
319 MLX5_TXOFF_INFO(mti_empw,
320 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
321 		MLX5_TXOFF_CONFIG_INLINE |
322 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
323 
324 MLX5_TXOFF_INFO(mtv_empw,
325 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
326 		MLX5_TXOFF_CONFIG_VLAN |
327 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
328 
329 MLX5_TXOFF_INFO(mtiv_empw,
330 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
331 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
332 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
333 
334 MLX5_TXOFF_INFO(sc_empw,
335 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
336 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
337 
338 MLX5_TXOFF_INFO(sci_empw,
339 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
340 		MLX5_TXOFF_CONFIG_INLINE |
341 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
342 
343 MLX5_TXOFF_INFO(scv_empw,
344 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
345 		MLX5_TXOFF_CONFIG_VLAN |
346 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
347 
348 MLX5_TXOFF_INFO(sciv_empw,
349 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
350 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
351 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
352 
353 MLX5_TXOFF_INFO(i_empw,
354 		MLX5_TXOFF_CONFIG_INLINE |
355 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
356 
357 MLX5_TXOFF_INFO(v_empw,
358 		MLX5_TXOFF_CONFIG_VLAN |
359 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
360 
361 MLX5_TXOFF_INFO(iv_empw,
362 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
363 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
364 
365 MLX5_TXOFF_INFO(full_ts_nompw,
366 		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP)
367 
368 MLX5_TXOFF_INFO(full_ts_nompwi,
369 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
370 		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
371 		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
372 		MLX5_TXOFF_CONFIG_TXPP)
373 
374 MLX5_TXOFF_INFO(full_ts,
375 		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP |
376 		MLX5_TXOFF_CONFIG_EMPW)
377 
378 MLX5_TXOFF_INFO(full_ts_noi,
379 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
380 		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
381 		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
382 		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)
383 
384 MLX5_TXOFF_INFO(none_ts,
385 		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP |
386 		MLX5_TXOFF_CONFIG_EMPW)
387 
388 MLX5_TXOFF_INFO(mdi_ts,
389 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
390 		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)
391 
392 MLX5_TXOFF_INFO(mti_ts,
393 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
394 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
395 		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)
396 
397 MLX5_TXOFF_INFO(mtiv_ts,
398 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
399 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
400 		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP |
401 		MLX5_TXOFF_CONFIG_EMPW)
402 
403 MLX5_TXOFF_INFO(full,
404 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
405 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
406 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
407 		MLX5_TXOFF_CONFIG_METADATA)
408 
409 MLX5_TXOFF_INFO(none,
410 		MLX5_TXOFF_CONFIG_NONE)
411 
412 MLX5_TXOFF_INFO(md,
413 		MLX5_TXOFF_CONFIG_METADATA)
414 
415 MLX5_TXOFF_INFO(mt,
416 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
417 		MLX5_TXOFF_CONFIG_METADATA)
418 
419 MLX5_TXOFF_INFO(mtsc,
420 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
421 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
422 		MLX5_TXOFF_CONFIG_METADATA)
423 
424 MLX5_TXOFF_INFO(mti,
425 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
426 		MLX5_TXOFF_CONFIG_INLINE |
427 		MLX5_TXOFF_CONFIG_METADATA)
428 
429 MLX5_TXOFF_INFO(mtv,
430 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
431 		MLX5_TXOFF_CONFIG_VLAN |
432 		MLX5_TXOFF_CONFIG_METADATA)
433 
434 MLX5_TXOFF_INFO(mtiv,
435 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
436 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
437 		MLX5_TXOFF_CONFIG_METADATA)
438 
439 MLX5_TXOFF_INFO(sc,
440 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
441 		MLX5_TXOFF_CONFIG_METADATA)
442 
443 MLX5_TXOFF_INFO(sci,
444 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
445 		MLX5_TXOFF_CONFIG_INLINE |
446 		MLX5_TXOFF_CONFIG_METADATA)
447 
448 MLX5_TXOFF_INFO(scv,
449 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
450 		MLX5_TXOFF_CONFIG_VLAN |
451 		MLX5_TXOFF_CONFIG_METADATA)
452 
453 MLX5_TXOFF_INFO(sciv,
454 		MLX5_TXOFF_CONFIG_SWP |	MLX5_TXOFF_CONFIG_CSUM |
455 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
456 		MLX5_TXOFF_CONFIG_METADATA)
457 
458 MLX5_TXOFF_INFO(i,
459 		MLX5_TXOFF_CONFIG_INLINE |
460 		MLX5_TXOFF_CONFIG_METADATA)
461 
462 MLX5_TXOFF_INFO(v,
463 		MLX5_TXOFF_CONFIG_VLAN |
464 		MLX5_TXOFF_CONFIG_METADATA)
465 
466 MLX5_TXOFF_INFO(iv,
467 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
468 		MLX5_TXOFF_CONFIG_METADATA)
469 
470 MLX5_TXOFF_INFO(none_mpw,
471 		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
472 		MLX5_TXOFF_CONFIG_MPW)
473 
474 MLX5_TXOFF_INFO(mci_mpw,
475 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
476 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
477 		MLX5_TXOFF_CONFIG_MPW)
478 
479 MLX5_TXOFF_INFO(mc_mpw,
480 		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
481 		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)
482 
483 MLX5_TXOFF_INFO(i_mpw,
484 		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
485 		MLX5_TXOFF_CONFIG_MPW)
486 };
487 
488 /**
489  * Configure the Tx function to use. The routine checks the configured
490  * Tx offloads for the device and selects the appropriate Tx burst routine.
491  * There are multiple Tx burst routines compiled from the same template,
492  * each optimized for a dedicated Tx offloads set.
493  *
494  * @param dev
495  *   Pointer to the Ethernet device structure.
496  *
497  * @return
498  *   Pointer to selected Tx burst function.
499  */
500 eth_tx_burst_t
501 mlx5_select_tx_function(struct rte_eth_dev *dev)
502 {
503 	struct mlx5_priv *priv = dev->data->dev_private;
504 	struct mlx5_port_config *config = &priv->config;
505 	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
506 	unsigned int diff = 0, olx = 0, i, m;
507 
508 	MLX5_ASSERT(priv);
509 	if (tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS) {
510 		/* We should support Multi-Segment Packets. */
511 		olx |= MLX5_TXOFF_CONFIG_MULTI;
512 	}
513 	if (tx_offloads & (RTE_ETH_TX_OFFLOAD_TCP_TSO |
514 			   RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO |
515 			   RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO |
516 			   RTE_ETH_TX_OFFLOAD_IP_TNL_TSO |
517 			   RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO)) {
518 		/* We should support TCP Send Offload. */
519 		olx |= MLX5_TXOFF_CONFIG_TSO;
520 	}
521 	if (tx_offloads & (RTE_ETH_TX_OFFLOAD_IP_TNL_TSO |
522 			   RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO |
523 			   RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
524 		/* We should support Software Parser for Tunnels. */
525 		olx |= MLX5_TXOFF_CONFIG_SWP;
526 	}
527 	if (tx_offloads & (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
528 			   RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
529 			   RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
530 			   RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
531 		/* We should support IP/TCP/UDP Checksums. */
532 		olx |= MLX5_TXOFF_CONFIG_CSUM;
533 	}
534 	if (tx_offloads & RTE_ETH_TX_OFFLOAD_VLAN_INSERT) {
535 		/* We should support VLAN insertion. */
536 		olx |= MLX5_TXOFF_CONFIG_VLAN;
537 	}
538 	if (tx_offloads & RTE_ETH_TX_OFFLOAD_SEND_ON_TIMESTAMP &&
539 	    rte_mbuf_dynflag_lookup
540 			(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 &&
541 	    rte_mbuf_dynfield_lookup
542 			(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) {
543 		/* Offload configured, dynamic entities registered. */
544 		olx |= MLX5_TXOFF_CONFIG_TXPP;
545 	}
546 	if (priv->txqs_n && (*priv->txqs)[0]) {
547 		struct mlx5_txq_data *txd = (*priv->txqs)[0];
548 
549 		if (txd->inlen_send) {
550 			/*
551 			 * Check the data inline requirements. Data inline
552 			 * is enabled on a per-device basis, so we can check
553 			 * the first Tx queue only.
554 			 *
555 			 * If the device does not support VLAN insertion in the
556 			 * WQE and some queues are requested to perform VLAN
557 			 * insertion offload, then inlining must be enabled.
558 			 */
559 			olx |= MLX5_TXOFF_CONFIG_INLINE;
560 		}
561 	}
562 	if (config->mps == MLX5_MPW_ENHANCED &&
563 	    config->txq_inline_min <= 0) {
564 		/*
565 		 * The NIC supports Enhanced Multi-Packet Write
566 		 * and does not require minimal inline data.
567 		 */
568 		olx |= MLX5_TXOFF_CONFIG_EMPW;
569 	}
570 	if (rte_flow_dynf_metadata_avail()) {
571 		/* We should support Flow metadata. */
572 		olx |= MLX5_TXOFF_CONFIG_METADATA;
573 	}
574 	if (config->mps == MLX5_MPW) {
575 		/*
576 		 * The NIC supports Legacy Multi-Packet Write.
577 		 * The MLX5_TXOFF_CONFIG_MPW controls the descriptor building
578 		 * method in combination with MLX5_TXOFF_CONFIG_EMPW.
579 		 */
580 		if (!(olx & (MLX5_TXOFF_CONFIG_TSO |
581 			     MLX5_TXOFF_CONFIG_SWP |
582 			     MLX5_TXOFF_CONFIG_VLAN |
583 			     MLX5_TXOFF_CONFIG_METADATA)))
584 			olx |= MLX5_TXOFF_CONFIG_EMPW |
585 			       MLX5_TXOFF_CONFIG_MPW;
586 	}
587 	/*
588 	 * Scan the routine table to find the minimal
589 	 * routine satisfying the requested offloads.
590 	 */
591 	m = RTE_DIM(txoff_func);
592 	for (i = 0; i < RTE_DIM(txoff_func); i++) {
593 		unsigned int tmp;
594 
595 		tmp = txoff_func[i].olx;
596 		if (tmp == olx) {
597 			/* Meets requested offloads exactly. */
598 			m = i;
599 			break;
600 		}
601 		if ((tmp & olx) != olx) {
602 			/* Does not meet requested offloads at all. */
603 			continue;
604 		}
605 		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW)
606 			/* Do not enable legacy MPW if not configured. */
607 			continue;
608 		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW)
609 			/* Do not enable eMPW if not configured. */
610 			continue;
611 		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE)
612 			/* Do not enable inlining if not configured. */
613 			continue;
614 		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP)
615 			/* Do not enable scheduling if not configured. */
616 			continue;
617 		/*
618 		 * Some routine meets the requirements.
619 		 * Check whether it has the minimal amount
620 		 * of not requested offloads.
621 		 */
622 		tmp = rte_popcount64(tmp & ~olx);
623 		if (m >= RTE_DIM(txoff_func) || tmp < diff) {
624 			/* First or better match, save and continue. */
625 			m = i;
626 			diff = tmp;
627 			continue;
628 		}
629 		if (tmp == diff) {
630 			tmp = txoff_func[i].olx ^ txoff_func[m].olx;
631 			if (__builtin_ffsl(txoff_func[i].olx & ~tmp) <
632 			    __builtin_ffsl(txoff_func[m].olx & ~tmp)) {
633 				/* Lighter not requested offload. */
634 				m = i;
635 			}
636 		}
637 	}
638 	if (m >= RTE_DIM(txoff_func)) {
639 		DRV_LOG(DEBUG, "port %u has no selected Tx function"
640 			       " for requested offloads %04X",
641 				dev->data->port_id, olx);
642 		return NULL;
643 	}
644 	DRV_LOG(DEBUG, "port %u has selected Tx function"
645 		       " supporting offloads %04X/%04X",
646 			dev->data->port_id, olx, txoff_func[m].olx);
647 	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI)
648 		DRV_LOG(DEBUG, "\tMULTI (multi segment)");
649 	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO)
650 		DRV_LOG(DEBUG, "\tTSO   (TCP send offload)");
651 	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP)
652 		DRV_LOG(DEBUG, "\tSWP   (software parser)");
653 	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM)
654 		DRV_LOG(DEBUG, "\tCSUM  (checksum offload)");
655 	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE)
656 		DRV_LOG(DEBUG, "\tINLIN (inline data)");
657 	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN)
658 		DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)");
659 	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA)
660 		DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)");
661 	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP)
662 		DRV_LOG(DEBUG, "\tTXPP  (tx Scheduling)");
663 	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) {
664 		if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW)
665 			DRV_LOG(DEBUG, "\tMPW   (Legacy MPW)");
666 		else
667 			DRV_LOG(DEBUG, "\tEMPW  (Enhanced MPW)");
668 	}
669 	return txoff_func[m].func;
670 }
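/*
 * A worked selection example, under assumed conditions: suppose the
 * application requests only L3/L4 checksum offloads, flow metadata is
 * registered, the NIC supports Enhanced MPW without a minimal inline data
 * requirement, and no queue needs data inlining. The requested mask is then
 * MLX5_TXOFF_CONFIG_CSUM | MLX5_TXOFF_CONFIG_METADATA |
 * MLX5_TXOFF_CONFIG_EMPW. No txoff_func[] entry matches it exactly;
 * entries with INLINE, MPW or TXPP are filtered out, and among the
 * remaining supersets sc_empw (adding only SWP) has the fewest extra bits,
 * so mlx5_tx_burst_sc_empw would be selected.
 */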
671 
672 /**
673  * DPDK callback to get the TX queue information.
674  *
675  * @param dev
676  *   Pointer to the device structure.
677  *
678  * @param tx_queue_id
679  *   Tx queue identifier.
680  *
681  * @param qinfo
682  *   Pointer to the TX queue information structure.
683  *
684  * @return
685  *   None.
686  */
687 void
688 mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
689 		  struct rte_eth_txq_info *qinfo)
690 {
691 	struct mlx5_priv *priv = dev->data->dev_private;
692 	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
693 	struct mlx5_txq_ctrl *txq_ctrl =
694 			container_of(txq, struct mlx5_txq_ctrl, txq);
695 
696 	if (!txq)
697 		return;
698 	qinfo->nb_desc = txq->elts_s;
699 	qinfo->conf.tx_thresh.pthresh = 0;
700 	qinfo->conf.tx_thresh.hthresh = 0;
701 	qinfo->conf.tx_thresh.wthresh = 0;
702 	qinfo->conf.tx_rs_thresh = 0;
703 	qinfo->conf.tx_free_thresh = 0;
704 	qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1;
705 	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
706 }
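/*
 * A minimal usage sketch (port and queue numbers are placeholders):
 *
 *   struct rte_eth_txq_info qinfo;
 *
 *   if (rte_eth_tx_queue_info_get(0, 0, &qinfo) == 0)
 *       printf("txq0: %u descriptors, offloads 0x%" PRIx64 "\n",
 *              qinfo.nb_desc, qinfo.conf.offloads);
 */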
707 
708 /**
709  * DPDK callback to get the TX packet burst mode information.
710  *
711  * @param dev
712  *   Pointer to the device structure.
713  *
714  * @param tx_queue_id
715  *   Tx queue identifier.
716  *
717  * @param mode
718  *   Pointer to the burst mode information.
719  *
720  * @return
721  *   0 on success, -EINVAL on failure.
722  */
723 int
724 mlx5_tx_burst_mode_get(struct rte_eth_dev *dev,
725 		       uint16_t tx_queue_id,
726 		       struct rte_eth_burst_mode *mode)
727 {
728 	eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
729 	struct mlx5_priv *priv = dev->data->dev_private;
730 	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
731 	unsigned int i, olx;
732 
733 	for (i = 0; i < RTE_DIM(txoff_func); i++) {
734 		if (pkt_burst == txoff_func[i].func) {
735 			olx = txoff_func[i].olx;
736 			snprintf(mode->info, sizeof(mode->info),
737 				 "%s%s%s%s%s%s%s%s%s%s",
738 				 (olx & MLX5_TXOFF_CONFIG_EMPW) ?
739 				 ((olx & MLX5_TXOFF_CONFIG_MPW) ?
740 				 "Legacy MPW" : "Enhanced MPW") : "No MPW",
741 				 (olx & MLX5_TXOFF_CONFIG_MULTI) ?
742 				 " + MULTI" : "",
743 				 (olx & MLX5_TXOFF_CONFIG_TSO) ?
744 				 " + TSO" : "",
745 				 (olx & MLX5_TXOFF_CONFIG_SWP) ?
746 				 " + SWP" : "",
747 				 (olx & MLX5_TXOFF_CONFIG_CSUM) ?
748 				 " + CSUM" : "",
749 				 (olx & MLX5_TXOFF_CONFIG_INLINE) ?
750 				 " + INLINE" : "",
751 				 (olx & MLX5_TXOFF_CONFIG_VLAN) ?
752 				 " + VLAN" : "",
753 				 (olx & MLX5_TXOFF_CONFIG_METADATA) ?
754 				 " + METADATA" : "",
755 				 (olx & MLX5_TXOFF_CONFIG_TXPP) ?
756 				 " + TXPP" : "",
757 				 (txq && txq->fast_free) ?
758 				 " + Fast Free" : "");
759 			return 0;
760 		}
761 	}
762 	return -EINVAL;
763 }
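/*
 * A minimal usage sketch (port and queue numbers are placeholders):
 *
 *   struct rte_eth_burst_mode mode;
 *
 *   if (rte_eth_tx_burst_mode_get(0, 0, &mode) == 0)
 *       printf("Tx burst mode: %s\n", mode.info);
 *
 * Depending on the selected routine, the reported string might look like
 * "Enhanced MPW + CSUM + METADATA".
 */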
764 
765 /**
766  * Dump SQ/CQ Context to a file.
767  *
768  * @param[in] port_id
769  *   Port ID
770  * @param[in] queue_id
771  *   Queue ID
772  * @param[in] filename
773  *   Name of file to dump the Tx Queue Context
774  *
775  * @return
776  *   0 on success, a non-zero value on failure.
777  *
778  */
779 int rte_pmd_mlx5_txq_dump_contexts(uint16_t port_id, uint16_t queue_id, const char *filename)
780 {
781 	struct rte_eth_dev *dev;
782 	struct mlx5_priv *priv;
783 	struct mlx5_txq_data *txq_data;
784 	struct mlx5_txq_ctrl *txq_ctrl;
785 	struct mlx5_txq_obj *txq_obj;
786 	struct mlx5_devx_sq *sq;
787 	struct mlx5_devx_cq *cq;
788 	struct mlx5_devx_obj *sq_devx_obj;
789 	struct mlx5_devx_obj *cq_devx_obj;
790 
791 	uint32_t sq_out[MLX5_ST_SZ_DW(query_sq_out)] = {0};
792 	uint32_t cq_out[MLX5_ST_SZ_DW(query_cq_out)] = {0};
793 
794 	int ret;
795 	FILE *fd;
796 	MKSTR(path, "./%s", filename);
797 
798 	if (!rte_eth_dev_is_valid_port(port_id))
799 		return -ENODEV;
800 
801 	if (rte_eth_tx_queue_is_valid(port_id, queue_id))
802 		return -EINVAL;
803 
804 	fd = fopen(path, "w");
805 	if (!fd) {
806 		rte_errno = errno;
807 		return -EIO;
808 	}
809 
810 	dev = &rte_eth_devices[port_id];
811 	priv = dev->data->dev_private;
812 	txq_data = (*priv->txqs)[queue_id];
813 	txq_ctrl = container_of(txq_data, struct mlx5_txq_ctrl, txq);
814 	txq_obj = txq_ctrl->obj;
815 	sq = &txq_obj->sq_obj;
816 	cq = &txq_obj->cq_obj;
817 	sq_devx_obj = sq->sq;
818 	cq_devx_obj = cq->cq;
819 
820 	do {
821 		ret = mlx5_devx_cmd_query_sq(sq_devx_obj, sq_out, sizeof(sq_out));
822 		if (ret)
823 			break;
824 
825 		/* Dump sq query output to file */
826 		MKSTR(sq_headline, "SQ DevX ID = %u Port = %u Queue index = %u ",
827 					sq_devx_obj->id, port_id, queue_id);
828 		mlx5_dump_to_file(fd, NULL, sq_headline, 0);
829 		mlx5_dump_to_file(fd, "Query SQ Dump:",
830 					(const void *)((uintptr_t)sq_out),
831 					sizeof(sq_out));
832 
833 		ret = mlx5_devx_cmd_query_cq(cq_devx_obj, cq_out, sizeof(cq_out));
834 		if (ret)
835 			break;
836 
837 		/* Dump cq query output to file */
838 		MKSTR(cq_headline, "CQ DevX ID = %u Port = %u Queue index = %u ",
839 						cq_devx_obj->id, port_id, queue_id);
840 		mlx5_dump_to_file(fd, NULL, cq_headline, 0);
841 		mlx5_dump_to_file(fd, "Query CQ Dump:",
842 					(const void *)((uintptr_t)cq_out),
843 					sizeof(cq_out));
844 	} while (false);
845 
846 	fclose(fd);
847 	return ret;
848 }
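/*
 * A minimal usage sketch (the port, queue and file name are placeholders;
 * the declaration is assumed to come from the PMD-specific rte_pmd_mlx5.h
 * header):
 *
 *   int rc = rte_pmd_mlx5_txq_dump_contexts(0, 0, "txq0_ctx.dump");
 *
 *   if (rc != 0)
 *       ;  // e.g. -ENODEV (bad port), -EINVAL (bad queue), -EIO (file open)
 *
 * On success the file contains the DevX query output for both the SQ and
 * the CQ backing the given Tx queue.
 */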
849