xref: /dpdk/drivers/net/mlx5/mlx5_trigger.c (revision b53d106d34b5c638f5a2cbdfee0da5bd42d4383f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5 
6 #include <unistd.h>
7 
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13 
14 #include <mlx5_malloc.h>
15 
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22 
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32 	struct mlx5_priv *priv = dev->data->dev_private;
33 	unsigned int i;
34 
35 	for (i = 0; i != priv->txqs_n; ++i)
36 		mlx5_txq_release(dev, i);
37 }
38 
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51 	struct mlx5_priv *priv = dev->data->dev_private;
52 	unsigned int i;
53 	int ret;
54 
55 	for (i = 0; i != priv->txqs_n; ++i) {
56 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57 		struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
58 		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59 
60 		if (!txq_ctrl)
61 			continue;
62 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63 			txq_alloc_elts(txq_ctrl);
64 		MLX5_ASSERT(!txq_ctrl->obj);
65 		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66 					    0, txq_ctrl->socket);
67 		if (!txq_ctrl->obj) {
68 			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69 				"memory resources.", dev->data->port_id,
70 				txq_data->idx);
71 			rte_errno = ENOMEM;
72 			goto error;
73 		}
74 		ret = priv->obj_ops.txq_obj_new(dev, i);
75 		if (ret < 0) {
76 			mlx5_free(txq_ctrl->obj);
77 			txq_ctrl->obj = NULL;
78 			goto error;
79 		}
80 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81 			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82 
83 			txq_data->fcqs = mlx5_malloc(flags, size,
84 						     RTE_CACHE_LINE_SIZE,
85 						     txq_ctrl->socket);
86 			if (!txq_data->fcqs) {
87 				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88 					"allocate memory (FCQ).",
89 					dev->data->port_id, i);
90 				rte_errno = ENOMEM;
91 				goto error;
92 			}
93 		}
94 		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95 			dev->data->port_id, i, (void *)&txq_ctrl->obj);
96 		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97 	}
98 	return 0;
99 error:
100 	ret = rte_errno; /* Save rte_errno before cleanup. */
101 	do {
102 		mlx5_txq_release(dev, i);
103 	} while (i-- != 0);
104 	rte_errno = ret; /* Restore rte_errno. */
105 	return -rte_errno;
106 }
107 
108 /**
109  * Register Rx queue mempools and fill the Rx queue cache.
110  * This function tolerates repeated mempool registration.
111  *
112  * @param[in] rxq_ctrl
113  *   Rx queue control data.
114  *
115  * @return
116  *   0 on success, (-1) on failure and rte_errno is set.
117  */
118 static int
119 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
120 {
121 	struct rte_mempool *mp;
122 	uint32_t s;
123 	int ret = 0;
124 
125 	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
126 	/* MPRQ mempool is registered on creation, just fill the cache. */
127 	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
128 		return mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
129 						      rxq_ctrl->rxq.mprq_mp);
130 	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
131 		bool is_extmem;
132 
133 		mp = rxq_ctrl->rxq.rxseg[s].mp;
134 		is_extmem = (rte_pktmbuf_priv_flags(mp) &
135 			     RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
136 		ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
137 					       is_extmem);
138 		if (ret < 0 && rte_errno != EEXIST)
139 			return ret;
140 		ret = mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
141 						     mp);
142 		if (ret < 0)
143 			return ret;
144 	}
145 	return 0;
146 }
147 
148 /**
149  * Stop traffic on Rx queues.
150  *
151  * @param dev
152  *   Pointer to Ethernet device structure.
153  */
154 static void
155 mlx5_rxq_stop(struct rte_eth_dev *dev)
156 {
157 	struct mlx5_priv *priv = dev->data->dev_private;
158 	unsigned int i;
159 
160 	for (i = 0; i != priv->rxqs_n; ++i)
161 		mlx5_rxq_release(dev, i);
162 }
163 
164 static int
165 mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
166 		      unsigned int idx)
167 {
168 	int ret = 0;
169 
170 	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
171 		/*
172 		 * Pre-register the mempools. Regardless of whether
173 		 * the implicit registration is enabled or not,
174 		 * Rx mempool destruction is tracked to free MRs.
175 		 */
176 		if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
177 			return -rte_errno;
178 		ret = rxq_alloc_elts(rxq_ctrl);
179 		if (ret)
180 			return ret;
181 	}
182 	MLX5_ASSERT(!rxq_ctrl->obj);
183 	rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
184 				    sizeof(*rxq_ctrl->obj), 0,
185 				    rxq_ctrl->socket);
186 	if (!rxq_ctrl->obj) {
187 		DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
188 			dev->data->port_id, idx);
189 		rte_errno = ENOMEM;
190 		return -rte_errno;
191 	}
192 	DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
193 		idx, (void *)&rxq_ctrl->obj);
194 	return 0;
195 }
196 
197 /**
198  * Start traffic on Rx queues.
199  *
200  * @param dev
201  *   Pointer to Ethernet device structure.
202  *
203  * @return
204  *   0 on success, a negative errno value otherwise and rte_errno is set.
205  */
206 static int
207 mlx5_rxq_start(struct rte_eth_dev *dev)
208 {
209 	struct mlx5_priv *priv = dev->data->dev_private;
210 	unsigned int i;
211 	int ret = 0;
212 
213 	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
214 	if (mlx5_mprq_alloc_mp(dev)) {
215 		/* Should not release Rx queues but return immediately. */
216 		return -rte_errno;
217 	}
218 	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
219 		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
220 	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
221 		dev->data->port_id, priv->sh->device_attr.max_sge);
222 	for (i = 0; i != priv->rxqs_n; ++i) {
223 		struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
224 		struct mlx5_rxq_ctrl *rxq_ctrl;
225 
226 		if (rxq == NULL)
227 			continue;
228 		rxq_ctrl = rxq->ctrl;
229 		if (!rxq_ctrl->started) {
230 			if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
231 				goto error;
232 			LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
233 		}
234 		ret = priv->obj_ops.rxq_obj_new(rxq);
235 		if (ret) {
236 			mlx5_free(rxq_ctrl->obj);
237 			rxq_ctrl->obj = NULL;
238 			goto error;
239 		}
240 		rxq_ctrl->started = true;
241 	}
242 	return 0;
243 error:
244 	ret = rte_errno; /* Save rte_errno before cleanup. */
245 	do {
246 		mlx5_rxq_release(dev, i);
247 	} while (i-- != 0);
248 	rte_errno = ret; /* Restore rte_errno. */
249 	return -rte_errno;
250 }
251 
252 /**
253  * Binds Tx queues to Rx queues for hairpin.
254  *
255  * Binds Tx queues to the target Rx queues.
256  *
257  * @param dev
258  *   Pointer to Ethernet device structure.
259  *
260  * @return
261  *   0 on success, a negative errno value otherwise and rte_errno is set.
262  */
263 static int
264 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
265 {
266 	struct mlx5_priv *priv = dev->data->dev_private;
267 	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
268 	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
269 	struct mlx5_txq_ctrl *txq_ctrl;
270 	struct mlx5_rxq_priv *rxq;
271 	struct mlx5_rxq_ctrl *rxq_ctrl;
272 	struct mlx5_devx_obj *sq;
273 	struct mlx5_devx_obj *rq;
274 	unsigned int i;
275 	int ret = 0;
276 	bool need_auto = false;
277 	uint16_t self_port = dev->data->port_id;
278 
279 	for (i = 0; i != priv->txqs_n; ++i) {
280 		txq_ctrl = mlx5_txq_get(dev, i);
281 		if (!txq_ctrl)
282 			continue;
283 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
284 		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
285 			mlx5_txq_release(dev, i);
286 			continue;
287 		}
288 		if (txq_ctrl->hairpin_conf.manual_bind) {
289 			mlx5_txq_release(dev, i);
290 			return 0;
291 		}
292 		need_auto = true;
293 		mlx5_txq_release(dev, i);
294 	}
295 	if (!need_auto)
296 		return 0;
297 	for (i = 0; i != priv->txqs_n; ++i) {
298 		txq_ctrl = mlx5_txq_get(dev, i);
299 		if (!txq_ctrl)
300 			continue;
301 		/* Skip hairpin queues with other peer ports. */
302 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
303 		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
304 			mlx5_txq_release(dev, i);
305 			continue;
306 		}
307 		if (!txq_ctrl->obj) {
308 			rte_errno = ENOMEM;
309 			DRV_LOG(ERR, "port %u no txq object found: %d",
310 				dev->data->port_id, i);
311 			mlx5_txq_release(dev, i);
312 			return -rte_errno;
313 		}
314 		sq = txq_ctrl->obj->sq;
315 		rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
316 		if (rxq == NULL) {
317 			mlx5_txq_release(dev, i);
318 			rte_errno = EINVAL;
319 			DRV_LOG(ERR, "port %u no rxq object found: %d",
320 				dev->data->port_id,
321 				txq_ctrl->hairpin_conf.peers[0].queue);
322 			return -rte_errno;
323 		}
324 		rxq_ctrl = rxq->ctrl;
325 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
326 		    rxq->hairpin_conf.peers[0].queue != i) {
327 			rte_errno = ENOMEM;
328 			DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
329 				"Rx queue %d", dev->data->port_id,
330 				i, txq_ctrl->hairpin_conf.peers[0].queue);
331 			goto error;
332 		}
333 		rq = rxq_ctrl->obj->rq;
334 		if (!rq) {
335 			rte_errno = ENOMEM;
336 			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
337 				dev->data->port_id,
338 				txq_ctrl->hairpin_conf.peers[0].queue);
339 			goto error;
340 		}
341 		sq_attr.state = MLX5_SQC_STATE_RDY;
342 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
343 		sq_attr.hairpin_peer_rq = rq->id;
344 		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
345 		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
346 		if (ret)
347 			goto error;
348 		rq_attr.state = MLX5_SQC_STATE_RDY;
349 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
350 		rq_attr.hairpin_peer_sq = sq->id;
351 		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
352 		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
353 		if (ret)
354 			goto error;
355 		/* Qs with auto-bind will be destroyed directly. */
356 		rxq->hairpin_status = 1;
357 		txq_ctrl->hairpin_status = 1;
358 		mlx5_txq_release(dev, i);
359 	}
360 	return 0;
361 error:
362 	mlx5_txq_release(dev, i);
363 	return -rte_errno;
364 }
365 
366 /*
367  * Fetch the peer queue's SW & HW information.
368  *
369  * @param dev
370  *   Pointer to Ethernet device structure.
371  * @param peer_queue
372  *   Index of the queue to fetch the information.
373  * @param current_info
374  *   Pointer to the input peer information, not used currently.
375  * @param peer_info
376  *   Pointer to the structure to store the information, output.
377  * @param direction
378  *   Positive to get the RxQ information, zero to get the TxQ information.
379  *
380  * @return
381  *   0 on success, a negative errno value otherwise and rte_errno is set.
382  */
383 int
384 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
385 			       struct rte_hairpin_peer_info *current_info,
386 			       struct rte_hairpin_peer_info *peer_info,
387 			       uint32_t direction)
388 {
389 	struct mlx5_priv *priv = dev->data->dev_private;
390 	RTE_SET_USED(current_info);
391 
392 	if (dev->data->dev_started == 0) {
393 		rte_errno = EBUSY;
394 		DRV_LOG(ERR, "peer port %u is not started",
395 			dev->data->port_id);
396 		return -rte_errno;
397 	}
398 	/*
399 	 * Peer port used as egress. In the current design, hairpin Tx queue
400 	 * will be bound to the peer Rx queue. Indeed, only the information of
401 	 * peer Rx queue needs to be fetched.
402 	 */
403 	if (direction == 0) {
404 		struct mlx5_txq_ctrl *txq_ctrl;
405 
406 		txq_ctrl = mlx5_txq_get(dev, peer_queue);
407 		if (txq_ctrl == NULL) {
408 			rte_errno = EINVAL;
409 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
410 				dev->data->port_id, peer_queue);
411 			return -rte_errno;
412 		}
413 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
414 			rte_errno = EINVAL;
415 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
416 				dev->data->port_id, peer_queue);
417 			mlx5_txq_release(dev, peer_queue);
418 			return -rte_errno;
419 		}
420 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
421 			rte_errno = ENOMEM;
422 			DRV_LOG(ERR, "port %u no Txq object found: %d",
423 				dev->data->port_id, peer_queue);
424 			mlx5_txq_release(dev, peer_queue);
425 			return -rte_errno;
426 		}
427 		peer_info->qp_id = txq_ctrl->obj->sq->id;
428 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
429 		/* 1-to-1 mapping, only the first one is used. */
430 		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
431 		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
432 		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
433 		mlx5_txq_release(dev, peer_queue);
434 	} else { /* Peer port used as ingress. */
435 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
436 		struct mlx5_rxq_ctrl *rxq_ctrl;
437 
438 		if (rxq == NULL) {
439 			rte_errno = EINVAL;
440 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
441 				dev->data->port_id, peer_queue);
442 			return -rte_errno;
443 		}
444 		rxq_ctrl = rxq->ctrl;
445 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
446 			rte_errno = EINVAL;
447 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
448 				dev->data->port_id, peer_queue);
449 			return -rte_errno;
450 		}
451 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
452 			rte_errno = ENOMEM;
453 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
454 				dev->data->port_id, peer_queue);
455 			return -rte_errno;
456 		}
457 		peer_info->qp_id = rxq_ctrl->obj->rq->id;
458 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
459 		peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
460 		peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
461 		peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
462 	}
463 	return 0;
464 }
465 
466 /*
467  * Bind the hairpin queue with the peer HW information.
468  * This needs to be called twice both for Tx and Rx queues of a pair.
469  * If the queue is already bound, it is considered successful.
470  *
471  * @param dev
472  *   Pointer to Ethernet device structure.
473  * @param cur_queue
474  *   Index of the queue to change the HW configuration to bind.
475  * @param peer_info
476  *   Pointer to information of the peer queue.
477  * @param direction
478  *   Positive to configure the TxQ, zero to configure the RxQ.
479  *
480  * @return
481  *   0 on success, a negative errno value otherwise and rte_errno is set.
482  */
483 int
484 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
485 			     struct rte_hairpin_peer_info *peer_info,
486 			     uint32_t direction)
487 {
488 	int ret = 0;
489 
490 	/*
491 	 * Consistency checking of the peer queue: opposite direction is used
492 	 * to get the peer queue info with ethdev port ID, no need to check.
493 	 */
494 	if (peer_info->peer_q != cur_queue) {
495 		rte_errno = EINVAL;
496 		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
497 			dev->data->port_id, cur_queue, peer_info->peer_q);
498 		return -rte_errno;
499 	}
500 	if (direction != 0) {
501 		struct mlx5_txq_ctrl *txq_ctrl;
502 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
503 
504 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
505 		if (txq_ctrl == NULL) {
506 			rte_errno = EINVAL;
507 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
508 				dev->data->port_id, cur_queue);
509 			return -rte_errno;
510 		}
511 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
512 			rte_errno = EINVAL;
513 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
514 				dev->data->port_id, cur_queue);
515 			mlx5_txq_release(dev, cur_queue);
516 			return -rte_errno;
517 		}
518 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
519 			rte_errno = ENOMEM;
520 			DRV_LOG(ERR, "port %u no Txq object found: %d",
521 				dev->data->port_id, cur_queue);
522 			mlx5_txq_release(dev, cur_queue);
523 			return -rte_errno;
524 		}
525 		if (txq_ctrl->hairpin_status != 0) {
526 			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
527 				dev->data->port_id, cur_queue);
528 			mlx5_txq_release(dev, cur_queue);
529 			return 0;
530 		}
531 		/*
532 		 * All queues' of one port consistency checking is done in the
533 		 * bind() function, and that is optional.
534 		 */
535 		if (peer_info->tx_explicit !=
536 		    txq_ctrl->hairpin_conf.tx_explicit) {
537 			rte_errno = EINVAL;
538 			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
539 				" mismatch", dev->data->port_id, cur_queue);
540 			mlx5_txq_release(dev, cur_queue);
541 			return -rte_errno;
542 		}
543 		if (peer_info->manual_bind !=
544 		    txq_ctrl->hairpin_conf.manual_bind) {
545 			rte_errno = EINVAL;
546 			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
547 				" mismatch", dev->data->port_id, cur_queue);
548 			mlx5_txq_release(dev, cur_queue);
549 			return -rte_errno;
550 		}
551 		sq_attr.state = MLX5_SQC_STATE_RDY;
552 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
553 		sq_attr.hairpin_peer_rq = peer_info->qp_id;
554 		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
555 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
556 		if (ret == 0)
557 			txq_ctrl->hairpin_status = 1;
558 		mlx5_txq_release(dev, cur_queue);
559 	} else {
560 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
561 		struct mlx5_rxq_ctrl *rxq_ctrl;
562 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
563 
564 		if (rxq == NULL) {
565 			rte_errno = EINVAL;
566 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
567 				dev->data->port_id, cur_queue);
568 			return -rte_errno;
569 		}
570 		rxq_ctrl = rxq->ctrl;
571 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
572 			rte_errno = EINVAL;
573 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
574 				dev->data->port_id, cur_queue);
575 			return -rte_errno;
576 		}
577 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
578 			rte_errno = ENOMEM;
579 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
580 				dev->data->port_id, cur_queue);
581 			return -rte_errno;
582 		}
583 		if (rxq->hairpin_status != 0) {
584 			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
585 				dev->data->port_id, cur_queue);
586 			return 0;
587 		}
588 		if (peer_info->tx_explicit !=
589 		    rxq->hairpin_conf.tx_explicit) {
590 			rte_errno = EINVAL;
591 			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
592 				" mismatch", dev->data->port_id, cur_queue);
593 			return -rte_errno;
594 		}
595 		if (peer_info->manual_bind !=
596 		    rxq->hairpin_conf.manual_bind) {
597 			rte_errno = EINVAL;
598 			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
599 				" mismatch", dev->data->port_id, cur_queue);
600 			return -rte_errno;
601 		}
602 		rq_attr.state = MLX5_SQC_STATE_RDY;
603 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
604 		rq_attr.hairpin_peer_sq = peer_info->qp_id;
605 		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
606 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
607 		if (ret == 0)
608 			rxq->hairpin_status = 1;
609 	}
610 	return ret;
611 }
612 
613 /*
614  * Unbind the hairpin queue and reset its HW configuration.
615  * This needs to be called twice both for Tx and Rx queues of a pair.
616  * If the queue is already unbound, it is considered successful.
617  *
618  * @param dev
619  *   Pointer to Ethernet device structure.
620  * @param cur_queue
621  *   Index of the queue to change the HW configuration to unbind.
622  * @param direction
623  *   Positive to reset the TxQ, zero to reset the RxQ.
624  *
625  * @return
626  *   0 on success, a negative errno value otherwise and rte_errno is set.
627  */
628 int
629 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
630 			       uint32_t direction)
631 {
632 	int ret = 0;
633 
634 	if (direction != 0) {
635 		struct mlx5_txq_ctrl *txq_ctrl;
636 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
637 
638 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
639 		if (txq_ctrl == NULL) {
640 			rte_errno = EINVAL;
641 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
642 				dev->data->port_id, cur_queue);
643 			return -rte_errno;
644 		}
645 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
646 			rte_errno = EINVAL;
647 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
648 				dev->data->port_id, cur_queue);
649 			mlx5_txq_release(dev, cur_queue);
650 			return -rte_errno;
651 		}
652 		/* Already unbound, return success before obj checking. */
653 		if (txq_ctrl->hairpin_status == 0) {
654 			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
655 				dev->data->port_id, cur_queue);
656 			mlx5_txq_release(dev, cur_queue);
657 			return 0;
658 		}
659 		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
660 			rte_errno = ENOMEM;
661 			DRV_LOG(ERR, "port %u no Txq object found: %d",
662 				dev->data->port_id, cur_queue);
663 			mlx5_txq_release(dev, cur_queue);
664 			return -rte_errno;
665 		}
666 		sq_attr.state = MLX5_SQC_STATE_RST;
667 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
668 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
669 		if (ret == 0)
670 			txq_ctrl->hairpin_status = 0;
671 		mlx5_txq_release(dev, cur_queue);
672 	} else {
673 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
674 		struct mlx5_rxq_ctrl *rxq_ctrl;
675 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
676 
677 		if (rxq == NULL) {
678 			rte_errno = EINVAL;
679 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
680 				dev->data->port_id, cur_queue);
681 			return -rte_errno;
682 		}
683 		rxq_ctrl = rxq->ctrl;
684 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
685 			rte_errno = EINVAL;
686 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
687 				dev->data->port_id, cur_queue);
688 			return -rte_errno;
689 		}
690 		if (rxq->hairpin_status == 0) {
691 			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
692 				dev->data->port_id, cur_queue);
693 			return 0;
694 		}
695 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
696 			rte_errno = ENOMEM;
697 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
698 				dev->data->port_id, cur_queue);
699 			return -rte_errno;
700 		}
701 		rq_attr.state = MLX5_SQC_STATE_RST;
702 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
703 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
704 		if (ret == 0)
705 			rxq->hairpin_status = 0;
706 	}
707 	return ret;
708 }
709 
710 /*
711  * Bind the hairpin port pairs, from the Tx to the peer Rx.
712  * This function only supports to bind the Tx to one Rx.
713  *
714  * @param dev
715  *   Pointer to Ethernet device structure.
716  * @param rx_port
717  *   Port identifier of the Rx port.
718  *
719  * @return
720  *   0 on success, a negative errno value otherwise and rte_errno is set.
721  */
722 static int
723 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
724 {
725 	struct mlx5_priv *priv = dev->data->dev_private;
726 	int ret = 0;
727 	struct mlx5_txq_ctrl *txq_ctrl;
728 	uint32_t i;
729 	struct rte_hairpin_peer_info peer = {0xffffff};
730 	struct rte_hairpin_peer_info cur;
731 	const struct rte_eth_hairpin_conf *conf;
732 	uint16_t num_q = 0;
733 	uint16_t local_port = priv->dev_data->port_id;
734 	uint32_t manual;
735 	uint32_t explicit;
736 	uint16_t rx_queue;
737 
738 	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
739 		rte_errno = ENODEV;
740 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
741 		return -rte_errno;
742 	}
743 	/*
744 	 * Before binding TxQ to peer RxQ, first round loop will be used for
745 	 * checking the queues' configuration consistency. This would be a
746 	 * little time consuming but better than doing the rollback.
747 	 */
748 	for (i = 0; i != priv->txqs_n; i++) {
749 		txq_ctrl = mlx5_txq_get(dev, i);
750 		if (txq_ctrl == NULL)
751 			continue;
752 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
753 			mlx5_txq_release(dev, i);
754 			continue;
755 		}
756 		/*
757 		 * All hairpin Tx queues of a single port that connected to the
758 		 * same peer Rx port should have the same "auto binding" and
759 		 * "implicit Tx flow" modes.
760 		 * Peer consistency checking will be done in per queue binding.
761 		 */
762 		conf = &txq_ctrl->hairpin_conf;
763 		if (conf->peers[0].port == rx_port) {
764 			if (num_q == 0) {
765 				manual = conf->manual_bind;
766 				explicit = conf->tx_explicit;
767 			} else {
768 				if (manual != conf->manual_bind ||
769 				    explicit != conf->tx_explicit) {
770 					rte_errno = EINVAL;
771 					DRV_LOG(ERR, "port %u queue %d mode"
772 						" mismatch: %u %u, %u %u",
773 						local_port, i, manual,
774 						conf->manual_bind, explicit,
775 						conf->tx_explicit);
776 					mlx5_txq_release(dev, i);
777 					return -rte_errno;
778 				}
779 			}
780 			num_q++;
781 		}
782 		mlx5_txq_release(dev, i);
783 	}
784 	/* Once no queue is configured, success is returned directly. */
785 	if (num_q == 0)
786 		return ret;
787 	/* All the hairpin TX queues need to be traversed again. */
788 	for (i = 0; i != priv->txqs_n; i++) {
789 		txq_ctrl = mlx5_txq_get(dev, i);
790 		if (txq_ctrl == NULL)
791 			continue;
792 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
793 			mlx5_txq_release(dev, i);
794 			continue;
795 		}
796 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
797 			mlx5_txq_release(dev, i);
798 			continue;
799 		}
800 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
801 		/*
802 		 * Fetch peer RxQ's information.
803 		 * No need to pass the information of the current queue.
804 		 */
805 		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
806 							NULL, &peer, 1);
807 		if (ret != 0) {
808 			mlx5_txq_release(dev, i);
809 			goto error;
810 		}
811 		/* Accessing its own device, inside mlx5 PMD. */
812 		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
813 		if (ret != 0) {
814 			mlx5_txq_release(dev, i);
815 			goto error;
816 		}
817 		/* Pass TxQ's information to peer RxQ and try binding. */
818 		cur.peer_q = rx_queue;
819 		cur.qp_id = txq_ctrl->obj->sq->id;
820 		cur.vhca_id = priv->config.hca_attr.vhca_id;
821 		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
822 		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
823 		/*
824 		 * In order to access another device in a proper way, RTE level
825 		 * private function is needed.
826 		 */
827 		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
828 						      &cur, 0);
829 		if (ret != 0) {
830 			mlx5_txq_release(dev, i);
831 			goto error;
832 		}
833 		mlx5_txq_release(dev, i);
834 	}
835 	return 0;
836 error:
837 	/*
838 	 * Do roll-back process for the queues already bound.
839 	 * No need to check the return value of the queue unbind function.
840 	 */
841 	do {
842 		/* No validation is needed here. */
843 		txq_ctrl = mlx5_txq_get(dev, i);
844 		if (txq_ctrl == NULL)
845 			continue;
846 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
847 		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
848 		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
849 		mlx5_txq_release(dev, i);
850 	} while (i--);
851 	return ret;
852 }
853 
854 /*
855  * Unbind the hairpin port pair, HW configuration of both devices will be clear
856  * and status will be reset for all the queues used between them.
857  * This function only supports to unbind the Tx from one Rx.
858  *
859  * @param dev
860  *   Pointer to Ethernet device structure.
861  * @param rx_port
862  *   Port identifier of the Rx port.
863  *
864  * @return
865  *   0 on success, a negative errno value otherwise and rte_errno is set.
866  */
867 static int
868 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
869 {
870 	struct mlx5_priv *priv = dev->data->dev_private;
871 	struct mlx5_txq_ctrl *txq_ctrl;
872 	uint32_t i;
873 	int ret;
874 	uint16_t cur_port = priv->dev_data->port_id;
875 
876 	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
877 		rte_errno = ENODEV;
878 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
879 		return -rte_errno;
880 	}
881 	for (i = 0; i != priv->txqs_n; i++) {
882 		uint16_t rx_queue;
883 
884 		txq_ctrl = mlx5_txq_get(dev, i);
885 		if (txq_ctrl == NULL)
886 			continue;
887 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
888 			mlx5_txq_release(dev, i);
889 			continue;
890 		}
891 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
892 			mlx5_txq_release(dev, i);
893 			continue;
894 		}
895 		/* Indeed, only the first used queue needs to be checked. */
896 		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
897 			if (cur_port != rx_port) {
898 				rte_errno = EINVAL;
899 				DRV_LOG(ERR, "port %u and port %u are in"
900 					" auto-bind mode", cur_port, rx_port);
901 				mlx5_txq_release(dev, i);
902 				return -rte_errno;
903 			} else {
904 				return 0;
905 			}
906 		}
907 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
908 		mlx5_txq_release(dev, i);
909 		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
910 		if (ret) {
911 			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
912 				rx_port, rx_queue);
913 			return ret;
914 		}
915 		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
916 		if (ret) {
917 			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
918 				cur_port, i);
919 			return ret;
920 		}
921 	}
922 	return 0;
923 }
924 
925 /*
926  * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
927  * @see mlx5_hairpin_bind_single_port()
928  */
929 int
930 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
931 {
932 	int ret = 0;
933 	uint16_t p, pp;
934 
935 	/*
936 	 * If the Rx port has no hairpin configuration with the current port,
937 	 * the binding will be skipped in the called function of single port.
938 	 * Device started status will be checked only before the queue
939 	 * information updating.
940 	 */
941 	if (rx_port == RTE_MAX_ETHPORTS) {
942 		MLX5_ETH_FOREACH_DEV(p, dev->device) {
943 			ret = mlx5_hairpin_bind_single_port(dev, p);
944 			if (ret != 0)
945 				goto unbind;
946 		}
947 		return ret;
948 	} else {
949 		return mlx5_hairpin_bind_single_port(dev, rx_port);
950 	}
951 unbind:
952 	MLX5_ETH_FOREACH_DEV(pp, dev->device)
953 		if (pp < p)
954 			mlx5_hairpin_unbind_single_port(dev, pp);
955 	return ret;
956 }
957 
958 /*
959  * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
960  * @see mlx5_hairpin_unbind_single_port()
961  */
962 int
963 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
964 {
965 	int ret = 0;
966 	uint16_t p;
967 
968 	if (rx_port == RTE_MAX_ETHPORTS)
969 		MLX5_ETH_FOREACH_DEV(p, dev->device) {
970 			ret = mlx5_hairpin_unbind_single_port(dev, p);
971 			if (ret != 0)
972 				return ret;
973 		}
974 	else
975 		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
976 	return ret;
977 }
978 
979 /*
980  * DPDK callback to get the hairpin peer ports list.
981  * This will return the actual number of peer ports and save the identifiers
982  * into the array (sorted, may be different from that when setting up the
983  * hairpin peer queues).
984  * The peer port ID could be the same as the port ID of the current device.
985  *
986  * @param dev
987  *   Pointer to Ethernet device structure.
988  * @param peer_ports
989  *   Pointer to array to save the port identifiers.
990  * @param len
991  *   The length of the array.
992  * @param direction
993  *   Current port to peer port direction.
994  *   positive - current used as Tx to get all peer Rx ports.
995  *   zero - current used as Rx to get all peer Tx ports.
996  *
997  * @return
998  *   0 or positive value on success, actual number of peer ports.
999  *   a negative errno value otherwise and rte_errno is set.
1000  */
1001 int
1002 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1003 			    size_t len, uint32_t direction)
1004 {
1005 	struct mlx5_priv *priv = dev->data->dev_private;
1006 	struct mlx5_txq_ctrl *txq_ctrl;
1007 	uint32_t i;
1008 	uint16_t pp;
1009 	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1010 	int ret = 0;
1011 
1012 	if (direction) {
1013 		for (i = 0; i < priv->txqs_n; i++) {
1014 			txq_ctrl = mlx5_txq_get(dev, i);
1015 			if (!txq_ctrl)
1016 				continue;
1017 			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1018 				mlx5_txq_release(dev, i);
1019 				continue;
1020 			}
1021 			pp = txq_ctrl->hairpin_conf.peers[0].port;
1022 			if (pp >= RTE_MAX_ETHPORTS) {
1023 				rte_errno = ERANGE;
1024 				mlx5_txq_release(dev, i);
1025 				DRV_LOG(ERR, "port %hu queue %u peer port "
1026 					"out of range %hu",
1027 					priv->dev_data->port_id, i, pp);
1028 				return -rte_errno;
1029 			}
1030 			bits[pp / 32] |= 1 << (pp % 32);
1031 			mlx5_txq_release(dev, i);
1032 		}
1033 	} else {
1034 		for (i = 0; i < priv->rxqs_n; i++) {
1035 			struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1036 			struct mlx5_rxq_ctrl *rxq_ctrl;
1037 
1038 			if (rxq == NULL)
1039 				continue;
1040 			rxq_ctrl = rxq->ctrl;
1041 			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
1042 				continue;
1043 			pp = rxq->hairpin_conf.peers[0].port;
1044 			if (pp >= RTE_MAX_ETHPORTS) {
1045 				rte_errno = ERANGE;
1046 				DRV_LOG(ERR, "port %hu queue %u peer port "
1047 					"out of range %hu",
1048 					priv->dev_data->port_id, i, pp);
1049 				return -rte_errno;
1050 			}
1051 			bits[pp / 32] |= 1 << (pp % 32);
1052 		}
1053 	}
1054 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1055 		if (bits[i / 32] & (1 << (i % 32))) {
1056 			if ((size_t)ret >= len) {
1057 				rte_errno = E2BIG;
1058 				return -rte_errno;
1059 			}
1060 			peer_ports[ret++] = i;
1061 		}
1062 	}
1063 	return ret;
1064 }
1065 
1066 /**
1067  * DPDK callback to start the device.
1068  *
1069  * Simulate device start by attaching all configured flows.
1070  *
1071  * @param dev
1072  *   Pointer to Ethernet device structure.
1073  *
1074  * @return
1075  *   0 on success, a negative errno value otherwise and rte_errno is set.
1076  */
1077 int
1078 mlx5_dev_start(struct rte_eth_dev *dev)
1079 {
1080 	struct mlx5_priv *priv = dev->data->dev_private;
1081 	int ret;
1082 	int fine_inline;
1083 
1084 	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1085 	fine_inline = rte_mbuf_dynflag_lookup
1086 		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1087 	if (fine_inline >= 0)
1088 		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1089 	else
1090 		rte_net_mlx5_dynf_inline_mask = 0;
1091 	if (dev->data->nb_rx_queues > 0) {
1092 		ret = mlx5_dev_configure_rss_reta(dev);
1093 		if (ret) {
1094 			DRV_LOG(ERR, "port %u reta config failed: %s",
1095 				dev->data->port_id, strerror(rte_errno));
1096 			return -rte_errno;
1097 		}
1098 	}
1099 	ret = mlx5_txpp_start(dev);
1100 	if (ret) {
1101 		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1102 			dev->data->port_id, strerror(rte_errno));
1103 		goto error;
1104 	}
1105 	if ((priv->sh->devx && priv->config.dv_flow_en &&
1106 	    priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1107 		ret = priv->obj_ops.lb_dummy_queue_create(dev);
1108 		if (ret)
1109 			goto error;
1110 	}
1111 	ret = mlx5_txq_start(dev);
1112 	if (ret) {
1113 		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1114 			dev->data->port_id, strerror(rte_errno));
1115 		goto error;
1116 	}
1117 	if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
1118 		if (!priv->config.vf && !priv->config.sf &&
1119 		    !priv->representor) {
1120 			ret = mlx5_get_flag_dropless_rq(dev);
1121 			if (ret < 0)
1122 				DRV_LOG(WARNING,
1123 					"port %u cannot query dropless flag",
1124 					dev->data->port_id);
1125 			else if (!ret)
1126 				DRV_LOG(WARNING,
1127 					"port %u dropless_rq OFF, no rearming",
1128 					dev->data->port_id);
1129 		} else {
1130 			DRV_LOG(DEBUG,
1131 				"port %u doesn't support dropless_rq flag",
1132 				dev->data->port_id);
1133 		}
1134 	}
1135 	ret = mlx5_rxq_start(dev);
1136 	if (ret) {
1137 		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1138 			dev->data->port_id, strerror(rte_errno));
1139 		goto error;
1140 	}
1141 	/*
1142 	 * Such step will be skipped if there is no hairpin TX queue configured
1143 	 * with RX peer queue from the same device.
1144 	 */
1145 	ret = mlx5_hairpin_auto_bind(dev);
1146 	if (ret) {
1147 		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1148 			dev->data->port_id, strerror(rte_errno));
1149 		goto error;
1150 	}
1151 	/* Set started flag here for the following steps like control flow. */
1152 	dev->data->dev_started = 1;
1153 	ret = mlx5_rx_intr_vec_enable(dev);
1154 	if (ret) {
1155 		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1156 			dev->data->port_id);
1157 		goto error;
1158 	}
1159 	mlx5_os_stats_init(dev);
1160 	/*
1161 	 * Attach indirection table objects detached on port stop.
1162 	 * They may be needed to create RSS in non-isolated mode.
1163 	 */
1164 	ret = mlx5_action_handle_attach(dev);
1165 	if (ret) {
1166 		DRV_LOG(ERR,
1167 			"port %u failed to attach indirect actions: %s",
1168 			dev->data->port_id, rte_strerror(rte_errno));
1169 		goto error;
1170 	}
1171 	ret = mlx5_traffic_enable(dev);
1172 	if (ret) {
1173 		DRV_LOG(ERR, "port %u failed to set defaults flows",
1174 			dev->data->port_id);
1175 		goto error;
1176 	}
1177 	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
1178 	mlx5_flow_rxq_dynf_metadata_set(dev);
1179 	/* Set flags and context to convert Rx timestamps. */
1180 	mlx5_rxq_timestamp_set(dev);
1181 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
1182 	mlx5_txq_dynf_timestamp_set(dev);
1183 	/*
1184 	 * In non-cached mode, it only needs to start the default mreg copy
1185 	 * action and no flow created by application exists anymore.
1186 	 * But it is worth wrapping the interface for further usage.
1187 	 */
1188 	ret = mlx5_flow_start_default(dev);
1189 	if (ret) {
1190 		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1191 			dev->data->port_id, strerror(rte_errno));
1192 		goto error;
1193 	}
1194 	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1195 		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1196 			dev->data->port_id, rte_strerror(rte_errno));
1197 		goto error;
1198 	}
1199 	rte_wmb();
1200 	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1201 	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1202 	/* Enable datapath on secondary process. */
1203 	mlx5_mp_os_req_start_rxtx(dev);
1204 	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1205 		priv->sh->port[priv->dev_port - 1].ih_port_id =
1206 					(uint32_t)dev->data->port_id;
1207 	} else {
1208 		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1209 			dev->data->port_id);
1210 		dev->data->dev_conf.intr_conf.lsc = 0;
1211 		dev->data->dev_conf.intr_conf.rmv = 0;
1212 	}
1213 	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1214 		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1215 					(uint32_t)dev->data->port_id;
1216 	return 0;
1217 error:
1218 	ret = rte_errno; /* Save rte_errno before cleanup. */
1219 	/* Rollback. */
1220 	dev->data->dev_started = 0;
1221 	mlx5_flow_stop_default(dev);
1222 	mlx5_traffic_disable(dev);
1223 	mlx5_txq_stop(dev);
1224 	mlx5_rxq_stop(dev);
1225 	if (priv->obj_ops.lb_dummy_queue_release)
1226 		priv->obj_ops.lb_dummy_queue_release(dev);
1227 	mlx5_txpp_stop(dev); /* Stop last. */
1228 	rte_errno = ret; /* Restore rte_errno. */
1229 	return -rte_errno;
1230 }
1231 
1232 /**
1233  * DPDK callback to stop the device.
1234  *
1235  * Simulate device stop by detaching all configured flows.
1236  *
1237  * @param dev
1238  *   Pointer to Ethernet device structure.
1239  */
1240 int
1241 mlx5_dev_stop(struct rte_eth_dev *dev)
1242 {
1243 	struct mlx5_priv *priv = dev->data->dev_private;
1244 
1245 	dev->data->dev_started = 0;
1246 	/* Prevent crashes when queues are still in use. */
1247 	dev->rx_pkt_burst = removed_rx_burst;
1248 	dev->tx_pkt_burst = removed_tx_burst;
1249 	rte_wmb();
1250 	/* Disable datapath on secondary process. */
1251 	mlx5_mp_os_req_stop_rxtx(dev);
1252 	rte_delay_us_sleep(1000 * priv->rxqs_n);
1253 	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1254 	mlx5_flow_stop_default(dev);
1255 	/* Control flows for default traffic can be removed firstly. */
1256 	mlx5_traffic_disable(dev);
1257 	/* All RX queue flags will be cleared in the flush interface. */
1258 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1259 	mlx5_flow_meter_rxq_flush(dev);
1260 	mlx5_action_handle_detach(dev);
1261 	mlx5_rx_intr_vec_disable(dev);
1262 	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1263 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1264 	mlx5_txq_stop(dev);
1265 	mlx5_rxq_stop(dev);
1266 	if (priv->obj_ops.lb_dummy_queue_release)
1267 		priv->obj_ops.lb_dummy_queue_release(dev);
1268 	mlx5_txpp_stop(dev);
1269 
1270 	return 0;
1271 }
1272 
1273 /**
1274  * Enable traffic flows configured by control plane
1275  *
1276  * @param dev
1277  *   Pointer to Ethernet device private data.
1278  * @param dev
1279  *   Pointer to Ethernet device structure.
1280  *
1281  * @return
1282  *   0 on success, a negative errno value otherwise and rte_errno is set.
1283  */
1284 int
1285 mlx5_traffic_enable(struct rte_eth_dev *dev)
1286 {
1287 	struct mlx5_priv *priv = dev->data->dev_private;
1288 	struct rte_flow_item_eth bcast = {
1289 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1290 	};
1291 	struct rte_flow_item_eth ipv6_multi_spec = {
1292 		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1293 	};
1294 	struct rte_flow_item_eth ipv6_multi_mask = {
1295 		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1296 	};
1297 	struct rte_flow_item_eth unicast = {
1298 		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1299 	};
1300 	struct rte_flow_item_eth unicast_mask = {
1301 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1302 	};
1303 	const unsigned int vlan_filter_n = priv->vlan_filter_n;
1304 	const struct rte_ether_addr cmp = {
1305 		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1306 	};
1307 	unsigned int i;
1308 	unsigned int j;
1309 	int ret;
1310 
1311 	/*
1312 	 * Hairpin txq default flow should be created no matter if it is
1313 	 * isolation mode. Or else all the packets to be sent will be sent
1314 	 * out directly without the TX flow actions, e.g. encapsulation.
1315 	 */
1316 	for (i = 0; i != priv->txqs_n; ++i) {
1317 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1318 		if (!txq_ctrl)
1319 			continue;
1320 		/* Only Tx implicit mode requires the default Tx flow. */
1321 		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1322 		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1323 		    txq_ctrl->hairpin_conf.peers[0].port ==
1324 		    priv->dev_data->port_id) {
1325 			ret = mlx5_ctrl_flow_source_queue(dev, i);
1326 			if (ret) {
1327 				mlx5_txq_release(dev, i);
1328 				goto error;
1329 			}
1330 		}
1331 		if ((priv->representor || priv->master) &&
1332 		    priv->config.dv_esw_en) {
1333 			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1334 				DRV_LOG(ERR,
1335 					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
1336 					dev->data->port_id, i);
1337 				goto error;
1338 			}
1339 		}
1340 		mlx5_txq_release(dev, i);
1341 	}
1342 	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1343 		if (mlx5_flow_create_esw_table_zero_flow(dev))
1344 			priv->fdb_def_rule = 1;
1345 		else
1346 			DRV_LOG(INFO, "port %u FDB default rule cannot be"
1347 				" configured - only Eswitch group 0 flows are"
1348 				" supported.", dev->data->port_id);
1349 	}
1350 	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1351 		ret = mlx5_flow_lacp_miss(dev);
1352 		if (ret)
1353 			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1354 				"forward LACP to kernel.", dev->data->port_id);
1355 		else
1356 			DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1357 				, dev->data->port_id);
1358 	}
1359 	if (priv->isolated)
1360 		return 0;
1361 	if (dev->data->promiscuous) {
1362 		struct rte_flow_item_eth promisc = {
1363 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1364 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1365 			.type = 0,
1366 		};
1367 
1368 		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1369 		if (ret)
1370 			goto error;
1371 	}
1372 	if (dev->data->all_multicast) {
1373 		struct rte_flow_item_eth multicast = {
1374 			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1375 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1376 			.type = 0,
1377 		};
1378 
1379 		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1380 		if (ret)
1381 			goto error;
1382 	} else {
1383 		/* Add broadcast/multicast flows. */
1384 		for (i = 0; i != vlan_filter_n; ++i) {
1385 			uint16_t vlan = priv->vlan_filter[i];
1386 
1387 			struct rte_flow_item_vlan vlan_spec = {
1388 				.tci = rte_cpu_to_be_16(vlan),
1389 			};
1390 			struct rte_flow_item_vlan vlan_mask =
1391 				rte_flow_item_vlan_mask;
1392 
1393 			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1394 						  &vlan_spec, &vlan_mask);
1395 			if (ret)
1396 				goto error;
1397 			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1398 						  &ipv6_multi_mask,
1399 						  &vlan_spec, &vlan_mask);
1400 			if (ret)
1401 				goto error;
1402 		}
1403 		if (!vlan_filter_n) {
1404 			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1405 			if (ret)
1406 				goto error;
1407 			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1408 					     &ipv6_multi_mask);
1409 			if (ret) {
1410 				/* Do not fail on IPv6 broadcast creation failure. */
1411 				DRV_LOG(WARNING,
1412 					"IPv6 broadcast is not supported");
1413 				ret = 0;
1414 			}
1415 		}
1416 	}
1417 	/* Add MAC address flows. */
1418 	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1419 		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1420 
1421 		if (!memcmp(mac, &cmp, sizeof(*mac)))
1422 			continue;
1423 		memcpy(&unicast.dst.addr_bytes,
1424 		       mac->addr_bytes,
1425 		       RTE_ETHER_ADDR_LEN);
1426 		for (j = 0; j != vlan_filter_n; ++j) {
1427 			uint16_t vlan = priv->vlan_filter[j];
1428 
1429 			struct rte_flow_item_vlan vlan_spec = {
1430 				.tci = rte_cpu_to_be_16(vlan),
1431 			};
1432 			struct rte_flow_item_vlan vlan_mask =
1433 				rte_flow_item_vlan_mask;
1434 
1435 			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1436 						  &unicast_mask,
1437 						  &vlan_spec,
1438 						  &vlan_mask);
1439 			if (ret)
1440 				goto error;
1441 		}
1442 		if (!vlan_filter_n) {
1443 			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1444 			if (ret)
1445 				goto error;
1446 		}
1447 	}
1448 	return 0;
1449 error:
1450 	ret = rte_errno; /* Save rte_errno before cleanup. */
1451 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1452 	rte_errno = ret; /* Restore rte_errno. */
1453 	return -rte_errno;
1454 }
1455 
1456 
1457 /**
1458  * Disable traffic flows configured by control plane
1459  *
1460  * @param dev
1461  *   Pointer to Ethernet device private data.
1462  */
1463 void
1464 mlx5_traffic_disable(struct rte_eth_dev *dev)
1465 {
1466 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1467 }
1468 
1469 /**
1470  * Restart traffic flows configured by control plane
1471  *
1472  * @param dev
1473  *   Pointer to Ethernet device private data.
1474  *
1475  * @return
1476  *   0 on success, a negative errno value otherwise and rte_errno is set.
1477  */
1478 int
1479 mlx5_traffic_restart(struct rte_eth_dev *dev)
1480 {
1481 	if (dev->data->dev_started) {
1482 		mlx5_traffic_disable(dev);
1483 		return mlx5_traffic_enable(dev);
1484 	}
1485 	return 0;
1486 }
1487