xref: /dpdk/drivers/net/mlx5/mlx5_trigger.c (revision bbbe38a6d59ccdda25917712701e629d0b10af6f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5 
6 #include <unistd.h>
7 
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13 
14 #include <mlx5_malloc.h>
15 
16 #include "mlx5.h"
17 #include "mlx5_mr.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22 
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32 	struct mlx5_priv *priv = dev->data->dev_private;
33 	unsigned int i;
34 
35 	for (i = 0; i != priv->txqs_n; ++i)
36 		mlx5_txq_release(dev, i);
37 }
38 
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51 	struct mlx5_priv *priv = dev->data->dev_private;
52 	unsigned int i;
53 	int ret;
54 
55 	for (i = 0; i != priv->txqs_n; ++i) {
56 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57 		struct mlx5_txq_data *txq_data = txq_ctrl ? &txq_ctrl->txq : NULL;
58 		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59 
60 		if (!txq_ctrl)
61 			continue;
62 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63 			txq_alloc_elts(txq_ctrl);
64 		MLX5_ASSERT(!txq_ctrl->obj);
65 		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66 					    0, txq_ctrl->socket);
67 		if (!txq_ctrl->obj) {
68 			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69 				"memory resources.", dev->data->port_id,
70 				txq_data->idx);
71 			rte_errno = ENOMEM;
72 			goto error;
73 		}
74 		ret = priv->obj_ops.txq_obj_new(dev, i);
75 		if (ret < 0) {
76 			mlx5_free(txq_ctrl->obj);
77 			txq_ctrl->obj = NULL;
78 			goto error;
79 		}
80 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81 			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82 
83 			txq_data->fcqs = mlx5_malloc(flags, size,
84 						     RTE_CACHE_LINE_SIZE,
85 						     txq_ctrl->socket);
86 			if (!txq_data->fcqs) {
87 				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88 					"allocate memory (FCQ).",
89 					dev->data->port_id, i);
90 				rte_errno = ENOMEM;
91 				goto error;
92 			}
93 		}
94 		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95 			dev->data->port_id, i, (void *)&txq_ctrl->obj);
96 		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97 	}
98 	return 0;
99 error:
100 	ret = rte_errno; /* Save rte_errno before cleanup. */
101 	do {
102 		mlx5_txq_release(dev, i);
103 	} while (i-- != 0);
104 	rte_errno = ret; /* Restore rte_errno. */
105 	return -rte_errno;
106 }
107 
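/*
 * A minimal caller-side sketch of the return convention used by the queue
 * start helpers in this file (0 on success, negative value with rte_errno
 * set on failure); it mirrors how mlx5_dev_start() below consumes it:
 *
 *	ret = mlx5_txq_start(dev);
 *	if (ret) {
 *		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
 *			dev->data->port_id, strerror(rte_errno));
 *		return -rte_errno;
 *	}
 */
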
108 /**
109  * Stop traffic on Rx queues.
110  *
111  * @param dev
112  *   Pointer to Ethernet device structure.
113  */
114 static void
115 mlx5_rxq_stop(struct rte_eth_dev *dev)
116 {
117 	struct mlx5_priv *priv = dev->data->dev_private;
118 	unsigned int i;
119 
120 	for (i = 0; i != priv->rxqs_n; ++i)
121 		mlx5_rxq_release(dev, i);
122 }
123 
124 /**
125  * Start traffic on Rx queues.
126  *
127  * @param dev
128  *   Pointer to Ethernet device structure.
129  *
130  * @return
131  *   0 on success, a negative errno value otherwise and rte_errno is set.
132  */
133 static int
134 mlx5_rxq_start(struct rte_eth_dev *dev)
135 {
136 	struct mlx5_priv *priv = dev->data->dev_private;
137 	unsigned int i;
138 	int ret = 0;
139 
140 	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
141 	if (mlx5_mprq_alloc_mp(dev)) {
142 		/* Should not release Rx queues but return immediately. */
143 		return -rte_errno;
144 	}
145 	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
146 		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
147 	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
148 		dev->data->port_id, priv->sh->device_attr.max_sge);
149 	for (i = 0; i != priv->rxqs_n; ++i) {
150 		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
151 
152 		if (!rxq_ctrl)
153 			continue;
154 		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
155 			/* Pre-register Rx mempools. */
156 			if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
157 				mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
158 						  rxq_ctrl->rxq.mprq_mp);
159 			} else {
160 				uint32_t s;
161 
162 				for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
163 					mlx5_mr_update_mp
164 						(dev, &rxq_ctrl->rxq.mr_ctrl,
165 						rxq_ctrl->rxq.rxseg[s].mp);
166 			}
167 			ret = rxq_alloc_elts(rxq_ctrl);
168 			if (ret)
169 				goto error;
170 		}
171 		MLX5_ASSERT(!rxq_ctrl->obj);
172 		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
173 					    sizeof(*rxq_ctrl->obj), 0,
174 					    rxq_ctrl->socket);
175 		if (!rxq_ctrl->obj) {
176 			DRV_LOG(ERR,
177 				"Port %u Rx queue %u can't allocate resources.",
178 				dev->data->port_id, (*priv->rxqs)[i]->idx);
179 			rte_errno = ENOMEM;
180 			goto error;
181 		}
182 		ret = priv->obj_ops.rxq_obj_new(dev, i);
183 		if (ret) {
184 			mlx5_free(rxq_ctrl->obj);
185 			goto error;
186 		}
187 		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
188 			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
189 		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
190 	}
191 	return 0;
192 error:
193 	ret = rte_errno; /* Save rte_errno before cleanup. */
194 	do {
195 		mlx5_rxq_release(dev, i);
196 	} while (i-- != 0);
197 	rte_errno = ret; /* Restore rte_errno. */
198 	return -rte_errno;
199 }
200 
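/*
 * The Multi-Packet RQ mempool allocated at the top of mlx5_rxq_start() is
 * only used when MPRQ is enabled. A hedged example of enabling it through
 * device arguments (the PCI address below is a placeholder, see
 * doc/guides/nics/mlx5.rst for the exact knobs supported by a release):
 *
 *	dpdk-testpmd -a 0000:03:00.0,mprq_en=1 -- -i
 */
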
201 /**
202  * Binds Tx queues to Rx queues for hairpin.
203  *
204  * Binds Tx queues to the target Rx queues.
205  *
206  * @param dev
207  *   Pointer to Ethernet device structure.
208  *
209  * @return
210  *   0 on success, a negative errno value otherwise and rte_errno is set.
211  */
212 static int
213 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
214 {
215 	struct mlx5_priv *priv = dev->data->dev_private;
216 	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
217 	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
218 	struct mlx5_txq_ctrl *txq_ctrl;
219 	struct mlx5_rxq_ctrl *rxq_ctrl;
220 	struct mlx5_devx_obj *sq;
221 	struct mlx5_devx_obj *rq;
222 	unsigned int i;
223 	int ret = 0;
224 	bool need_auto = false;
225 	uint16_t self_port = dev->data->port_id;
226 
227 	for (i = 0; i != priv->txqs_n; ++i) {
228 		txq_ctrl = mlx5_txq_get(dev, i);
229 		if (!txq_ctrl)
230 			continue;
231 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
232 			mlx5_txq_release(dev, i);
233 			continue;
234 		}
235 		if (txq_ctrl->hairpin_conf.peers[0].port != self_port)
236 			continue;
237 		if (txq_ctrl->hairpin_conf.manual_bind) {
238 			mlx5_txq_release(dev, i);
239 			return 0;
240 		}
241 		need_auto = true;
242 		mlx5_txq_release(dev, i);
243 	}
244 	if (!need_auto)
245 		return 0;
246 	for (i = 0; i != priv->txqs_n; ++i) {
247 		txq_ctrl = mlx5_txq_get(dev, i);
248 		if (!txq_ctrl)
249 			continue;
250 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
251 			mlx5_txq_release(dev, i);
252 			continue;
253 		}
254 		/* Skip hairpin queues with other peer ports. */
255 		if (txq_ctrl->hairpin_conf.peers[0].port != self_port)
256 			continue;
257 		if (!txq_ctrl->obj) {
258 			rte_errno = ENOMEM;
259 			DRV_LOG(ERR, "port %u no txq object found: %d",
260 				dev->data->port_id, i);
261 			mlx5_txq_release(dev, i);
262 			return -rte_errno;
263 		}
264 		sq = txq_ctrl->obj->sq;
265 		rxq_ctrl = mlx5_rxq_get(dev,
266 					txq_ctrl->hairpin_conf.peers[0].queue);
267 		if (!rxq_ctrl) {
268 			mlx5_txq_release(dev, i);
269 			rte_errno = EINVAL;
270 			DRV_LOG(ERR, "port %u no rxq object found: %d",
271 				dev->data->port_id,
272 				txq_ctrl->hairpin_conf.peers[0].queue);
273 			return -rte_errno;
274 		}
275 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
276 		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
277 			rte_errno = ENOMEM;
278 			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
279 				"Rx queue %d", dev->data->port_id,
280 				i, txq_ctrl->hairpin_conf.peers[0].queue);
281 			goto error;
282 		}
283 		rq = rxq_ctrl->obj->rq;
284 		if (!rq) {
285 			rte_errno = ENOMEM;
286 			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
287 				dev->data->port_id,
288 				txq_ctrl->hairpin_conf.peers[0].queue);
289 			goto error;
290 		}
291 		sq_attr.state = MLX5_SQC_STATE_RDY;
292 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
293 		sq_attr.hairpin_peer_rq = rq->id;
294 		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
295 		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
296 		if (ret)
297 			goto error;
298 		rq_attr.state = MLX5_SQC_STATE_RDY;
299 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
300 		rq_attr.hairpin_peer_sq = sq->id;
301 		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
302 		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
303 		if (ret)
304 			goto error;
305 		/* Qs with auto-bind will be destroyed directly. */
306 		rxq_ctrl->hairpin_status = 1;
307 		txq_ctrl->hairpin_status = 1;
308 		mlx5_txq_release(dev, i);
309 		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
310 	}
311 	return 0;
312 error:
313 	mlx5_txq_release(dev, i);
314 	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
315 	return -rte_errno;
316 }
317 
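/*
 * Illustrative sketch of the application-side setup that leads to the
 * auto-bind path above: a hairpin queue pair on the same port, with
 * manual_bind and tx_explicit left at 0, is bound automatically during
 * rte_eth_dev_start(). Names such as port_id, txq_id, rxq_id and the
 * descriptor count are placeholders; error handling is elided.
 *
 *	struct rte_eth_hairpin_conf hp_conf = {
 *		.peer_count = 1,
 *		.manual_bind = 0,
 *		.tx_explicit = 0,
 *	};
 *
 *	hp_conf.peers[0].port = port_id;
 *	hp_conf.peers[0].queue = rxq_id;
 *	ret = rte_eth_tx_hairpin_queue_setup(port_id, txq_id, 512, &hp_conf);
 *	hp_conf.peers[0].queue = txq_id;
 *	ret = rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, 512, &hp_conf);
 */
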
318 /*
319  * Fetch the peer queue's SW & HW information.
320  *
321  * @param dev
322  *   Pointer to Ethernet device structure.
323  * @param peer_queue
324  *   Index of the queue to fetch the information.
325  * @param current_info
326  *   Pointer to the input peer information, not used currently.
327  * @param peer_info
328  *   Pointer to the structure to store the information, output.
329  * @param direction
330  *   Positive to get the RxQ information, zero to get the TxQ information.
331  *
332  * @return
333  *   0 on success, a negative errno value otherwise and rte_errno is set.
334  */
335 int
336 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
337 			       struct rte_hairpin_peer_info *current_info,
338 			       struct rte_hairpin_peer_info *peer_info,
339 			       uint32_t direction)
340 {
341 	struct mlx5_priv *priv = dev->data->dev_private;
342 	RTE_SET_USED(current_info);
343 
344 	if (dev->data->dev_started == 0) {
345 		rte_errno = EBUSY;
346 		DRV_LOG(ERR, "peer port %u is not started",
347 			dev->data->port_id);
348 		return -rte_errno;
349 	}
350 	/*
351 	 * Peer port used as egress. In the current design, hairpin Tx queue
352 	 * will be bound to the peer Rx queue. Indeed, only the information of
353 	 * peer Rx queue needs to be fetched.
354 	 */
355 	if (direction == 0) {
356 		struct mlx5_txq_ctrl *txq_ctrl;
357 
358 		txq_ctrl = mlx5_txq_get(dev, peer_queue);
359 		if (txq_ctrl == NULL) {
360 			rte_errno = EINVAL;
361 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
362 				dev->data->port_id, peer_queue);
363 			return -rte_errno;
364 		}
365 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
366 			rte_errno = EINVAL;
367 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
368 				dev->data->port_id, peer_queue);
369 			mlx5_txq_release(dev, peer_queue);
370 			return -rte_errno;
371 		}
372 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
373 			rte_errno = ENOMEM;
374 			DRV_LOG(ERR, "port %u no Txq object found: %d",
375 				dev->data->port_id, peer_queue);
376 			mlx5_txq_release(dev, peer_queue);
377 			return -rte_errno;
378 		}
379 		peer_info->qp_id = txq_ctrl->obj->sq->id;
380 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
381 		/* 1-to-1 mapping, only the first one is used. */
382 		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
383 		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
384 		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
385 		mlx5_txq_release(dev, peer_queue);
386 	} else { /* Peer port used as ingress. */
387 		struct mlx5_rxq_ctrl *rxq_ctrl;
388 
389 		rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
390 		if (rxq_ctrl == NULL) {
391 			rte_errno = EINVAL;
392 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
393 				dev->data->port_id, peer_queue);
394 			return -rte_errno;
395 		}
396 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
397 			rte_errno = EINVAL;
398 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
399 				dev->data->port_id, peer_queue);
400 			mlx5_rxq_release(dev, peer_queue);
401 			return -rte_errno;
402 		}
403 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
404 			rte_errno = ENOMEM;
405 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
406 				dev->data->port_id, peer_queue);
407 			mlx5_rxq_release(dev, peer_queue);
408 			return -rte_errno;
409 		}
410 		peer_info->qp_id = rxq_ctrl->obj->rq->id;
411 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
412 		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
413 		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
414 		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
415 		mlx5_rxq_release(dev, peer_queue);
416 	}
417 	return 0;
418 }
419 
420 /*
421  * Bind the hairpin queue with the peer HW information.
422  * This needs to be called twice, for the Tx and the Rx queues of a pair.
423  * If the queue is already bound, it is considered successful.
424  *
425  * @param dev
426  *   Pointer to Ethernet device structure.
427  * @param cur_queue
428  *   Index of the queue to change the HW configuration to bind.
429  * @param peer_info
430  *   Pointer to information of the peer queue.
431  * @param direction
432  *   Positive to configure the TxQ, zero to configure the RxQ.
433  *
434  * @return
435  *   0 on success, a negative errno value otherwise and rte_errno is set.
436  */
437 int
438 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
439 			     struct rte_hairpin_peer_info *peer_info,
440 			     uint32_t direction)
441 {
442 	int ret = 0;
443 
444 	/*
445 	 * Consistency check of the peer queue: its info was already fetched
446 	 * via the ethdev port ID in the opposite direction, no need to check.
447 	 */
448 	if (peer_info->peer_q != cur_queue) {
449 		rte_errno = EINVAL;
450 		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
451 			dev->data->port_id, cur_queue, peer_info->peer_q);
452 		return -rte_errno;
453 	}
454 	if (direction != 0) {
455 		struct mlx5_txq_ctrl *txq_ctrl;
456 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
457 
458 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
459 		if (txq_ctrl == NULL) {
460 			rte_errno = EINVAL;
461 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
462 				dev->data->port_id, cur_queue);
463 			return -rte_errno;
464 		}
465 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
466 			rte_errno = EINVAL;
467 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
468 				dev->data->port_id, cur_queue);
469 			mlx5_txq_release(dev, cur_queue);
470 			return -rte_errno;
471 		}
472 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
473 			rte_errno = ENOMEM;
474 			DRV_LOG(ERR, "port %u no Txq object found: %d",
475 				dev->data->port_id, cur_queue);
476 			mlx5_txq_release(dev, cur_queue);
477 			return -rte_errno;
478 		}
479 		if (txq_ctrl->hairpin_status != 0) {
480 			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
481 				dev->data->port_id, cur_queue);
482 			mlx5_txq_release(dev, cur_queue);
483 			return 0;
484 		}
485 		/*
486 		 * Consistency checking of all queues of one port is done in
487 		 * the bind() function, and that check is optional.
488 		 */
489 		if (peer_info->tx_explicit !=
490 		    txq_ctrl->hairpin_conf.tx_explicit) {
491 			rte_errno = EINVAL;
492 			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
493 				" mismatch", dev->data->port_id, cur_queue);
494 			mlx5_txq_release(dev, cur_queue);
495 			return -rte_errno;
496 		}
497 		if (peer_info->manual_bind !=
498 		    txq_ctrl->hairpin_conf.manual_bind) {
499 			rte_errno = EINVAL;
500 			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
501 				" mismatch", dev->data->port_id, cur_queue);
502 			mlx5_txq_release(dev, cur_queue);
503 			return -rte_errno;
504 		}
505 		sq_attr.state = MLX5_SQC_STATE_RDY;
506 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
507 		sq_attr.hairpin_peer_rq = peer_info->qp_id;
508 		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
509 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
510 		if (ret == 0)
511 			txq_ctrl->hairpin_status = 1;
512 		mlx5_txq_release(dev, cur_queue);
513 	} else {
514 		struct mlx5_rxq_ctrl *rxq_ctrl;
515 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
516 
517 		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
518 		if (rxq_ctrl == NULL) {
519 			rte_errno = EINVAL;
520 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
521 				dev->data->port_id, cur_queue);
522 			return -rte_errno;
523 		}
524 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
525 			rte_errno = EINVAL;
526 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
527 				dev->data->port_id, cur_queue);
528 			mlx5_rxq_release(dev, cur_queue);
529 			return -rte_errno;
530 		}
531 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
532 			rte_errno = ENOMEM;
533 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
534 				dev->data->port_id, cur_queue);
535 			mlx5_rxq_release(dev, cur_queue);
536 			return -rte_errno;
537 		}
538 		if (rxq_ctrl->hairpin_status != 0) {
539 			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
540 				dev->data->port_id, cur_queue);
541 			mlx5_rxq_release(dev, cur_queue);
542 			return 0;
543 		}
544 		if (peer_info->tx_explicit !=
545 		    rxq_ctrl->hairpin_conf.tx_explicit) {
546 			rte_errno = EINVAL;
547 			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
548 				" mismatch", dev->data->port_id, cur_queue);
549 			mlx5_rxq_release(dev, cur_queue);
550 			return -rte_errno;
551 		}
552 		if (peer_info->manual_bind !=
553 		    rxq_ctrl->hairpin_conf.manual_bind) {
554 			rte_errno = EINVAL;
555 			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
556 				" mismatch", dev->data->port_id, cur_queue);
557 			mlx5_rxq_release(dev, cur_queue);
558 			return -rte_errno;
559 		}
560 		rq_attr.state = MLX5_SQC_STATE_RDY;
561 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
562 		rq_attr.hairpin_peer_sq = peer_info->qp_id;
563 		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
564 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
565 		if (ret == 0)
566 			rxq_ctrl->hairpin_status = 1;
567 		mlx5_rxq_release(dev, cur_queue);
568 	}
569 	return ret;
570 }
571 
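/*
 * Sketch of the per-queue-pair sequence that the port-level bind below is
 * built from, assuming the local port owns the hairpin Tx queue and the
 * peer port (possibly the same one) owns the Rx queue; error handling is
 * elided:
 *
 *	struct rte_hairpin_peer_info peer, cur;
 *
 *	rte_eth_hairpin_queue_peer_update(rx_port, rx_queue, NULL, &peer, 1);
 *	mlx5_hairpin_queue_peer_bind(dev, tx_queue, &peer, 1);
 *	... fill cur from the local Tx queue: SQ id, VHCA id, modes ...
 *	rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue, &cur, 0);
 */
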
572 /*
573  * Unbind the hairpin queue and reset its HW configuration.
574  * This needs to be called twice, for the Tx and the Rx queues of a pair.
575  * If the queue is already unbound, it is considered successful.
576  *
577  * @param dev
578  *   Pointer to Ethernet device structure.
579  * @param cur_queue
580  *   Index of the queue to change the HW configuration to unbind.
581  * @param direction
582  *   Positive to reset the TxQ, zero to reset the RxQ.
583  *
584  * @return
585  *   0 on success, a negative errno value otherwise and rte_errno is set.
586  */
587 int
588 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
589 			       uint32_t direction)
590 {
591 	int ret = 0;
592 
593 	if (direction != 0) {
594 		struct mlx5_txq_ctrl *txq_ctrl;
595 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
596 
597 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
598 		if (txq_ctrl == NULL) {
599 			rte_errno = EINVAL;
600 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
601 				dev->data->port_id, cur_queue);
602 			return -rte_errno;
603 		}
604 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
605 			rte_errno = EINVAL;
606 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
607 				dev->data->port_id, cur_queue);
608 			mlx5_txq_release(dev, cur_queue);
609 			return -rte_errno;
610 		}
611 		/* Already unbound, return success before obj checking. */
612 		if (txq_ctrl->hairpin_status == 0) {
613 			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
614 				dev->data->port_id, cur_queue);
615 			mlx5_txq_release(dev, cur_queue);
616 			return 0;
617 		}
618 		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
619 			rte_errno = ENOMEM;
620 			DRV_LOG(ERR, "port %u no Txq object found: %d",
621 				dev->data->port_id, cur_queue);
622 			mlx5_txq_release(dev, cur_queue);
623 			return -rte_errno;
624 		}
625 		sq_attr.state = MLX5_SQC_STATE_RST;
626 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
627 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
628 		if (ret == 0)
629 			txq_ctrl->hairpin_status = 0;
630 		mlx5_txq_release(dev, cur_queue);
631 	} else {
632 		struct mlx5_rxq_ctrl *rxq_ctrl;
633 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
634 
635 		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
636 		if (rxq_ctrl == NULL) {
637 			rte_errno = EINVAL;
638 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
639 				dev->data->port_id, cur_queue);
640 			return -rte_errno;
641 		}
642 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
643 			rte_errno = EINVAL;
644 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
645 				dev->data->port_id, cur_queue);
646 			mlx5_rxq_release(dev, cur_queue);
647 			return -rte_errno;
648 		}
649 		if (rxq_ctrl->hairpin_status == 0) {
650 			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
651 				dev->data->port_id, cur_queue);
652 			mlx5_rxq_release(dev, cur_queue);
653 			return 0;
654 		}
655 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
656 			rte_errno = ENOMEM;
657 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
658 				dev->data->port_id, cur_queue);
659 			mlx5_rxq_release(dev, cur_queue);
660 			return -rte_errno;
661 		}
662 		rq_attr.state = MLX5_SQC_STATE_RST;
663 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
664 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
665 		if (ret == 0)
666 			rxq_ctrl->hairpin_status = 0;
667 		mlx5_rxq_release(dev, cur_queue);
668 	}
669 	return ret;
670 }
671 
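/*
 * Sketch of the matching teardown order used by the port-level unbind
 * below: the peer Rx queue is reset first through the ethdev helper, then
 * the local Tx queue through the function above (error handling elided):
 *
 *	rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
 *	mlx5_hairpin_queue_peer_unbind(dev, tx_queue, 1);
 */
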
672 /*
673  * Bind the hairpin port pairs, from the Tx to the peer Rx.
674  * This function only supports binding the Tx side to a single Rx port.
675  *
676  * @param dev
677  *   Pointer to Ethernet device structure.
678  * @param rx_port
679  *   Port identifier of the Rx port.
680  *
681  * @return
682  *   0 on success, a negative errno value otherwise and rte_errno is set.
683  */
684 static int
685 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
686 {
687 	struct mlx5_priv *priv = dev->data->dev_private;
688 	int ret = 0;
689 	struct mlx5_txq_ctrl *txq_ctrl;
690 	uint32_t i;
691 	struct rte_hairpin_peer_info peer = {0xffffff};
692 	struct rte_hairpin_peer_info cur;
693 	const struct rte_eth_hairpin_conf *conf;
694 	uint16_t num_q = 0;
695 	uint16_t local_port = priv->dev_data->port_id;
696 	uint32_t manual;
697 	uint32_t explicit;
698 	uint16_t rx_queue;
699 
700 	if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
701 		rte_errno = ENODEV;
702 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
703 		return -rte_errno;
704 	}
705 	/*
706 	 * Before binding a TxQ to its peer RxQ, a first pass over the queues
707 	 * checks their configuration consistency. This costs a little time
708 	 * but is better than having to roll back a partial binding later.
709 	 */
710 	for (i = 0; i != priv->txqs_n; i++) {
711 		txq_ctrl = mlx5_txq_get(dev, i);
712 		if (txq_ctrl == NULL)
713 			continue;
714 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
715 			mlx5_txq_release(dev, i);
716 			continue;
717 		}
718 		/*
719 		 * All hairpin Tx queues of a single port connected to the
720 		 * same peer Rx port should have the same "auto binding" and
721 		 * "implicit Tx flow" modes.
722 		 * Peer consistency checking is done during per-queue binding.
723 		 */
724 		conf = &txq_ctrl->hairpin_conf;
725 		if (conf->peers[0].port == rx_port) {
726 			if (num_q == 0) {
727 				manual = conf->manual_bind;
728 				explicit = conf->tx_explicit;
729 			} else {
730 				if (manual != conf->manual_bind ||
731 				    explicit != conf->tx_explicit) {
732 					rte_errno = EINVAL;
733 					DRV_LOG(ERR, "port %u queue %d mode"
734 						" mismatch: %u %u, %u %u",
735 						local_port, i, manual,
736 						conf->manual_bind, explicit,
737 						conf->tx_explicit);
738 					mlx5_txq_release(dev, i);
739 					return -rte_errno;
740 				}
741 			}
742 			num_q++;
743 		}
744 		mlx5_txq_release(dev, i);
745 	}
746 	/* If no queue is configured, success is returned directly. */
747 	if (num_q == 0)
748 		return ret;
749 	/* All the hairpin TX queues need to be traversed again. */
750 	for (i = 0; i != priv->txqs_n; i++) {
751 		txq_ctrl = mlx5_txq_get(dev, i);
752 		if (txq_ctrl == NULL)
753 			continue;
754 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
755 			mlx5_txq_release(dev, i);
756 			continue;
757 		}
758 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
759 			mlx5_txq_release(dev, i);
760 			continue;
761 		}
762 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
763 		/*
764 		 * Fetch peer RxQ's information.
765 		 * No need to pass the information of the current queue.
766 		 */
767 		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
768 							NULL, &peer, 1);
769 		if (ret != 0) {
770 			mlx5_txq_release(dev, i);
771 			goto error;
772 		}
773 		/* Accessing its own device, inside mlx5 PMD. */
774 		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
775 		if (ret != 0) {
776 			mlx5_txq_release(dev, i);
777 			goto error;
778 		}
779 		/* Pass TxQ's information to peer RxQ and try binding. */
780 		cur.peer_q = rx_queue;
781 		cur.qp_id = txq_ctrl->obj->sq->id;
782 		cur.vhca_id = priv->config.hca_attr.vhca_id;
783 		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
784 		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
785 		/*
786 		 * To access another device in a proper way, an RTE-level
787 		 * private function is needed.
788 		 */
789 		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
790 						      &cur, 0);
791 		if (ret != 0) {
792 			mlx5_txq_release(dev, i);
793 			goto error;
794 		}
795 		mlx5_txq_release(dev, i);
796 	}
797 	return 0;
798 error:
799 	/*
800 	 * Roll back the queues that were already bound.
801 	 * No need to check the return value of the queue unbind function.
802 	 */
803 	do {
804 		/* No validation is needed here. */
805 		txq_ctrl = mlx5_txq_get(dev, i);
806 		if (txq_ctrl == NULL)
807 			continue;
808 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
809 		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
810 		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
811 		mlx5_txq_release(dev, i);
812 	} while (i--);
813 	return ret;
814 }
815 
816 /*
817  * Unbind the hairpin port pair; the HW configuration of both devices will be
818  * cleared and the status reset for all the queues used between them.
819  * This function only supports unbinding the Tx side from a single Rx port.
820  *
821  * @param dev
822  *   Pointer to Ethernet device structure.
823  * @param rx_port
824  *   Port identifier of the Rx port.
825  *
826  * @return
827  *   0 on success, a negative errno value otherwise and rte_errno is set.
828  */
829 static int
830 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
831 {
832 	struct mlx5_priv *priv = dev->data->dev_private;
833 	struct mlx5_txq_ctrl *txq_ctrl;
834 	uint32_t i;
835 	int ret;
836 	uint16_t cur_port = priv->dev_data->port_id;
837 
838 	if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
839 		rte_errno = ENODEV;
840 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
841 		return -rte_errno;
842 	}
843 	for (i = 0; i != priv->txqs_n; i++) {
844 		uint16_t rx_queue;
845 
846 		txq_ctrl = mlx5_txq_get(dev, i);
847 		if (txq_ctrl == NULL)
848 			continue;
849 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
850 			mlx5_txq_release(dev, i);
851 			continue;
852 		}
853 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
854 			mlx5_txq_release(dev, i);
855 			continue;
856 		}
857 		/* Indeed, only the first used queue needs to be checked. */
858 		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
859 			if (cur_port != rx_port) {
860 				rte_errno = EINVAL;
861 				DRV_LOG(ERR, "port %u and port %u are in"
862 					" auto-bind mode", cur_port, rx_port);
863 				mlx5_txq_release(dev, i);
864 				return -rte_errno;
865 			} else {
866 				return 0;
867 			}
868 		}
869 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
870 		mlx5_txq_release(dev, i);
871 		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
872 		if (ret) {
873 			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
874 				rx_port, rx_queue);
875 			return ret;
876 		}
877 		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
878 		if (ret) {
879 			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
880 				cur_port, i);
881 			return ret;
882 		}
883 	}
884 	return 0;
885 }
886 
887 /*
888  * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
889  * @see mlx5_hairpin_bind_single_port()
890  */
891 int
892 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
893 {
894 	int ret = 0;
895 	uint16_t p, pp;
896 	struct mlx5_priv *priv = dev->data->dev_private;
897 
898 	/*
899 	 * If the Rx port has no hairpin configuration with the current port,
900 	 * the binding will be skipped inside the single-port bind function.
901 	 * The device started status is checked only right before the queue
902 	 * information is updated.
903 	 */
904 	if (rx_port == RTE_MAX_ETHPORTS) {
905 		MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
906 			ret = mlx5_hairpin_bind_single_port(dev, p);
907 			if (ret != 0)
908 				goto unbind;
909 		}
910 		return ret;
911 	} else {
912 		return mlx5_hairpin_bind_single_port(dev, rx_port);
913 	}
914 unbind:
915 	MLX5_ETH_FOREACH_DEV(pp, priv->pci_dev)
916 		if (pp < p)
917 			mlx5_hairpin_unbind_single_port(dev, pp);
918 	return ret;
919 }
920 
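/*
 * From the application's point of view, the hairpin bind/unbind callbacks
 * in this file sit behind the generic ethdev calls; a hedged sketch
 * (tx_port and rx_port are placeholders, error handling elided):
 *
 *	ret = rte_eth_hairpin_bind(tx_port, rx_port);
 *	...
 *	ret = rte_eth_hairpin_unbind(tx_port, rx_port);
 *
 * Passing RTE_MAX_ETHPORTS as the Rx port binds or unbinds against all
 * peer ports, as handled above and below.
 */
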
921 /*
922  * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
923  * @see mlx5_hairpin_unbind_single_port()
924  */
925 int
926 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
927 {
928 	int ret = 0;
929 	uint16_t p;
930 	struct mlx5_priv *priv = dev->data->dev_private;
931 
932 	if (rx_port == RTE_MAX_ETHPORTS)
933 		MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
934 			ret = mlx5_hairpin_unbind_single_port(dev, p);
935 			if (ret != 0)
936 				return ret;
937 		}
938 	else
939 		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
940 	return ret;
941 }
942 
943 /*
944  * DPDK callback to get the hairpin peer ports list.
945  * This will return the actual number of peer ports and save the identifiers
946  * into the array (sorted, may be different from that when setting up the
947  * hairpin peer queues).
948  * The peer port ID could be the same as the port ID of the current device.
949  *
950  * @param dev
951  *   Pointer to Ethernet device structure.
952  * @param peer_ports
953  *   Pointer to array to save the port identifiers.
954  * @param len
955  *   The length of the array.
956  * @param direction
957  *   Current port to peer port direction.
958  *   positive - current used as Tx to get all peer Rx ports.
959  *   zero - current used as Rx to get all peer Tx ports.
960  *
961  * @return
962  *   0 or positive value on success, actual number of peer ports.
963  *   a negative errno value otherwise and rte_errno is set.
964  */
965 int
966 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
967 			    size_t len, uint32_t direction)
968 {
969 	struct mlx5_priv *priv = dev->data->dev_private;
970 	struct mlx5_txq_ctrl *txq_ctrl;
971 	struct mlx5_rxq_ctrl *rxq_ctrl;
972 	uint32_t i;
973 	uint16_t pp;
974 	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
975 	int ret = 0;
976 
977 	if (direction) {
978 		for (i = 0; i < priv->txqs_n; i++) {
979 			txq_ctrl = mlx5_txq_get(dev, i);
980 			if (!txq_ctrl)
981 				continue;
982 			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
983 				mlx5_txq_release(dev, i);
984 				continue;
985 			}
986 			pp = txq_ctrl->hairpin_conf.peers[0].port;
987 			if (pp >= RTE_MAX_ETHPORTS) {
988 				rte_errno = ERANGE;
989 				mlx5_txq_release(dev, i);
990 				DRV_LOG(ERR, "port %hu queue %u peer port "
991 					"out of range %hu",
992 					priv->dev_data->port_id, i, pp);
993 				return -rte_errno;
994 			}
995 			bits[pp / 32] |= 1 << (pp % 32);
996 			mlx5_txq_release(dev, i);
997 		}
998 	} else {
999 		for (i = 0; i < priv->rxqs_n; i++) {
1000 			rxq_ctrl = mlx5_rxq_get(dev, i);
1001 			if (!rxq_ctrl)
1002 				continue;
1003 			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
1004 				mlx5_rxq_release(dev, i);
1005 				continue;
1006 			}
1007 			pp = rxq_ctrl->hairpin_conf.peers[0].port;
1008 			if (pp >= RTE_MAX_ETHPORTS) {
1009 				rte_errno = ERANGE;
1010 				mlx5_rxq_release(dev, i);
1011 				DRV_LOG(ERR, "port %hu queue %u peer port "
1012 					"out of range %hu",
1013 					priv->dev_data->port_id, i, pp);
1014 				return -rte_errno;
1015 			}
1016 			bits[pp / 32] |= 1 << (pp % 32);
1017 			mlx5_rxq_release(dev, i);
1018 		}
1019 	}
1020 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1021 		if (bits[i / 32] & (1 << (i % 32))) {
1022 			if ((size_t)ret >= len) {
1023 				rte_errno = E2BIG;
1024 				return -rte_errno;
1025 			}
1026 			peer_ports[ret++] = i;
1027 		}
1028 	}
1029 	return ret;
1030 }
1031 
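/*
 * A minimal sketch of how an application may query the peer Rx ports of a
 * Tx port through the generic API that lands in the callback above
 * (tx_port is a placeholder):
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int n;
 *
 *	n = rte_eth_hairpin_get_peer_ports(tx_port, peers, RTE_DIM(peers), 1);
 *	if (n < 0)
 *		return n;	... rte_errno is set, e.g. E2BIG ...
 */
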
1032 /**
1033  * DPDK callback to start the device.
1034  *
1035  * Simulate device start by attaching all configured flows.
1036  *
1037  * @param dev
1038  *   Pointer to Ethernet device structure.
1039  *
1040  * @return
1041  *   0 on success, a negative errno value otherwise and rte_errno is set.
1042  */
1043 int
1044 mlx5_dev_start(struct rte_eth_dev *dev)
1045 {
1046 	struct mlx5_priv *priv = dev->data->dev_private;
1047 	int ret;
1048 	int fine_inline;
1049 
1050 	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1051 	fine_inline = rte_mbuf_dynflag_lookup
1052 		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1053 	if (fine_inline >= 0)
1054 		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1055 	else
1056 		rte_net_mlx5_dynf_inline_mask = 0;
1057 	if (dev->data->nb_rx_queues > 0) {
1058 		ret = mlx5_dev_configure_rss_reta(dev);
1059 		if (ret) {
1060 			DRV_LOG(ERR, "port %u reta config failed: %s",
1061 				dev->data->port_id, strerror(rte_errno));
1062 			return -rte_errno;
1063 		}
1064 	}
1065 	ret = mlx5_txpp_start(dev);
1066 	if (ret) {
1067 		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1068 			dev->data->port_id, strerror(rte_errno));
1069 		goto error;
1070 	}
1071 	if ((priv->config.devx && priv->config.dv_flow_en &&
1072 	    priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1073 		ret = priv->obj_ops.lb_dummy_queue_create(dev);
1074 		if (ret)
1075 			goto error;
1076 	}
1077 	ret = mlx5_txq_start(dev);
1078 	if (ret) {
1079 		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1080 			dev->data->port_id, strerror(rte_errno));
1081 		goto error;
1082 	}
1083 	ret = mlx5_rxq_start(dev);
1084 	if (ret) {
1085 		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1086 			dev->data->port_id, strerror(rte_errno));
1087 		goto error;
1088 	}
1089 	/*
1090 	 * This step will be skipped if there is no hairpin Tx queue configured
1091 	 * with an Rx peer queue from the same device.
1092 	 */
1093 	ret = mlx5_hairpin_auto_bind(dev);
1094 	if (ret) {
1095 		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1096 			dev->data->port_id, strerror(rte_errno));
1097 		goto error;
1098 	}
1099 	/* Set started flag here for the following steps like control flow. */
1100 	dev->data->dev_started = 1;
1101 	ret = mlx5_rx_intr_vec_enable(dev);
1102 	if (ret) {
1103 		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1104 			dev->data->port_id);
1105 		goto error;
1106 	}
1107 	mlx5_os_stats_init(dev);
1108 	ret = mlx5_traffic_enable(dev);
1109 	if (ret) {
1110 		DRV_LOG(ERR, "port %u failed to set defaults flows",
1111 			dev->data->port_id);
1112 		goto error;
1113 	}
1114 	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
1115 	mlx5_flow_rxq_dynf_metadata_set(dev);
1116 	/* Set flags and context to convert Rx timestamps. */
1117 	mlx5_rxq_timestamp_set(dev);
1118 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
1119 	mlx5_txq_dynf_timestamp_set(dev);
1120 	/*
1121 	 * In non-cached mode, only the default mreg copy action needs to be
1122 	 * started, since no flow created by an application exists anymore.
1123 	 * It is still worth wrapping the interface for further usage.
1124 	 */
1125 	ret = mlx5_flow_start_default(dev);
1126 	if (ret) {
1127 		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1128 			dev->data->port_id, strerror(rte_errno));
1129 		goto error;
1130 	}
1131 	rte_wmb();
1132 	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1133 	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1134 	/* Enable datapath on secondary process. */
1135 	mlx5_mp_os_req_start_rxtx(dev);
1136 	if (priv->sh->intr_handle.fd >= 0) {
1137 		priv->sh->port[priv->dev_port - 1].ih_port_id =
1138 					(uint32_t)dev->data->port_id;
1139 	} else {
1140 		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1141 			dev->data->port_id);
1142 		dev->data->dev_conf.intr_conf.lsc = 0;
1143 		dev->data->dev_conf.intr_conf.rmv = 0;
1144 	}
1145 	if (priv->sh->intr_handle_devx.fd >= 0)
1146 		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1147 					(uint32_t)dev->data->port_id;
1148 	return 0;
1149 error:
1150 	ret = rte_errno; /* Save rte_errno before cleanup. */
1151 	/* Rollback. */
1152 	dev->data->dev_started = 0;
1153 	mlx5_flow_stop_default(dev);
1154 	mlx5_traffic_disable(dev);
1155 	mlx5_txq_stop(dev);
1156 	mlx5_rxq_stop(dev);
1157 	if (priv->obj_ops.lb_dummy_queue_release)
1158 		priv->obj_ops.lb_dummy_queue_release(dev);
1159 	mlx5_txpp_stop(dev); /* Stop last. */
1160 	rte_errno = ret; /* Restore rte_errno. */
1161 	return -rte_errno;
1162 }
1163 
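/*
 * For context, a hedged sketch of the application-level sequence that ends
 * up in mlx5_dev_start() (names and counts are placeholders, queue setup
 * and error handling elided):
 *
 *	struct rte_eth_conf port_conf = { 0 };
 *
 *	ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 *	... rte_eth_rx_queue_setup() and rte_eth_tx_queue_setup() per queue ...
 *	ret = rte_eth_dev_start(port_id);
 */
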
1164 /**
1165  * DPDK callback to stop the device.
1166  *
1167  * Simulate device stop by detaching all configured flows.
1168  *
1169  * @param dev
1170  *   Pointer to Ethernet device structure.
1171  */
1172 int
1173 mlx5_dev_stop(struct rte_eth_dev *dev)
1174 {
1175 	struct mlx5_priv *priv = dev->data->dev_private;
1176 
1177 	dev->data->dev_started = 0;
1178 	/* Prevent crashes when queues are still in use. */
1179 	dev->rx_pkt_burst = removed_rx_burst;
1180 	dev->tx_pkt_burst = removed_tx_burst;
1181 	rte_wmb();
1182 	/* Disable datapath on secondary process. */
1183 	mlx5_mp_os_req_stop_rxtx(dev);
1184 	rte_delay_us_sleep(1000 * priv->rxqs_n);
1185 	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1186 	mlx5_flow_stop_default(dev);
1187 	/* Control flows for default traffic can be removed first. */
1188 	mlx5_traffic_disable(dev);
1189 	/* All RX queue flags will be cleared in the flush interface. */
1190 	mlx5_flow_list_flush(dev, &priv->flows, true);
1191 	mlx5_flow_meter_rxq_flush(dev);
1192 	mlx5_rx_intr_vec_disable(dev);
1193 	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1194 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1195 	mlx5_txq_stop(dev);
1196 	mlx5_rxq_stop(dev);
1197 	if (priv->obj_ops.lb_dummy_queue_release)
1198 		priv->obj_ops.lb_dummy_queue_release(dev);
1199 	mlx5_txpp_stop(dev);
1200 
1201 	return 0;
1202 }
1203 
1204 /**
1205  * Enable traffic flows configured by the control plane:
1206  * default hairpin Tx flows, promiscuous/all-multicast, broadcast,
1207  * VLAN filter and MAC address flows, depending on the configuration.
1208  *
1209  * @param dev
1210  *   Pointer to Ethernet device structure.
1211  *
1212  * @return
1213  *   0 on success, a negative errno value otherwise and rte_errno is set.
1214  */
1215 int
1216 mlx5_traffic_enable(struct rte_eth_dev *dev)
1217 {
1218 	struct mlx5_priv *priv = dev->data->dev_private;
1219 	struct rte_flow_item_eth bcast = {
1220 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1221 	};
1222 	struct rte_flow_item_eth ipv6_multi_spec = {
1223 		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1224 	};
1225 	struct rte_flow_item_eth ipv6_multi_mask = {
1226 		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1227 	};
1228 	struct rte_flow_item_eth unicast = {
1229 		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1230 	};
1231 	struct rte_flow_item_eth unicast_mask = {
1232 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1233 	};
1234 	const unsigned int vlan_filter_n = priv->vlan_filter_n;
1235 	const struct rte_ether_addr cmp = {
1236 		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1237 	};
1238 	unsigned int i;
1239 	unsigned int j;
1240 	int ret;
1241 
1242 	/*
1243 	 * The hairpin Txq default flow should be created regardless of the
1244 	 * isolation mode. Otherwise, all packets to be sent would go out
1245 	 * directly without the Tx flow actions, e.g. encapsulation.
1246 	 */
1247 	for (i = 0; i != priv->txqs_n; ++i) {
1248 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1249 		if (!txq_ctrl)
1250 			continue;
1251 		/* Only Tx implicit mode requires the default Tx flow. */
1252 		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1253 		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1254 		    txq_ctrl->hairpin_conf.peers[0].port ==
1255 		    priv->dev_data->port_id) {
1256 			ret = mlx5_ctrl_flow_source_queue(dev, i);
1257 			if (ret) {
1258 				mlx5_txq_release(dev, i);
1259 				goto error;
1260 			}
1261 		}
1262 		mlx5_txq_release(dev, i);
1263 	}
1264 	if (priv->config.dv_esw_en && !priv->config.vf) {
1265 		if (mlx5_flow_create_esw_table_zero_flow(dev))
1266 			priv->fdb_def_rule = 1;
1267 		else
1268 			DRV_LOG(INFO, "port %u FDB default rule cannot be"
1269 				" configured - only Eswitch group 0 flows are"
1270 				" supported.", dev->data->port_id);
1271 	}
1272 	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1273 		ret = mlx5_flow_lacp_miss(dev);
1274 		if (ret)
1275 			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1276 				"forward LACP to kernel.", dev->data->port_id);
1277 		else
1278 			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1279 				dev->data->port_id);
1280 	}
1281 	if (priv->isolated)
1282 		return 0;
1283 	if (dev->data->promiscuous) {
1284 		struct rte_flow_item_eth promisc = {
1285 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1286 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1287 			.type = 0,
1288 		};
1289 
1290 		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1291 		if (ret)
1292 			goto error;
1293 	}
1294 	if (dev->data->all_multicast) {
1295 		struct rte_flow_item_eth multicast = {
1296 			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1297 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1298 			.type = 0,
1299 		};
1300 
1301 		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1302 		if (ret)
1303 			goto error;
1304 	} else {
1305 		/* Add broadcast/multicast flows. */
1306 		for (i = 0; i != vlan_filter_n; ++i) {
1307 			uint16_t vlan = priv->vlan_filter[i];
1308 
1309 			struct rte_flow_item_vlan vlan_spec = {
1310 				.tci = rte_cpu_to_be_16(vlan),
1311 			};
1312 			struct rte_flow_item_vlan vlan_mask =
1313 				rte_flow_item_vlan_mask;
1314 
1315 			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1316 						  &vlan_spec, &vlan_mask);
1317 			if (ret)
1318 				goto error;
1319 			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1320 						  &ipv6_multi_mask,
1321 						  &vlan_spec, &vlan_mask);
1322 			if (ret)
1323 				goto error;
1324 		}
1325 		if (!vlan_filter_n) {
1326 			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1327 			if (ret)
1328 				goto error;
1329 			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1330 					     &ipv6_multi_mask);
1331 			if (ret) {
1332 				/* Do not fail on IPv6 broadcast creation failure. */
1333 				DRV_LOG(WARNING,
1334 					"IPv6 broadcast is not supported");
1335 				ret = 0;
1336 			}
1337 		}
1338 	}
1339 	/* Add MAC address flows. */
1340 	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1341 		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1342 
1343 		if (!memcmp(mac, &cmp, sizeof(*mac)))
1344 			continue;
1345 		memcpy(&unicast.dst.addr_bytes,
1346 		       mac->addr_bytes,
1347 		       RTE_ETHER_ADDR_LEN);
1348 		for (j = 0; j != vlan_filter_n; ++j) {
1349 			uint16_t vlan = priv->vlan_filter[j];
1350 
1351 			struct rte_flow_item_vlan vlan_spec = {
1352 				.tci = rte_cpu_to_be_16(vlan),
1353 			};
1354 			struct rte_flow_item_vlan vlan_mask =
1355 				rte_flow_item_vlan_mask;
1356 
1357 			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1358 						  &unicast_mask,
1359 						  &vlan_spec,
1360 						  &vlan_mask);
1361 			if (ret)
1362 				goto error;
1363 		}
1364 		if (!vlan_filter_n) {
1365 			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1366 			if (ret)
1367 				goto error;
1368 		}
1369 	}
1370 	return 0;
1371 error:
1372 	ret = rte_errno; /* Save rte_errno before cleanup. */
1373 	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
1374 	rte_errno = ret; /* Restore rte_errno. */
1375 	return -rte_errno;
1376 }
1377 
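/*
 * The control flows above are typically (re)built when the application
 * toggles the corresponding ethdev settings while the port is started,
 * e.g. (port_id, vlan_id and mac are placeholders, error handling elided):
 *
 *	rte_eth_promiscuous_enable(port_id);
 *	rte_eth_allmulticast_enable(port_id);
 *	rte_eth_dev_vlan_filter(port_id, vlan_id, 1);
 *	rte_eth_dev_mac_addr_add(port_id, &mac, 0);
 *
 * On a started port these typically trigger mlx5_traffic_restart(), which
 * rebuilds the control flow list through the function above.
 */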
1378 
1379 /**
1380  * Disable traffic flows configured by control plane
1381  *
1382  * @param dev
1383  *   Pointer to Ethernet device private data.
1384  */
1385 void
1386 mlx5_traffic_disable(struct rte_eth_dev *dev)
1387 {
1388 	struct mlx5_priv *priv = dev->data->dev_private;
1389 
1390 	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
1391 }
1392 
1393 /**
1394  * Restart traffic flows configured by control plane
1395  *
1396  * @param dev
1397  *   Pointer to Ethernet device private data.
1398  *
1399  * @return
1400  *   0 on success, a negative errno value otherwise and rte_errno is set.
1401  */
1402 int
1403 mlx5_traffic_restart(struct rte_eth_dev *dev)
1404 {
1405 	if (dev->data->dev_started) {
1406 		mlx5_traffic_disable(dev);
1407 		return mlx5_traffic_enable(dev);
1408 	}
1409 	return 0;
1410 }
1411