1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5 
6 #include <unistd.h>
7 
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13 
14 #include <mlx5_malloc.h>
15 
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22 
23 static void mlx5_traffic_disable_legacy(struct rte_eth_dev *dev);
24 
25 /**
26  * Stop traffic on Tx queues.
27  *
28  * @param dev
29  *   Pointer to Ethernet device structure.
30  */
31 static void
32 mlx5_txq_stop(struct rte_eth_dev *dev)
33 {
34 	struct mlx5_priv *priv = dev->data->dev_private;
35 	unsigned int i;
36 
37 	for (i = 0; i != priv->txqs_n; ++i)
38 		mlx5_txq_release(dev, i);
39 }
40 
41 /**
42  * Start traffic on Tx queues.
43  *
44  * @param dev
45  *   Pointer to Ethernet device structure.
46  *
47  * @return
48  *   0 on success, a negative errno value otherwise and rte_errno is set.
49  */
50 static int
51 mlx5_txq_start(struct rte_eth_dev *dev)
52 {
53 	struct mlx5_priv *priv = dev->data->dev_private;
54 	unsigned int i;
55 	int ret;
56 
57 	for (i = 0; i != priv->txqs_n; ++i) {
58 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
59 		struct mlx5_txq_data *txq_data;
60 		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
61 		if (!txq_ctrl)
62 			continue;
63 		txq_data = &txq_ctrl->txq;
64 		if (!txq_ctrl->is_hairpin)
65 			txq_alloc_elts(txq_ctrl);
66 		MLX5_ASSERT(!txq_ctrl->obj);
67 		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
68 					    0, txq_ctrl->socket);
69 		if (!txq_ctrl->obj) {
70 			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
71 				"memory resources.", dev->data->port_id,
72 				txq_data->idx);
73 			rte_errno = ENOMEM;
74 			goto error;
75 		}
76 		ret = priv->obj_ops.txq_obj_new(dev, i);
77 		if (ret < 0) {
78 			mlx5_free(txq_ctrl->obj);
79 			txq_ctrl->obj = NULL;
80 			goto error;
81 		}
82 		if (!txq_ctrl->is_hairpin) {
83 			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
84 
85 			txq_data->fcqs = mlx5_malloc(flags, size,
86 						     RTE_CACHE_LINE_SIZE,
87 						     txq_ctrl->socket);
88 			if (!txq_data->fcqs) {
89 				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
90 					"allocate memory (FCQ).",
91 					dev->data->port_id, i);
92 				rte_errno = ENOMEM;
93 				goto error;
94 			}
95 		}
96 		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
97 			dev->data->port_id, i, (void *)&txq_ctrl->obj);
98 		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
99 	}
100 	return 0;
101 error:
102 	ret = rte_errno; /* Save rte_errno before cleanup. */
103 	do {
104 		mlx5_txq_release(dev, i);
105 	} while (i-- != 0);
106 	rte_errno = ret; /* Restore rte_errno. */
107 	return -rte_errno;
108 }
109 
110 /**
111  * Register Rx queue mempools and fill the Rx queue cache.
112  * This function tolerates repeated mempool registration.
113  *
114  * @param[in] rxq_ctrl
115  *   Rx queue control data.
116  *
117  * @return
118  *   0 on success, (-1) on failure and rte_errno is set.
119  */
120 static int
121 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
122 {
123 	struct rte_mempool *mp;
124 	uint32_t s;
125 	int ret = 0;
126 
127 	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
128 	/* MPRQ mempool is registered on creation, just fill the cache. */
129 	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
130 		return mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
131 						      rxq_ctrl->rxq.mprq_mp);
132 	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
133 		bool is_extmem;
134 
135 		mp = rxq_ctrl->rxq.rxseg[s].mp;
136 		is_extmem = (rte_pktmbuf_priv_flags(mp) &
137 			     RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
138 		ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
139 					       is_extmem);
140 		if (ret < 0 && rte_errno != EEXIST)
141 			return ret;
142 		ret = mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
143 						     mp);
144 		if (ret < 0)
145 			return ret;
146 	}
147 	return 0;
148 }
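
/*
 * Illustrative usage sketch, not part of the driver and kept out of the
 * build behind a hypothetical guard macro: it shows how the "pinned
 * external buffer" property consumed above can be derived from a mempool
 * created with rte_pktmbuf_pool_create_extbuf(). The helper name is an
 * assumption for the example.
 */
#ifdef MLX5_TRIGGER_USAGE_SKETCHES
static bool
example_mempool_is_pinned_extmem(struct rte_mempool *mp)
{
	/* Pools of mbufs with pinned external buffers carry this flag. */
	return (rte_pktmbuf_priv_flags(mp) &
		RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
}
#endif /* MLX5_TRIGGER_USAGE_SKETCHES */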
149 
150 /**
151  * Stop traffic on Rx queues.
152  *
153  * @param dev
154  *   Pointer to Ethernet device structure.
155  */
156 static void
157 mlx5_rxq_stop(struct rte_eth_dev *dev)
158 {
159 	struct mlx5_priv *priv = dev->data->dev_private;
160 	unsigned int i;
161 
162 	for (i = 0; i != priv->rxqs_n; ++i)
163 		mlx5_rxq_release(dev, i);
164 }
165 
166 static int
167 mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
168 		      unsigned int idx)
169 {
170 	int ret = 0;
171 
172 	if (!rxq_ctrl->is_hairpin) {
173 		/*
174 		 * Pre-register the mempools. Regardless of whether
175 		 * the implicit registration is enabled or not,
176 		 * Rx mempool destruction is tracked to free MRs.
177 		 */
178 		if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
179 			return -rte_errno;
180 		ret = rxq_alloc_elts(rxq_ctrl);
181 		if (ret)
182 			return ret;
183 	}
184 	MLX5_ASSERT(!rxq_ctrl->obj);
185 	rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
186 				    sizeof(*rxq_ctrl->obj), 0,
187 				    rxq_ctrl->socket);
188 	if (!rxq_ctrl->obj) {
189 		DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
190 			dev->data->port_id, idx);
191 		rte_errno = ENOMEM;
192 		return -rte_errno;
193 	}
194 	DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
195 		idx, (void *)&rxq_ctrl->obj);
196 	return 0;
197 }
198 
199 /**
200  * Start traffic on Rx queues.
201  *
202  * @param dev
203  *   Pointer to Ethernet device structure.
204  *
205  * @return
206  *   0 on success, a negative errno value otherwise and rte_errno is set.
207  */
208 static int
209 mlx5_rxq_start(struct rte_eth_dev *dev)
210 {
211 	struct mlx5_priv *priv = dev->data->dev_private;
212 	unsigned int i;
213 	int ret = 0;
214 
215 	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
216 	if (mlx5_mprq_alloc_mp(dev)) {
217 		/* Should not release Rx queues but return immediately. */
218 		return -rte_errno;
219 	}
220 	DRV_LOG(DEBUG, "Port %u dev_cap.max_qp_wr is %d.",
221 		dev->data->port_id, priv->sh->dev_cap.max_qp_wr);
222 	DRV_LOG(DEBUG, "Port %u dev_cap.max_sge is %d.",
223 		dev->data->port_id, priv->sh->dev_cap.max_sge);
224 	for (i = 0; i != priv->rxqs_n; ++i) {
225 		struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
226 		struct mlx5_rxq_ctrl *rxq_ctrl;
227 
228 		if (rxq == NULL)
229 			continue;
230 		rxq_ctrl = rxq->ctrl;
231 		if (!rxq_ctrl->started)
232 			if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
233 				goto error;
234 		ret = priv->obj_ops.rxq_obj_new(rxq);
235 		if (ret) {
236 			mlx5_free(rxq_ctrl->obj);
237 			rxq_ctrl->obj = NULL;
238 			goto error;
239 		}
240 		if (!rxq_ctrl->started)
241 			LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
242 		rxq_ctrl->started = true;
243 	}
244 	return 0;
245 error:
246 	ret = rte_errno; /* Save rte_errno before cleanup. */
247 	do {
248 		mlx5_rxq_release(dev, i);
249 	} while (i-- != 0);
250 	rte_errno = ret; /* Restore rte_errno. */
251 	return -rte_errno;
252 }
253 
254 /**
255  * Bind hairpin Tx queues to their target Rx queues on the same port.
256  *
257  * Auto-binding is skipped entirely when manual binding was requested.
258  *
259  * @param dev
260  *   Pointer to Ethernet device structure.
261  *
262  * @return
263  *   0 on success, a negative errno value otherwise and rte_errno is set.
264  */
265 static int
266 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
267 {
268 	struct mlx5_priv *priv = dev->data->dev_private;
269 	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
270 	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
271 	struct mlx5_txq_ctrl *txq_ctrl;
272 	struct mlx5_rxq_priv *rxq;
273 	struct mlx5_rxq_ctrl *rxq_ctrl;
274 	struct mlx5_devx_obj *sq;
275 	struct mlx5_devx_obj *rq;
276 	unsigned int i;
277 	int ret = 0;
278 	bool need_auto = false;
279 	uint16_t self_port = dev->data->port_id;
280 
281 	for (i = 0; i != priv->txqs_n; ++i) {
282 		txq_ctrl = mlx5_txq_get(dev, i);
283 		if (!txq_ctrl)
284 			continue;
285 		if (!txq_ctrl->is_hairpin ||
286 		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
287 			mlx5_txq_release(dev, i);
288 			continue;
289 		}
290 		if (txq_ctrl->hairpin_conf.manual_bind) {
291 			mlx5_txq_release(dev, i);
292 			return 0;
293 		}
294 		need_auto = true;
295 		mlx5_txq_release(dev, i);
296 	}
297 	if (!need_auto)
298 		return 0;
299 	for (i = 0; i != priv->txqs_n; ++i) {
300 		txq_ctrl = mlx5_txq_get(dev, i);
301 		if (!txq_ctrl)
302 			continue;
303 		/* Skip non-hairpin queues and those with other peer ports. */
304 		if (!txq_ctrl->is_hairpin ||
305 		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
306 			mlx5_txq_release(dev, i);
307 			continue;
308 		}
309 		if (!txq_ctrl->obj) {
310 			rte_errno = ENOMEM;
311 			DRV_LOG(ERR, "port %u no txq object found: %d",
312 				dev->data->port_id, i);
313 			mlx5_txq_release(dev, i);
314 			return -rte_errno;
315 		}
316 		sq = txq_ctrl->obj->sq;
317 		rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
318 		if (rxq == NULL) {
319 			mlx5_txq_release(dev, i);
320 			rte_errno = EINVAL;
321 			DRV_LOG(ERR, "port %u no rxq object found: %d",
322 				dev->data->port_id,
323 				txq_ctrl->hairpin_conf.peers[0].queue);
324 			return -rte_errno;
325 		}
326 		rxq_ctrl = rxq->ctrl;
327 		if (!rxq_ctrl->is_hairpin ||
328 		    rxq->hairpin_conf.peers[0].queue != i) {
329 			rte_errno = ENOMEM;
330 			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
331 				"Rx queue %d", dev->data->port_id,
332 				i, txq_ctrl->hairpin_conf.peers[0].queue);
333 			goto error;
334 		}
335 		rq = rxq_ctrl->obj->rq;
336 		if (!rq) {
337 			rte_errno = ENOMEM;
338 			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
339 				dev->data->port_id,
340 				txq_ctrl->hairpin_conf.peers[0].queue);
341 			goto error;
342 		}
343 		sq_attr.state = MLX5_SQC_STATE_RDY;
344 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
345 		sq_attr.hairpin_peer_rq = rq->id;
346 		sq_attr.hairpin_peer_vhca =
347 				priv->sh->cdev->config.hca_attr.vhca_id;
348 		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
349 		if (ret)
350 			goto error;
351 		rq_attr.state = MLX5_RQC_STATE_RDY;
352 		rq_attr.rq_state = MLX5_RQC_STATE_RST;
353 		rq_attr.hairpin_peer_sq = sq->id;
354 		rq_attr.hairpin_peer_vhca =
355 				priv->sh->cdev->config.hca_attr.vhca_id;
356 		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
357 		if (ret)
358 			goto error;
359 		/* Auto-bound queues will be destroyed directly, no unbind needed. */
360 		rxq->hairpin_status = 1;
361 		txq_ctrl->hairpin_status = 1;
362 		mlx5_txq_release(dev, i);
363 	}
364 	return 0;
365 error:
366 	mlx5_txq_release(dev, i);
367 	return -rte_errno;
368 }
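
/*
 * Illustrative usage sketch, not part of the driver and kept out of the
 * build behind a hypothetical guard macro: application-side setup of a
 * same-port hairpin queue pair that the auto-bind path above binds on
 * port start (manual_bind and tx_explicit left at zero). The queue
 * indexes and the descriptor count are assumptions for the example.
 */
#ifdef MLX5_TRIGGER_USAGE_SKETCHES
static int
example_setup_same_port_hairpin(uint16_t port_id, uint16_t rxq_id,
				uint16_t txq_id)
{
	struct rte_eth_hairpin_conf conf = {
		.peer_count = 1,
		/* Zeroed manual_bind/tx_explicit select the auto-bind path. */
	};
	int ret;

	/* The Rx hairpin queue peers with the Tx hairpin queue of this port. */
	conf.peers[0].port = port_id;
	conf.peers[0].queue = txq_id;
	ret = rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, 512, &conf);
	if (ret != 0)
		return ret;
	/* The Tx hairpin queue peers back with the Rx hairpin queue. */
	conf.peers[0].queue = rxq_id;
	return rte_eth_tx_hairpin_queue_setup(port_id, txq_id, 512, &conf);
}
#endif /* MLX5_TRIGGER_USAGE_SKETCHES */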
369 
370 /*
371  * Fetch the peer queue's SW & HW information.
372  *
373  * @param dev
374  *   Pointer to Ethernet device structure.
375  * @param peer_queue
376  *   Index of the queue whose information is fetched.
377  * @param current_info
378  *   Pointer to the input peer information, not used currently.
379  * @param peer_info
380  *   Pointer to the structure to store the information, output.
381  * @param direction
382  *   Positive to get the RxQ information, zero to get the TxQ information.
383  *
384  * @return
385  *   0 on success, a negative errno value otherwise and rte_errno is set.
386  */
387 int
388 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
389 			       struct rte_hairpin_peer_info *current_info,
390 			       struct rte_hairpin_peer_info *peer_info,
391 			       uint32_t direction)
392 {
393 	struct mlx5_priv *priv = dev->data->dev_private;
394 	RTE_SET_USED(current_info);
395 
396 	if (dev->data->dev_started == 0) {
397 		rte_errno = EBUSY;
398 		DRV_LOG(ERR, "peer port %u is not started",
399 			dev->data->port_id);
400 		return -rte_errno;
401 	}
402 	/*
403 	 * The peer port is used as egress. In the current design, the hairpin
404 	 * Tx queue will be bound to the peer Rx queue, so in practice only the
405 	 * peer Rx queue information needs to be fetched.
406 	 */
407 	if (direction == 0) {
408 		struct mlx5_txq_ctrl *txq_ctrl;
409 
410 		txq_ctrl = mlx5_txq_get(dev, peer_queue);
411 		if (txq_ctrl == NULL) {
412 			rte_errno = EINVAL;
413 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
414 				dev->data->port_id, peer_queue);
415 			return -rte_errno;
416 		}
417 		if (!txq_ctrl->is_hairpin) {
418 			rte_errno = EINVAL;
419 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
420 				dev->data->port_id, peer_queue);
421 			mlx5_txq_release(dev, peer_queue);
422 			return -rte_errno;
423 		}
424 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
425 			rte_errno = ENOMEM;
426 			DRV_LOG(ERR, "port %u no Txq object found: %d",
427 				dev->data->port_id, peer_queue);
428 			mlx5_txq_release(dev, peer_queue);
429 			return -rte_errno;
430 		}
431 		peer_info->qp_id = mlx5_txq_get_sqn(txq_ctrl);
432 		peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
433 		/* 1-to-1 mapping, only the first one is used. */
434 		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
435 		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
436 		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
437 		mlx5_txq_release(dev, peer_queue);
438 	} else { /* Peer port used as ingress. */
439 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
440 		struct mlx5_rxq_ctrl *rxq_ctrl;
441 
442 		if (rxq == NULL) {
443 			rte_errno = EINVAL;
444 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
445 				dev->data->port_id, peer_queue);
446 			return -rte_errno;
447 		}
448 		rxq_ctrl = rxq->ctrl;
449 		if (!rxq_ctrl->is_hairpin) {
450 			rte_errno = EINVAL;
451 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
452 				dev->data->port_id, peer_queue);
453 			return -rte_errno;
454 		}
455 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
456 			rte_errno = ENOMEM;
457 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
458 				dev->data->port_id, peer_queue);
459 			return -rte_errno;
460 		}
461 		peer_info->qp_id = rxq_ctrl->obj->rq->id;
462 		peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
463 		peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
464 		peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
465 		peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
466 	}
467 	return 0;
468 }
469 
470 /*
471  * Bind the hairpin queue with the peer HW information.
472  * This needs to be called for both the Tx and the Rx queue of a pair.
473  * If the queue is already bound, it is considered successful.
474  *
475  * @param dev
476  *   Pointer to Ethernet device structure.
477  * @param cur_queue
478  *   Index of the queue whose HW configuration is changed to bind it.
479  * @param peer_info
480  *   Pointer to information of the peer queue.
481  * @param direction
482  *   Positive to configure the TxQ, zero to configure the RxQ.
483  *
484  * @return
485  *   0 on success, a negative errno value otherwise and rte_errno is set.
486  */
487 int
488 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
489 			     struct rte_hairpin_peer_info *peer_info,
490 			     uint32_t direction)
491 {
492 	int ret = 0;
493 
494 	/*
495 	 * Consistency checking of the peer queue: opposite direction is used
496 	 * to get the peer queue info with ethdev port ID, no need to check.
497 	 */
498 	if (peer_info->peer_q != cur_queue) {
499 		rte_errno = EINVAL;
500 		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
501 			dev->data->port_id, cur_queue, peer_info->peer_q);
502 		return -rte_errno;
503 	}
504 	if (direction != 0) {
505 		struct mlx5_txq_ctrl *txq_ctrl;
506 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
507 
508 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
509 		if (txq_ctrl == NULL) {
510 			rte_errno = EINVAL;
511 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
512 				dev->data->port_id, cur_queue);
513 			return -rte_errno;
514 		}
515 		if (!txq_ctrl->is_hairpin) {
516 			rte_errno = EINVAL;
517 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
518 				dev->data->port_id, cur_queue);
519 			mlx5_txq_release(dev, cur_queue);
520 			return -rte_errno;
521 		}
522 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
523 			rte_errno = ENOMEM;
524 			DRV_LOG(ERR, "port %u no Txq object found: %d",
525 				dev->data->port_id, cur_queue);
526 			mlx5_txq_release(dev, cur_queue);
527 			return -rte_errno;
528 		}
529 		if (txq_ctrl->hairpin_status != 0) {
530 			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
531 				dev->data->port_id, cur_queue);
532 			mlx5_txq_release(dev, cur_queue);
533 			return 0;
534 		}
535 		/*
536 		 * Consistency checking of all queues of one port is done in
537 		 * the bind() function, and that check is optional.
538 		 */
539 		if (peer_info->tx_explicit !=
540 		    txq_ctrl->hairpin_conf.tx_explicit) {
541 			rte_errno = EINVAL;
542 			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
543 				" mismatch", dev->data->port_id, cur_queue);
544 			mlx5_txq_release(dev, cur_queue);
545 			return -rte_errno;
546 		}
547 		if (peer_info->manual_bind !=
548 		    txq_ctrl->hairpin_conf.manual_bind) {
549 			rte_errno = EINVAL;
550 			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
551 				" mismatch", dev->data->port_id, cur_queue);
552 			mlx5_txq_release(dev, cur_queue);
553 			return -rte_errno;
554 		}
555 		sq_attr.state = MLX5_SQC_STATE_RDY;
556 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
557 		sq_attr.hairpin_peer_rq = peer_info->qp_id;
558 		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
559 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
560 		if (ret == 0)
561 			txq_ctrl->hairpin_status = 1;
562 		mlx5_txq_release(dev, cur_queue);
563 	} else {
564 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
565 		struct mlx5_rxq_ctrl *rxq_ctrl;
566 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
567 
568 		if (rxq == NULL) {
569 			rte_errno = EINVAL;
570 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
571 				dev->data->port_id, cur_queue);
572 			return -rte_errno;
573 		}
574 		rxq_ctrl = rxq->ctrl;
575 		if (!rxq_ctrl->is_hairpin) {
576 			rte_errno = EINVAL;
577 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
578 				dev->data->port_id, cur_queue);
579 			return -rte_errno;
580 		}
581 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
582 			rte_errno = ENOMEM;
583 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
584 				dev->data->port_id, cur_queue);
585 			return -rte_errno;
586 		}
587 		if (rxq->hairpin_status != 0) {
588 			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
589 				dev->data->port_id, cur_queue);
590 			return 0;
591 		}
592 		if (peer_info->tx_explicit !=
593 		    rxq->hairpin_conf.tx_explicit) {
594 			rte_errno = EINVAL;
595 			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
596 				" mismatch", dev->data->port_id, cur_queue);
597 			return -rte_errno;
598 		}
599 		if (peer_info->manual_bind !=
600 		    rxq->hairpin_conf.manual_bind) {
601 			rte_errno = EINVAL;
602 			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
603 				" mismatch", dev->data->port_id, cur_queue);
604 			return -rte_errno;
605 		}
606 		rq_attr.state = MLX5_RQC_STATE_RDY;
607 		rq_attr.rq_state = MLX5_RQC_STATE_RST;
608 		rq_attr.hairpin_peer_sq = peer_info->qp_id;
609 		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
610 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
611 		if (ret == 0)
612 			rxq->hairpin_status = 1;
613 	}
614 	return ret;
615 }
616 
617 /*
618  * Unbind the hairpin queue and reset its HW configuration.
619  * This needs to be called for both the Tx and the Rx queue of a pair.
620  * If the queue is already unbound, it is considered successful.
621  *
622  * @param dev
623  *   Pointer to Ethernet device structure.
624  * @param cur_queue
625  *   Index of the queue whose HW configuration is changed to unbind it.
626  * @param direction
627  *   Positive to reset the TxQ, zero to reset the RxQ.
628  *
629  * @return
630  *   0 on success, a negative errno value otherwise and rte_errno is set.
631  */
632 int
633 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
634 			       uint32_t direction)
635 {
636 	int ret = 0;
637 
638 	if (direction != 0) {
639 		struct mlx5_txq_ctrl *txq_ctrl;
640 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
641 
642 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
643 		if (txq_ctrl == NULL) {
644 			rte_errno = EINVAL;
645 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
646 				dev->data->port_id, cur_queue);
647 			return -rte_errno;
648 		}
649 		if (!txq_ctrl->is_hairpin) {
650 			rte_errno = EINVAL;
651 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
652 				dev->data->port_id, cur_queue);
653 			mlx5_txq_release(dev, cur_queue);
654 			return -rte_errno;
655 		}
656 		/* Already unbound, return success before obj checking. */
657 		if (txq_ctrl->hairpin_status == 0) {
658 			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
659 				dev->data->port_id, cur_queue);
660 			mlx5_txq_release(dev, cur_queue);
661 			return 0;
662 		}
663 		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
664 			rte_errno = ENOMEM;
665 			DRV_LOG(ERR, "port %u no Txq object found: %d",
666 				dev->data->port_id, cur_queue);
667 			mlx5_txq_release(dev, cur_queue);
668 			return -rte_errno;
669 		}
670 		sq_attr.state = MLX5_SQC_STATE_RST;
671 		sq_attr.sq_state = MLX5_SQC_STATE_RDY;
672 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
673 		if (ret == 0)
674 			txq_ctrl->hairpin_status = 0;
675 		mlx5_txq_release(dev, cur_queue);
676 	} else {
677 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
678 		struct mlx5_rxq_ctrl *rxq_ctrl;
679 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
680 
681 		if (rxq == NULL) {
682 			rte_errno = EINVAL;
683 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
684 				dev->data->port_id, cur_queue);
685 			return -rte_errno;
686 		}
687 		rxq_ctrl = rxq->ctrl;
688 		if (!rxq_ctrl->is_hairpin) {
689 			rte_errno = EINVAL;
690 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
691 				dev->data->port_id, cur_queue);
692 			return -rte_errno;
693 		}
694 		if (rxq->hairpin_status == 0) {
695 			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
696 				dev->data->port_id, cur_queue);
697 			return 0;
698 		}
699 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
700 			rte_errno = ENOMEM;
701 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
702 				dev->data->port_id, cur_queue);
703 			return -rte_errno;
704 		}
705 		rq_attr.state = MLX5_RQC_STATE_RST;
706 		rq_attr.rq_state = MLX5_RQC_STATE_RDY;
707 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
708 		if (ret == 0)
709 			rxq->hairpin_status = 0;
710 	}
711 	return ret;
712 }
713 
714 /*
715  * Bind the hairpin port pair, from the Tx port to the peer Rx port.
716  * This function only supports binding the Tx port to one Rx port.
717  *
718  * @param dev
719  *   Pointer to Ethernet device structure.
720  * @param rx_port
721  *   Port identifier of the Rx port.
722  *
723  * @return
724  *   0 on success, a negative errno value otherwise and rte_errno is set.
725  */
726 static int
727 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
728 {
729 	struct mlx5_priv *priv = dev->data->dev_private;
730 	int ret = 0;
731 	struct mlx5_txq_ctrl *txq_ctrl;
732 	uint32_t i;
733 	struct rte_hairpin_peer_info peer = {0xffffff};
734 	struct rte_hairpin_peer_info cur;
735 	const struct rte_eth_hairpin_conf *conf;
736 	uint16_t num_q = 0;
737 	uint16_t local_port = priv->dev_data->port_id;
738 	uint32_t manual;
739 	uint32_t explicit;
740 	uint16_t rx_queue;
741 
742 	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
743 		rte_errno = ENODEV;
744 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
745 		return -rte_errno;
746 	}
747 	/*
748 	 * Before binding TxQ to peer RxQ, a first loop over the queues checks
749 	 * their configuration consistency. This is a little time-consuming but
750 	 * better than having to roll back afterwards.
751 	 */
752 	for (i = 0; i != priv->txqs_n; i++) {
753 		txq_ctrl = mlx5_txq_get(dev, i);
754 		if (txq_ctrl == NULL)
755 			continue;
756 		if (!txq_ctrl->is_hairpin) {
757 			mlx5_txq_release(dev, i);
758 			continue;
759 		}
760 		/*
761 		 * All hairpin Tx queues of a single port that are connected to
762 		 * the same peer Rx port should have the same "auto binding" and
763 		 * "implicit Tx flow" modes.
764 		 * Peer consistency checking will be done in per-queue binding.
765 		 */
766 		conf = &txq_ctrl->hairpin_conf;
767 		if (conf->peers[0].port == rx_port) {
768 			if (num_q == 0) {
769 				manual = conf->manual_bind;
770 				explicit = conf->tx_explicit;
771 			} else {
772 				if (manual != conf->manual_bind ||
773 				    explicit != conf->tx_explicit) {
774 					rte_errno = EINVAL;
775 					DRV_LOG(ERR, "port %u queue %d mode"
776 						" mismatch: %u %u, %u %u",
777 						local_port, i, manual,
778 						conf->manual_bind, explicit,
779 						conf->tx_explicit);
780 					mlx5_txq_release(dev, i);
781 					return -rte_errno;
782 				}
783 			}
784 			num_q++;
785 		}
786 		mlx5_txq_release(dev, i);
787 	}
788 	/* If no queue is configured, return success directly. */
789 	if (num_q == 0)
790 		return ret;
791 	/* All the hairpin TX queues need to be traversed again. */
792 	for (i = 0; i != priv->txqs_n; i++) {
793 		txq_ctrl = mlx5_txq_get(dev, i);
794 		if (txq_ctrl == NULL)
795 			continue;
796 		if (!txq_ctrl->is_hairpin) {
797 			mlx5_txq_release(dev, i);
798 			continue;
799 		}
800 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
801 			mlx5_txq_release(dev, i);
802 			continue;
803 		}
804 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
805 		/*
806 		 * Fetch peer RxQ's information.
807 		 * No need to pass the information of the current queue.
808 		 */
809 		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
810 							NULL, &peer, 1);
811 		if (ret != 0) {
812 			mlx5_txq_release(dev, i);
813 			goto error;
814 		}
815 		/* Accessing its own device, inside mlx5 PMD. */
816 		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
817 		if (ret != 0) {
818 			mlx5_txq_release(dev, i);
819 			goto error;
820 		}
821 		/* Pass TxQ's information to peer RxQ and try binding. */
822 		cur.peer_q = rx_queue;
823 		cur.qp_id = mlx5_txq_get_sqn(txq_ctrl);
824 		cur.vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
825 		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
826 		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
827 		/*
828 		 * To access another device in a proper way, an RTE-level
829 		 * private function is needed.
830 		 */
831 		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
832 						      &cur, 0);
833 		if (ret != 0) {
834 			mlx5_txq_release(dev, i);
835 			goto error;
836 		}
837 		mlx5_txq_release(dev, i);
838 	}
839 	return 0;
840 error:
841 	/*
842 	 * Roll back the queues that were already bound.
843 	 * No need to check the return value of the queue unbind function.
844 	 */
845 	do {
846 		/* No validation is needed here. */
847 		txq_ctrl = mlx5_txq_get(dev, i);
848 		if (txq_ctrl == NULL)
849 			continue;
850 		if (!txq_ctrl->is_hairpin ||
851 		    txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
852 			mlx5_txq_release(dev, i);
853 			continue;
854 		}
855 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
856 		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
857 		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
858 		mlx5_txq_release(dev, i);
859 	} while (i--);
860 	return ret;
861 }
862 
863 /*
864  * Unbind the hairpin port pair. The HW configuration of both devices will be
865  * cleared and the status will be reset for all the queues used between them.
866  * This function only supports unbinding the Tx port from one Rx port.
867  *
868  * @param dev
869  *   Pointer to Ethernet device structure.
870  * @param rx_port
871  *   Port identifier of the Rx port.
872  *
873  * @return
874  *   0 on success, a negative errno value otherwise and rte_errno is set.
875  */
876 static int
877 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
878 {
879 	struct mlx5_priv *priv = dev->data->dev_private;
880 	struct mlx5_txq_ctrl *txq_ctrl;
881 	uint32_t i;
882 	int ret;
883 	uint16_t cur_port = priv->dev_data->port_id;
884 
885 	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
886 		rte_errno = ENODEV;
887 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
888 		return -rte_errno;
889 	}
890 	for (i = 0; i != priv->txqs_n; i++) {
891 		uint16_t rx_queue;
892 
893 		txq_ctrl = mlx5_txq_get(dev, i);
894 		if (txq_ctrl == NULL)
895 			continue;
896 		if (!txq_ctrl->is_hairpin) {
897 			mlx5_txq_release(dev, i);
898 			continue;
899 		}
900 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
901 			mlx5_txq_release(dev, i);
902 			continue;
903 		}
904 		/* Indeed, only the first used queue needs to be checked. */
905 		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
906 			mlx5_txq_release(dev, i);
907 			if (cur_port != rx_port) {
908 				rte_errno = EINVAL;
909 				DRV_LOG(ERR, "port %u and port %u are in"
910 					" auto-bind mode", cur_port, rx_port);
911 				return -rte_errno;
912 			} else {
913 				return 0;
914 			}
915 		}
916 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
917 		mlx5_txq_release(dev, i);
918 		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
919 		if (ret) {
920 			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
921 				rx_port, rx_queue);
922 			return ret;
923 		}
924 		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
925 		if (ret) {
926 			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
927 				cur_port, i);
928 			return ret;
929 		}
930 	}
931 	return 0;
932 }
933 
934 /*
935  * Bind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
936  * @see mlx5_hairpin_bind_single_port()
937  */
938 int
939 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
940 {
941 	int ret = 0;
942 	uint16_t p, pp;
943 
944 	/*
945 	 * If the Rx port has no hairpin configuration with the current port,
946 	 * the binding will be skipped in the single-port bind function.
947 	 * The device started status is checked only before updating the
948 	 * queue information.
949 	 */
950 	if (rx_port == RTE_MAX_ETHPORTS) {
951 		MLX5_ETH_FOREACH_DEV(p, dev->device) {
952 			ret = mlx5_hairpin_bind_single_port(dev, p);
953 			if (ret != 0)
954 				goto unbind;
955 		}
956 		return ret;
957 	} else {
958 		return mlx5_hairpin_bind_single_port(dev, rx_port);
959 	}
960 unbind:
961 	MLX5_ETH_FOREACH_DEV(pp, dev->device)
962 		if (pp < p)
963 			mlx5_hairpin_unbind_single_port(dev, pp);
964 	return ret;
965 }
966 
967 /*
968  * Unbind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
969  * @see mlx5_hairpin_unbind_single_port()
970  */
971 int
972 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
973 {
974 	int ret = 0;
975 	uint16_t p;
976 
977 	if (rx_port == RTE_MAX_ETHPORTS)
978 		MLX5_ETH_FOREACH_DEV(p, dev->device) {
979 			ret = mlx5_hairpin_unbind_single_port(dev, p);
980 			if (ret != 0)
981 				return ret;
982 		}
983 	else
984 		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
985 	return ret;
986 }
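
/*
 * Illustrative usage sketch, not part of the driver and kept out of the
 * build behind a hypothetical guard macro: how an application drives the
 * manual binding path above through the public ethdev API for a
 * bidirectional hairpin between two ports.
 */
#ifdef MLX5_TRIGGER_USAGE_SKETCHES
static int
example_manual_hairpin_bind(uint16_t port_a, uint16_t port_b)
{
	int ret;

	/* Bind the Tx side of port_a to the Rx side of port_b. */
	ret = rte_eth_hairpin_bind(port_a, port_b);
	if (ret != 0)
		return ret;
	/* Bind the reverse direction; roll back the first one on failure. */
	ret = rte_eth_hairpin_bind(port_b, port_a);
	if (ret != 0)
		rte_eth_hairpin_unbind(port_a, port_b);
	return ret;
}
#endif /* MLX5_TRIGGER_USAGE_SKETCHES */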
987 
988 /*
989  * DPDK callback to get the hairpin peer ports list.
990  * This will return the actual number of peer ports and save the identifiers
991  * into the array (sorted, which may differ from the order used when setting
992  * up the hairpin peer queues).
993  * The peer port ID could be the same as the port ID of the current device.
994  *
995  * @param dev
996  *   Pointer to Ethernet device structure.
997  * @param peer_ports
998  *   Pointer to array to save the port identifiers.
999  * @param len
1000  *   The length of the array.
1001  * @param direction
1002  *   Current port to peer port direction.
1003  *   positive - current used as Tx to get all peer Rx ports.
1004  *   zero - current used as Rx to get all peer Tx ports.
1005  *
1006  * @return
1007  *   0 or positive value on success, actual number of peer ports.
1008  *   a negative errno value otherwise and rte_errno is set.
1009  */
1010 int
1011 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1012 			    size_t len, uint32_t direction)
1013 {
1014 	struct mlx5_priv *priv = dev->data->dev_private;
1015 	struct mlx5_txq_ctrl *txq_ctrl;
1016 	uint32_t i;
1017 	uint16_t pp;
1018 	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1019 	int ret = 0;
1020 
1021 	if (direction) {
1022 		for (i = 0; i < priv->txqs_n; i++) {
1023 			txq_ctrl = mlx5_txq_get(dev, i);
1024 			if (!txq_ctrl)
1025 				continue;
1026 			if (!txq_ctrl->is_hairpin) {
1027 				mlx5_txq_release(dev, i);
1028 				continue;
1029 			}
1030 			pp = txq_ctrl->hairpin_conf.peers[0].port;
1031 			if (pp >= RTE_MAX_ETHPORTS) {
1032 				rte_errno = ERANGE;
1033 				mlx5_txq_release(dev, i);
1034 				DRV_LOG(ERR, "port %hu queue %u peer port "
1035 					"out of range %hu",
1036 					priv->dev_data->port_id, i, pp);
1037 				return -rte_errno;
1038 			}
1039 			bits[pp / 32] |= 1u << (pp % 32);
1040 			mlx5_txq_release(dev, i);
1041 		}
1042 	} else {
1043 		for (i = 0; i < priv->rxqs_n; i++) {
1044 			struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1045 			struct mlx5_rxq_ctrl *rxq_ctrl;
1046 
1047 			if (rxq == NULL)
1048 				continue;
1049 			rxq_ctrl = rxq->ctrl;
1050 			if (!rxq_ctrl->is_hairpin)
1051 				continue;
1052 			pp = rxq->hairpin_conf.peers[0].port;
1053 			if (pp >= RTE_MAX_ETHPORTS) {
1054 				rte_errno = ERANGE;
1055 				DRV_LOG(ERR, "port %hu queue %u peer port "
1056 					"out of range %hu",
1057 					priv->dev_data->port_id, i, pp);
1058 				return -rte_errno;
1059 			}
1060 			bits[pp / 32] |= 1u << (pp % 32);
1061 		}
1062 	}
1063 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1064 		if (bits[i / 32] & (1u << (i % 32))) {
1065 			if ((size_t)ret >= len) {
1066 				rte_errno = E2BIG;
1067 				return -rte_errno;
1068 			}
1069 			peer_ports[ret++] = i;
1070 		}
1071 	}
1072 	return ret;
1073 }
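
/*
 * Illustrative usage sketch, not part of the driver and kept out of the
 * build behind a hypothetical guard macro: querying the peer Rx ports of
 * a port's hairpin Tx queues through the public ethdev wrapper of the
 * callback above (direction == 1 means the current port is the Tx side).
 */
#ifdef MLX5_TRIGGER_USAGE_SKETCHES
static void
example_dump_hairpin_peer_rx_ports(uint16_t port_id)
{
	uint16_t peers[RTE_MAX_ETHPORTS];
	int i, n;

	n = rte_eth_hairpin_get_peer_ports(port_id, peers, RTE_DIM(peers), 1);
	if (n < 0)
		return;
	for (i = 0; i < n; i++)
		DRV_LOG(DEBUG, "port %u hairpin peer Rx port %u",
			port_id, peers[i]);
}
#endif /* MLX5_TRIGGER_USAGE_SKETCHES */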
1074 
1075 #ifdef HAVE_MLX5_HWS_SUPPORT
1076 
1077 /**
1078  * Check if starting representor port is allowed.
1079  *
1080  * If transfer proxy port is configured for HWS, then starting representor port
1081  * is allowed if and only if transfer proxy port is started as well.
1082  *
1083  * @param dev
1084  *   Pointer to Ethernet device structure.
1085  *
1086  * @return
1087  *   If starting representor port is allowed, then 0 is returned.
1088  *   Otherwise rte_errno is set, and negative errno value is returned.
1089  */
1090 static int
1091 mlx5_hw_representor_port_allowed_start(struct rte_eth_dev *dev)
1092 {
1093 	struct mlx5_priv *priv = dev->data->dev_private;
1094 	struct rte_eth_dev *proxy_dev;
1095 	struct mlx5_priv *proxy_priv;
1096 	uint16_t proxy_port_id = UINT16_MAX;
1097 	int ret;
1098 
1099 	MLX5_ASSERT(priv->sh->config.dv_flow_en == 2);
1100 	MLX5_ASSERT(priv->sh->config.dv_esw_en);
1101 	MLX5_ASSERT(priv->representor);
1102 	ret = rte_flow_pick_transfer_proxy(dev->data->port_id, &proxy_port_id, NULL);
1103 	if (ret) {
1104 		if (ret == -ENODEV)
1105 			DRV_LOG(ERR, "Starting representor port %u is not allowed. Transfer "
1106 				     "proxy port is not available.", dev->data->port_id);
1107 		else
1108 			DRV_LOG(ERR, "Failed to pick transfer proxy for port %u (ret = %d)",
1109 				dev->data->port_id, ret);
1110 		return ret;
1111 	}
1112 	proxy_dev = &rte_eth_devices[proxy_port_id];
1113 	proxy_priv = proxy_dev->data->dev_private;
1114 	if (proxy_priv->dr_ctx == NULL) {
1115 		DRV_LOG(DEBUG, "Starting representor port %u is allowed, but default traffic flows"
1116 			       " will not be created. Transfer proxy port must be configured"
1117 			       " for HWS and started.",
1118 			       dev->data->port_id);
1119 		return 0;
1120 	}
1121 	if (!proxy_dev->data->dev_started) {
1122 		DRV_LOG(ERR, "Failed to start port %u: transfer proxy (port %u) must be started",
1123 			     dev->data->port_id, proxy_port_id);
1124 		rte_errno = EAGAIN;
1125 		return -rte_errno;
1126 	}
1127 	if (priv->sh->config.repr_matching && !priv->dr_ctx) {
1128 		DRV_LOG(ERR, "Failed to start port %u: with representor matching enabled, port "
1129 			     "must be configured for HWS", dev->data->port_id);
1130 		rte_errno = EINVAL;
1131 		return -rte_errno;
1132 	}
1133 	return 0;
1134 }
1135 
1136 #endif
1137 
1138 /**
1139  * DPDK callback to start the device.
1140  *
1141  * Simulate device start by attaching all configured flows.
1142  *
1143  * @param dev
1144  *   Pointer to Ethernet device structure.
1145  *
1146  * @return
1147  *   0 on success, a negative errno value otherwise and rte_errno is set.
1148  *   The following error values are defined:
1149  *
1150  *   - -EAGAIN: If port representor cannot be started,
1151  *     because transfer proxy port is not started.
1152  */
1153 int
1154 mlx5_dev_start(struct rte_eth_dev *dev)
1155 {
1156 	struct mlx5_priv *priv = dev->data->dev_private;
1157 	int ret;
1158 	int fine_inline;
1159 
1160 	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1161 #ifdef HAVE_MLX5_HWS_SUPPORT
1162 	if (priv->sh->config.dv_flow_en == 2) {
1163 		/* Initialize HWS if no previous configuration exists. */
1164 		if (!(priv->dr_ctx)) {
1165 			ret = flow_hw_init(dev, NULL);
1166 			if (ret)
1167 				return ret;
1168 		}
1169 		/* If there is no E-Switch, then there are no start/stop order limitations. */
1170 		if (!priv->sh->config.dv_esw_en)
1171 			goto continue_dev_start;
1172 		/* If master is being started, then it is always allowed. */
1173 		if (priv->master)
1174 			goto continue_dev_start;
1175 		if (mlx5_hw_representor_port_allowed_start(dev))
1176 			return -rte_errno;
1177 	}
1178 continue_dev_start:
1179 #endif
1180 	fine_inline = rte_mbuf_dynflag_lookup
1181 		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1182 	if (fine_inline >= 0)
1183 		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1184 	else
1185 		rte_net_mlx5_dynf_inline_mask = 0;
1186 	if (dev->data->nb_rx_queues > 0) {
1187 		uint32_t max_lro_msg_size = priv->max_lro_msg_size;
1188 
1189 		if (max_lro_msg_size < MLX5_LRO_SEG_CHUNK_SIZE) {
1190 			uint32_t i;
1191 			struct mlx5_rxq_priv *rxq;
1192 
1193 			for (i = 0; i != priv->rxqs_n; ++i) {
1194 				rxq = mlx5_rxq_get(dev, i);
1195 				if (rxq && rxq->ctrl && rxq->ctrl->rxq.lro) {
1196 					DRV_LOG(ERR, "port %u invalid max LRO size",
1197 						dev->data->port_id);
1198 					rte_errno = EINVAL;
1199 					return -rte_errno;
1200 				}
1201 			}
1202 		}
1203 		ret = mlx5_dev_configure_rss_reta(dev);
1204 		if (ret) {
1205 			DRV_LOG(ERR, "port %u reta config failed: %s",
1206 				dev->data->port_id, strerror(rte_errno));
1207 			return -rte_errno;
1208 		}
1209 	}
1210 	ret = mlx5_txpp_start(dev);
1211 	if (ret) {
1212 		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1213 			dev->data->port_id, strerror(rte_errno));
1214 		goto error;
1215 	}
1216 	if (mlx5_devx_obj_ops_en(priv->sh) &&
1217 	    priv->obj_ops.lb_dummy_queue_create) {
1218 		ret = priv->obj_ops.lb_dummy_queue_create(dev);
1219 		if (ret)
1220 			goto error;
1221 	}
1222 	ret = mlx5_txq_start(dev);
1223 	if (ret) {
1224 		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1225 			dev->data->port_id, strerror(rte_errno));
1226 		goto error;
1227 	}
1228 	if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
1229 		if (!priv->sh->dev_cap.vf && !priv->sh->dev_cap.sf &&
1230 		    !priv->representor) {
1231 			ret = mlx5_get_flag_dropless_rq(dev);
1232 			if (ret < 0)
1233 				DRV_LOG(WARNING,
1234 					"port %u cannot query dropless flag",
1235 					dev->data->port_id);
1236 			else if (!ret)
1237 				DRV_LOG(WARNING,
1238 					"port %u dropless_rq OFF, no rearming",
1239 					dev->data->port_id);
1240 		} else {
1241 			DRV_LOG(DEBUG,
1242 				"port %u doesn't support dropless_rq flag",
1243 				dev->data->port_id);
1244 		}
1245 	}
1246 	ret = mlx5_rxq_start(dev);
1247 	if (ret) {
1248 		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1249 			dev->data->port_id, strerror(rte_errno));
1250 		goto error;
1251 	}
1252 	/*
1253 	 * This step is skipped if there is no hairpin Tx queue configured
1254 	 * with an Rx peer queue from the same device.
1255 	 */
1256 	ret = mlx5_hairpin_auto_bind(dev);
1257 	if (ret) {
1258 		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1259 			dev->data->port_id, strerror(rte_errno));
1260 		goto error;
1261 	}
1262 	/* Set started flag here for the following steps like control flow. */
1263 	dev->data->dev_started = 1;
1264 	ret = mlx5_rx_intr_vec_enable(dev);
1265 	if (ret) {
1266 		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1267 			dev->data->port_id);
1268 		goto error;
1269 	}
1270 	mlx5_os_stats_init(dev);
1271 	/*
1272 	 * Attach indirection table objects detached on port stop.
1273 	 * They may be needed to create RSS in non-isolated mode.
1274 	 */
1275 	ret = mlx5_action_handle_attach(dev);
1276 	if (ret) {
1277 		DRV_LOG(ERR,
1278 			"port %u failed to attach indirect actions: %s",
1279 			dev->data->port_id, rte_strerror(rte_errno));
1280 		goto error;
1281 	}
1282 #ifdef HAVE_MLX5_HWS_SUPPORT
1283 	if (priv->sh->config.dv_flow_en == 2) {
1284 		ret = flow_hw_table_update(dev, NULL);
1285 		if (ret) {
1286 			DRV_LOG(ERR, "port %u failed to update HWS tables",
1287 				dev->data->port_id);
1288 			goto error;
1289 		}
1290 	}
1291 #endif
1292 	ret = mlx5_traffic_enable(dev);
1293 	if (ret) {
1294 		DRV_LOG(ERR, "port %u failed to set defaults flows",
1295 			dev->data->port_id);
1296 		goto error;
1297 	}
1298 	/* Set dynamic fields and flags into Rx queues. */
1299 	mlx5_flow_rxq_dynf_set(dev);
1300 	/* Set flags and context to convert Rx timestamps. */
1301 	mlx5_rxq_timestamp_set(dev);
1302 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
1303 	mlx5_txq_dynf_timestamp_set(dev);
1304 	/*
1305 	 * In non-cached mode, only the default mreg copy action needs to be
1306 	 * started, since no flow created by an application exists anymore.
1307 	 * But it is worth wrapping the interface for further usage.
1308 	 */
1309 	ret = mlx5_flow_start_default(dev);
1310 	if (ret) {
1311 		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1312 			dev->data->port_id, strerror(rte_errno));
1313 		goto error;
1314 	}
1315 	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1316 		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1317 			dev->data->port_id, rte_strerror(rte_errno));
1318 		goto error;
1319 	}
1320 	rte_wmb();
1321 	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1322 	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1323 	/* Enable datapath on secondary process. */
1324 	mlx5_mp_os_req_start_rxtx(dev);
1325 	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1326 		priv->sh->port[priv->dev_port - 1].ih_port_id =
1327 					(uint32_t)dev->data->port_id;
1328 	} else {
1329 		DRV_LOG(INFO, "port %u starts without RMV interrupts.",
1330 			dev->data->port_id);
1331 		dev->data->dev_conf.intr_conf.rmv = 0;
1332 	}
1333 	if (rte_intr_fd_get(priv->sh->intr_handle_nl) >= 0) {
1334 		priv->sh->port[priv->dev_port - 1].nl_ih_port_id =
1335 					(uint32_t)dev->data->port_id;
1336 	} else {
1337 		DRV_LOG(INFO, "port %u starts without LSC interrupts.",
1338 			dev->data->port_id);
1339 		dev->data->dev_conf.intr_conf.lsc = 0;
1340 	}
1341 	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1342 		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1343 					(uint32_t)dev->data->port_id;
1344 	return 0;
1345 error:
1346 	ret = rte_errno; /* Save rte_errno before cleanup. */
1347 	/* Rollback. */
1348 	dev->data->dev_started = 0;
1349 	mlx5_flow_stop_default(dev);
1350 	mlx5_traffic_disable(dev);
1351 	mlx5_txq_stop(dev);
1352 	mlx5_rxq_stop(dev);
1353 	if (priv->obj_ops.lb_dummy_queue_release)
1354 		priv->obj_ops.lb_dummy_queue_release(dev);
1355 	mlx5_txpp_stop(dev); /* Stop last. */
1356 	rte_errno = ret; /* Restore rte_errno. */
1357 	return -rte_errno;
1358 }
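
/*
 * Illustrative usage sketch, not part of the driver and kept out of the
 * build behind a hypothetical guard macro: the start ordering expected by
 * the HWS E-Switch checks above - the transfer proxy port is started
 * before its representor, otherwise the representor start fails with
 * -EAGAIN.
 */
#ifdef MLX5_TRIGGER_USAGE_SKETCHES
static int
example_start_representor_with_proxy(uint16_t repr_port_id)
{
	uint16_t proxy_port_id = 0;
	int ret;

	/* Resolve the transfer proxy (E-Switch manager) of the representor. */
	ret = rte_flow_pick_transfer_proxy(repr_port_id, &proxy_port_id, NULL);
	if (ret != 0)
		return ret;
	ret = rte_eth_dev_start(proxy_port_id);
	if (ret != 0)
		return ret;
	return rte_eth_dev_start(repr_port_id);
}
#endif /* MLX5_TRIGGER_USAGE_SKETCHES */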
1359 
1360 #ifdef HAVE_MLX5_HWS_SUPPORT
1361 /**
1362  * Check if stopping transfer proxy port is allowed.
1363  *
1364  * If transfer proxy port is configured for HWS, then it is allowed to stop it
1365  * if and only if all other representor ports are stopped.
1366  *
1367  * @param dev
1368  *   Pointer to Ethernet device structure.
1369  *
1370  * @return
1371  *   If stopping transfer proxy port is allowed, then 0 is returned.
1372  *   Otherwise rte_errno is set, and negative errno value is returned.
1373  */
1374 static int
1375 mlx5_hw_proxy_port_allowed_stop(struct rte_eth_dev *dev)
1376 {
1377 	struct mlx5_priv *priv = dev->data->dev_private;
1378 	bool representor_started = false;
1379 	uint16_t port_id;
1380 
1381 	MLX5_ASSERT(priv->sh->config.dv_flow_en == 2);
1382 	MLX5_ASSERT(priv->sh->config.dv_esw_en);
1383 	MLX5_ASSERT(priv->master);
1384 	/* If transfer proxy port was not configured for HWS, then stopping it is allowed. */
1385 	if (!priv->dr_ctx)
1386 		return 0;
1387 	MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
1388 		const struct rte_eth_dev *port_dev = &rte_eth_devices[port_id];
1389 		const struct mlx5_priv *port_priv = port_dev->data->dev_private;
1390 
1391 		if (port_id != dev->data->port_id &&
1392 		    port_priv->domain_id == priv->domain_id &&
1393 		    port_dev->data->dev_started)
1394 			representor_started = true;
1395 	}
1396 	if (representor_started) {
1397 		DRV_LOG(ERR, "Failed to stop port %u: attached representor ports"
1398 			     " must be stopped before stopping transfer proxy port",
1399 			     dev->data->port_id);
1400 		rte_errno = EBUSY;
1401 		return -rte_errno;
1402 	}
1403 	return 0;
1404 }
1405 #endif
1406 
1407 /**
1408  * DPDK callback to stop the device.
1409  *
1410  * Simulate device stop by detaching all configured flows.
1411  *
1412  * @param dev
1413  *   Pointer to Ethernet device structure.
1414  *
1415  * @return
1416  *   0 on success, a negative errno value otherwise and rte_errno is set.
1417  *   The following error values are defined:
1418  *
1419  *   - -EBUSY: If transfer proxy port cannot be stopped,
1420  *     because other port representors are still running.
1421  */
1422 int
1423 mlx5_dev_stop(struct rte_eth_dev *dev)
1424 {
1425 	struct mlx5_priv *priv = dev->data->dev_private;
1426 
1427 #ifdef HAVE_MLX5_HWS_SUPPORT
1428 	if (priv->sh->config.dv_flow_en == 2) {
1429 		/* If there is no E-Switch, then there are no start/stop order limitations. */
1430 		if (!priv->sh->config.dv_esw_en)
1431 			goto continue_dev_stop;
1432 		/* If representor is being stopped, then it is always allowed. */
1433 		if (priv->representor)
1434 			goto continue_dev_stop;
1435 		if (mlx5_hw_proxy_port_allowed_stop(dev)) {
1436 			dev->data->dev_started = 1;
1437 			return -rte_errno;
1438 		}
1439 	}
1440 continue_dev_stop:
1441 #endif
1442 	dev->data->dev_started = 0;
1443 	/* Prevent crashes when queues are still in use. */
1444 	dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
1445 	dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
1446 	rte_wmb();
1447 	/* Disable datapath on secondary process. */
1448 	mlx5_mp_os_req_stop_rxtx(dev);
1449 	rte_delay_us_sleep(1000 * priv->rxqs_n);
1450 	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1451 	mlx5_flow_stop_default(dev);
1452 	/* Control flows for default traffic can be removed firstly. */
1453 	mlx5_traffic_disable(dev);
1454 	/* All RX queue flags will be cleared in the flush interface. */
1455 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1456 	mlx5_flow_meter_rxq_flush(dev);
1457 	mlx5_action_handle_detach(dev);
1458 #ifdef HAVE_MLX5_HWS_SUPPORT
1459 	mlx5_flow_hw_cleanup_ctrl_rx_templates(dev);
1460 #endif
1461 	mlx5_rx_intr_vec_disable(dev);
1462 	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1463 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1464 	priv->sh->port[priv->dev_port - 1].nl_ih_port_id = RTE_MAX_ETHPORTS;
1465 	mlx5_txq_stop(dev);
1466 	mlx5_rxq_stop(dev);
1467 	if (priv->obj_ops.lb_dummy_queue_release)
1468 		priv->obj_ops.lb_dummy_queue_release(dev);
1469 	mlx5_txpp_stop(dev);
1470 
1471 	return 0;
1472 }
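
/*
 * Illustrative usage sketch, not part of the driver and kept out of the
 * build behind a hypothetical guard macro: the mirror stop ordering
 * required by the check above - representor ports are stopped before the
 * transfer proxy port, otherwise the proxy stop fails with -EBUSY.
 */
#ifdef MLX5_TRIGGER_USAGE_SKETCHES
static int
example_stop_representor_then_proxy(uint16_t repr_port_id,
				    uint16_t proxy_port_id)
{
	int ret;

	/* Representors first... */
	ret = rte_eth_dev_stop(repr_port_id);
	if (ret != 0)
		return ret;
	/* ...then the transfer proxy port itself. */
	return rte_eth_dev_stop(proxy_port_id);
}
#endif /* MLX5_TRIGGER_USAGE_SKETCHES */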
1473 
1474 #ifdef HAVE_MLX5_HWS_SUPPORT
1475 
1476 static int
1477 mlx5_traffic_enable_hws(struct rte_eth_dev *dev)
1478 {
1479 	struct mlx5_priv *priv = dev->data->dev_private;
1480 	struct mlx5_sh_config *config = &priv->sh->config;
1481 	uint64_t flags = 0;
1482 	unsigned int i;
1483 	int ret;
1484 
1485 	/*
1486 	 * With extended metadata enabled, the Tx metadata copy is handled by default
1487 	 * Tx tagging flow rules, so default Tx flow rule is not needed. It is only
1488 	 * required when representor matching is disabled.
1489 	 */
1490 	if (config->dv_esw_en &&
1491 	    !config->repr_matching &&
1492 	    config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS &&
1493 	    priv->master) {
1494 		if (mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev))
1495 			goto error;
1496 	}
1497 	for (i = 0; i < priv->txqs_n; ++i) {
1498 		struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, i);
1499 		uint32_t queue;
1500 
1501 		if (!txq)
1502 			continue;
1503 		queue = mlx5_txq_get_sqn(txq);
1504 		if ((priv->representor || priv->master) &&
1505 		    config->dv_esw_en &&
1506 		    config->fdb_def_rule) {
1507 			if (mlx5_flow_hw_esw_create_sq_miss_flow(dev, queue, false)) {
1508 				mlx5_txq_release(dev, i);
1509 				goto error;
1510 			}
1511 		}
1512 		if (config->dv_esw_en && config->repr_matching) {
1513 			if (mlx5_flow_hw_tx_repr_matching_flow(dev, queue, false)) {
1514 				mlx5_txq_release(dev, i);
1515 				goto error;
1516 			}
1517 		}
1518 		mlx5_txq_release(dev, i);
1519 	}
1520 	if (config->fdb_def_rule) {
1521 		if ((priv->master || priv->representor) && config->dv_esw_en) {
1522 			if (!mlx5_flow_hw_esw_create_default_jump_flow(dev))
1523 				priv->fdb_def_rule = 1;
1524 			else
1525 				goto error;
1526 		}
1527 	} else {
1528 		DRV_LOG(INFO, "port %u FDB default rule is disabled", dev->data->port_id);
1529 	}
1530 	if (priv->isolated)
1531 		return 0;
1532 	if (!priv->sh->config.lacp_by_user && priv->pf_bond >= 0 && priv->master)
1533 		if (mlx5_flow_hw_lacp_rx_flow(dev))
1534 			goto error;
1535 	if (dev->data->promiscuous)
1536 		flags |= MLX5_CTRL_PROMISCUOUS;
1537 	if (dev->data->all_multicast)
1538 		flags |= MLX5_CTRL_ALL_MULTICAST;
1539 	else
1540 		flags |= MLX5_CTRL_BROADCAST | MLX5_CTRL_IPV4_MULTICAST | MLX5_CTRL_IPV6_MULTICAST;
1541 	flags |= MLX5_CTRL_DMAC;
1542 	if (priv->vlan_filter_n)
1543 		flags |= MLX5_CTRL_VLAN_FILTER;
1544 	return mlx5_flow_hw_ctrl_flows(dev, flags);
1545 error:
1546 	ret = rte_errno;
1547 	mlx5_flow_hw_flush_ctrl_flows(dev);
1548 	rte_errno = ret;
1549 	return -rte_errno;
1550 }
1551 
1552 #endif
1553 
1554 /**
1555  * Enable traffic flows configured by control plane
1556  *
1557  * @param dev
1558  *   Pointer to Ethernet device structure.
1559  *
1560  * @return
1561  *   0 on success, a negative errno value otherwise and rte_errno is set.
1562  */
1563 int
1564 mlx5_traffic_enable(struct rte_eth_dev *dev)
1565 {
1566 	struct mlx5_priv *priv = dev->data->dev_private;
1567 	struct rte_flow_item_eth bcast = {
1568 		.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
1569 	};
1570 	struct rte_flow_item_eth ipv6_multi_spec = {
1571 		.hdr.dst_addr.addr_bytes = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 },
1572 	};
1573 	struct rte_flow_item_eth ipv6_multi_mask = {
1574 		.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
1575 	};
1576 	struct rte_flow_item_eth unicast = {
1577 		.hdr.src_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1578 	};
1579 	struct rte_flow_item_eth unicast_mask = {
1580 		.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
1581 	};
1582 	const unsigned int vlan_filter_n = priv->vlan_filter_n;
1583 	const struct rte_ether_addr cmp = {
1584 		.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1585 	};
1586 	unsigned int i;
1587 	unsigned int j;
1588 	int ret;
1589 
1590 #ifdef HAVE_MLX5_HWS_SUPPORT
1591 	if (priv->sh->config.dv_flow_en == 2)
1592 		return mlx5_traffic_enable_hws(dev);
1593 #endif
1594 	/*
1595 	 * The hairpin Tx queue default flow should be created regardless of
1596 	 * isolation mode. Otherwise, all the packets to be sent would go out
1597 	 * directly without the Tx flow actions, e.g. encapsulation.
1598 	 */
1599 	for (i = 0; i != priv->txqs_n; ++i) {
1600 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1601 		if (!txq_ctrl)
1602 			continue;
1603 		/* Only Tx implicit mode requires the default Tx flow. */
1604 		if (txq_ctrl->is_hairpin &&
1605 		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1606 		    txq_ctrl->hairpin_conf.peers[0].port ==
1607 		    priv->dev_data->port_id) {
1608 			ret = mlx5_ctrl_flow_source_queue(dev,
1609 					mlx5_txq_get_sqn(txq_ctrl));
1610 			if (ret) {
1611 				mlx5_txq_release(dev, i);
1612 				goto error;
1613 			}
1614 		}
1615 		if (priv->sh->config.dv_esw_en) {
1616 			uint32_t q = mlx5_txq_get_sqn(txq_ctrl);
1617 
1618 			if (mlx5_flow_create_devx_sq_miss_flow(dev, q) == 0) {
1619 				mlx5_txq_release(dev, i);
1620 				DRV_LOG(ERR,
1621 					"Port %u Tx queue %u: failed to create representor DevX SQ default miss rule.",
1622 					dev->data->port_id, i);
1623 				goto error;
1624 			}
1625 		}
1626 		mlx5_txq_release(dev, i);
1627 	}
1628 	if (priv->sh->config.fdb_def_rule) {
1629 		if (priv->sh->config.dv_esw_en) {
1630 			if (mlx5_flow_create_esw_table_zero_flow(dev))
1631 				priv->fdb_def_rule = 1;
1632 			else
1633 				DRV_LOG(INFO, "port %u FDB default rule cannot be configured - only Eswitch group 0 flows are supported.",
1634 					dev->data->port_id);
1635 		}
1636 	} else {
1637 		DRV_LOG(INFO, "port %u FDB default rule is disabled",
1638 			dev->data->port_id);
1639 	}
1640 	if (!priv->sh->config.lacp_by_user && priv->pf_bond >= 0 && priv->master) {
1641 		ret = mlx5_flow_lacp_miss(dev);
1642 		if (ret)
1643 			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1644 				"forward LACP to kernel.", dev->data->port_id);
1645 		else
1646 			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1647 				dev->data->port_id);
1648 	}
1649 	if (priv->isolated)
1650 		return 0;
1651 	if (dev->data->promiscuous) {
1652 		struct rte_flow_item_eth promisc = {
1653 			.hdr.dst_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1654 			.hdr.src_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1655 			.hdr.ether_type = 0,
1656 		};
1657 
1658 		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1659 		if (ret)
1660 			goto error;
1661 	}
1662 	if (dev->data->all_multicast) {
1663 		struct rte_flow_item_eth multicast = {
1664 			.hdr.dst_addr.addr_bytes = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
1665 			.hdr.src_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1666 			.hdr.ether_type = 0,
1667 		};
1668 
1669 		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1670 		if (ret)
1671 			goto error;
1672 	} else {
1673 		/* Add broadcast/multicast flows. */
1674 		for (i = 0; i != vlan_filter_n; ++i) {
1675 			uint16_t vlan = priv->vlan_filter[i];
1676 
1677 			struct rte_flow_item_vlan vlan_spec = {
1678 				.hdr.vlan_tci = rte_cpu_to_be_16(vlan),
1679 			};
1680 			struct rte_flow_item_vlan vlan_mask =
1681 				rte_flow_item_vlan_mask;
1682 
1683 			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1684 						  &vlan_spec, &vlan_mask);
1685 			if (ret)
1686 				goto error;
1687 			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1688 						  &ipv6_multi_mask,
1689 						  &vlan_spec, &vlan_mask);
1690 			if (ret)
1691 				goto error;
1692 		}
1693 		if (!vlan_filter_n) {
1694 			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1695 			if (ret)
1696 				goto error;
1697 			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1698 					     &ipv6_multi_mask);
1699 			if (ret) {
1700 				/* Do not fail on IPv6 multicast creation failure. */
1701 				DRV_LOG(WARNING,
1702 					"IPv6 multicast is not supported");
1703 				ret = 0;
1704 			}
1705 		}
1706 	}
1707 	/* Add MAC address flows. */
1708 	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1709 		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1710 
1711 		if (!memcmp(mac, &cmp, sizeof(*mac)))
1712 			continue;
1713 		memcpy(&unicast.hdr.dst_addr.addr_bytes,
1714 		       mac->addr_bytes,
1715 		       RTE_ETHER_ADDR_LEN);
1716 		for (j = 0; j != vlan_filter_n; ++j) {
1717 			uint16_t vlan = priv->vlan_filter[j];
1718 
1719 			struct rte_flow_item_vlan vlan_spec = {
1720 				.hdr.vlan_tci = rte_cpu_to_be_16(vlan),
1721 			};
1722 			struct rte_flow_item_vlan vlan_mask =
1723 				rte_flow_item_vlan_mask;
1724 
1725 			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1726 						  &unicast_mask,
1727 						  &vlan_spec,
1728 						  &vlan_mask);
1729 			if (ret)
1730 				goto error;
1731 		}
1732 		if (!vlan_filter_n) {
1733 			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1734 			if (ret)
1735 				goto error;
1736 		}
1737 	}
1738 	return 0;
1739 error:
1740 	ret = rte_errno; /* Save rte_errno before cleanup. */
1741 	mlx5_traffic_disable_legacy(dev);
1742 	rte_errno = ret; /* Restore rte_errno. */
1743 	return -rte_errno;
1744 }
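
/*
 * Illustrative usage sketch, not part of the driver and kept out of the
 * build behind a hypothetical guard macro: the application-side calls
 * that make the legacy path above create unicast MAC and per-VLAN control
 * flows on the next traffic enable/restart. The MAC address and VLAN ID
 * are assumptions for the example.
 */
#ifdef MLX5_TRIGGER_USAGE_SKETCHES
static int
example_add_mac_and_vlan_filter(uint16_t port_id)
{
	struct rte_ether_addr mac = {
		.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
	};
	int ret;

	/* Each additional MAC address gets its own unicast control flow. */
	ret = rte_eth_dev_mac_addr_add(port_id, &mac, 0);
	if (ret != 0)
		return ret;
	/* Each VLAN filter adds per-VLAN variants of the control flows. */
	return rte_eth_dev_vlan_filter(port_id, 100, 1);
}
#endif /* MLX5_TRIGGER_USAGE_SKETCHES */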
1745 
1746 static void
1747 mlx5_traffic_disable_legacy(struct rte_eth_dev *dev)
1748 {
1749 	struct mlx5_priv *priv = dev->data->dev_private;
1750 	struct mlx5_ctrl_flow_entry *entry;
1751 	struct mlx5_ctrl_flow_entry *tmp;
1752 
1753 	/*
1754 	 * Free registered control flow rules first,
1755 	 * to free the memory allocated for list entries
1756 	 */
1757 	entry = LIST_FIRST(&priv->hw_ctrl_flows);
1758 	while (entry != NULL) {
1759 		tmp = LIST_NEXT(entry, next);
1760 		mlx5_legacy_ctrl_flow_destroy(dev, entry);
1761 		entry = tmp;
1762 	}
1763 
1764 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1765 }
1766 
1767 /**
1768  * Disable traffic flows configured by control plane
1769  *
1770  * @param dev
1771  *   Pointer to Ethernet device private data.
1772  */
1773 void
1774 mlx5_traffic_disable(struct rte_eth_dev *dev)
1775 {
1776 #ifdef HAVE_MLX5_HWS_SUPPORT
1777 	struct mlx5_priv *priv = dev->data->dev_private;
1778 
1779 	if (priv->sh->config.dv_flow_en == 2)
1780 		mlx5_flow_hw_flush_ctrl_flows(dev);
1781 	else
1782 #endif
1783 		mlx5_traffic_disable_legacy(dev);
1784 }
1785 
1786 /**
1787  * Restart traffic flows configured by control plane
1788  *
1789  * @param dev
1790  *   Pointer to Ethernet device private data.
1791  *
1792  * @return
1793  *   0 on success, a negative errno value otherwise and rte_errno is set.
1794  */
1795 int
1796 mlx5_traffic_restart(struct rte_eth_dev *dev)
1797 {
1798 	if (dev->data->dev_started) {
1799 		mlx5_traffic_disable(dev);
1800 #ifdef HAVE_MLX5_HWS_SUPPORT
1801 		mlx5_flow_hw_cleanup_ctrl_rx_templates(dev);
1802 #endif
1803 		return mlx5_traffic_enable(dev);
1804 	}
1805 	return 0;
1806 }
1807