/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		txq_data = &txq_ctrl->txq;
		if (!txq_ctrl->is_hairpin)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (!txq_ctrl->is_hairpin) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
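	/* Release the queue at the failed index and all queues before it. */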
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Register Rx queue mempools and fill the Rx queue cache.
 * This function tolerates repeated mempool registration.
 *
 * @param[in] rxq_ctrl
 *   Rx queue control data.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct rte_mempool *mp;
	uint32_t s;
	int ret = 0;

	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
	/* MPRQ mempool is registered on creation, just fill the cache. */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
		return mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
						      rxq_ctrl->rxq.mprq_mp);
	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
		bool is_extmem;

		mp = rxq_ctrl->rxq.rxseg[s].mp;
		is_extmem = (rte_pktmbuf_priv_flags(mp) &
			     RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
		ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
					       is_extmem);
		if (ret < 0 && rte_errno != EEXIST)
			return ret;
		ret = mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
						     mp);
		if (ret < 0)
			return ret;
	}
	return 0;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

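/**
 * Prepare a single Rx queue to be started: register its mempools,
 * allocate the queue elements and the queue object container.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq_ctrl
 *   Rx queue control data.
 * @param idx
 *   Rx queue index.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */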
static int
mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
		      unsigned int idx)
{
	int ret = 0;

	if (!rxq_ctrl->is_hairpin) {
		/*
		 * Pre-register the mempools. Regardless of whether
		 * the implicit registration is enabled or not,
		 * Rx mempool destruction is tracked to free MRs.
		 */
		if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
			return -rte_errno;
		ret = rxq_alloc_elts(rxq_ctrl);
		if (ret)
			return ret;
	}
	MLX5_ASSERT(!rxq_ctrl->obj);
	rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				    sizeof(*rxq_ctrl->obj), 0,
				    rxq_ctrl->socket);
	if (!rxq_ctrl->obj) {
		DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
		idx, (void *)&rxq_ctrl->obj);
	return 0;
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u dev_cap.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->dev_cap.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u dev_cap.max_sge is %d.",
		dev->data->port_id, priv->sh->dev_cap.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (rxq == NULL)
			continue;
		rxq_ctrl = rxq->ctrl;
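		/* A control structure may be shared; prepare it only once. */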
		if (!rxq_ctrl->started)
			if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
				goto error;
		ret = priv->obj_ops.rxq_obj_new(rxq);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
			goto error;
		}
		if (!rxq_ctrl->started)
			LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
		rxq_ctrl->started = true;
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Bind Tx queues to their target Rx queues for hairpin.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_priv *rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

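	/* First pass: check whether automatic binding is needed at all. */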
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (!txq_ctrl->is_hairpin ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
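	/* Second pass: bind each SQ/RQ pair through DevX. */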
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Skip hairpin queues with other peer ports. */
		if (!txq_ctrl->is_hairpin ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
		if (rxq == NULL) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->is_hairpin ||
		    rxq->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca =
				priv->sh->cdev->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_RQC_STATE_RDY;
		rq_attr.rq_state = MLX5_RQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca =
				priv->sh->cdev->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Queues with auto-bind will be destroyed directly. */
		rxq->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	return -rte_errno;
}

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information for.
 * @param current_info
 *   Pointer to the input peer information, currently unused.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, hairpin Tx queue
	 * will be bound to the peer Rx queue. Thus, only the information of
	 * the peer Rx queue needs to be fetched.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (!txq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = mlx5_txq_get_sqn(txq_ctrl);
		peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
	}
	return 0;
}

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: opposite direction is used
	 * to get the peer queue info with ethdev port ID, no need to check.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (!txq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and it is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_RQC_STATE_RDY;
		rq_attr.rq_state = MLX5_RQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq->hairpin_status = 1;
	}
	return ret;
}

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (!txq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RDY;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_RQC_STATE_RST;
		rq_attr.rq_state = MLX5_RQC_STATE_RDY;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq->hairpin_status = 0;
	}
	return ret;
}

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx port to one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding TxQ to peer RxQ, a first pass over the queues checks
	 * their configuration consistency. This costs a little time, but it
	 * is better than having to roll back.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (!txq_ctrl->is_hairpin) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port that are connected to
		 * the same peer Rx port should have the same "auto binding"
		 * and "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* If no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin Tx queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (!txq_ctrl->is_hairpin) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = mlx5_txq_get_sqn(txq_ctrl);
		cur.vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * The RTE-level private function is needed to access another
		 * device in a proper way.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Roll back the queues that were already bound.
	 * There is no need to check the return value of the queue unbind
	 * function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (!txq_ctrl->is_hairpin ||
		    txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}

/*
 * Unbind the hairpin port pair; the HW configuration of both devices will be
 * cleared and the status reset for all the queues used between them.
 * This function only supports unbinding the Tx port from one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (!txq_ctrl->is_hairpin) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			mlx5_txq_release(dev, i);
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				return -rte_errno;
			} else {
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}

/*
 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
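/*
 * Application-side usage sketch (hypothetical port IDs): with both ports
 * started and their hairpin queues set up in manual-bind mode, calling
 * rte_eth_hairpin_bind(0, 1) reaches this callback to bind port 0 Tx
 * queues to port 1 Rx queues; passing RTE_MAX_ETHPORTS as the Rx port
 * binds against every peer port instead.
 */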
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the called function of single port.
	 * Device started status will be checked only before the queue
	 * information updating.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, dev->device)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}

/*
 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, may be different from that when setting up the
 * hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or a positive value on success (the actual number of peer ports),
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	uint16_t pp;
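	/* Bitmap with one bit per possible peer port ID. */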
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (!txq_ctrl->is_hairpin) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
			struct mlx5_rxq_ctrl *rxq_ctrl;

			if (rxq == NULL)
				continue;
			rxq_ctrl = rxq->ctrl;
			if (!rxq_ctrl->is_hairpin)
				continue;
			pp = rxq->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
		}
	}
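	/* Convert the bitmap into the sorted array of peer port IDs. */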
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}

#ifdef HAVE_MLX5_HWS_SUPPORT

/**
 * Check if starting representor port is allowed.
 *
 * If transfer proxy port is configured for HWS, then starting representor port
 * is allowed if and only if transfer proxy port is started as well.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   If starting representor port is allowed, then 0 is returned.
 *   Otherwise rte_errno is set, and negative errno value is returned.
 */
static int
mlx5_hw_representor_port_allowed_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_eth_dev *proxy_dev;
	struct mlx5_priv *proxy_priv;
	uint16_t proxy_port_id = UINT16_MAX;
	int ret;

	MLX5_ASSERT(priv->sh->config.dv_flow_en == 2);
	MLX5_ASSERT(priv->sh->config.dv_esw_en);
	MLX5_ASSERT(priv->representor);
	ret = rte_flow_pick_transfer_proxy(dev->data->port_id, &proxy_port_id, NULL);
	if (ret) {
		if (ret == -ENODEV)
			DRV_LOG(ERR, "Starting representor port %u is not allowed. Transfer "
				     "proxy port is not available.", dev->data->port_id);
		else
			DRV_LOG(ERR, "Failed to pick transfer proxy for port %u (ret = %d)",
				dev->data->port_id, ret);
		return ret;
	}
	proxy_dev = &rte_eth_devices[proxy_port_id];
	proxy_priv = proxy_dev->data->dev_private;
	if (proxy_priv->dr_ctx == NULL) {
		DRV_LOG(DEBUG, "Starting representor port %u is allowed, but default traffic flows"
			       " will not be created. Transfer proxy port must be configured"
			       " for HWS and started.",
			       dev->data->port_id);
		return 0;
	}
	if (!proxy_dev->data->dev_started) {
		DRV_LOG(ERR, "Failed to start port %u: transfer proxy (port %u) must be started",
			     dev->data->port_id, proxy_port_id);
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	if (priv->sh->config.repr_matching && !priv->dr_ctx) {
		DRV_LOG(ERR, "Failed to start port %u: with representor matching enabled, port "
			     "must be configured for HWS", dev->data->port_id);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

#endif

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 *   The following error values are defined:
 *
 *   - -EAGAIN: If port representor cannot be started,
 *     because transfer proxy port is not started.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2) {
		/* If there is no E-Switch, then there are no start/stop order limitations. */
		if (!priv->sh->config.dv_esw_en)
			goto continue_dev_start;
		/* If master is being started, then it is always allowed. */
		if (priv->master)
			goto continue_dev_start;
		if (mlx5_hw_representor_port_allowed_start(dev))
			return -rte_errno;
	}
continue_dev_start:
#endif
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		uint32_t max_lro_msg_size = priv->max_lro_msg_size;

		if (max_lro_msg_size < MLX5_LRO_SEG_CHUNK_SIZE) {
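			/* Reject LRO queues when the max LRO message size is below the minimal chunk. */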
			uint32_t i;
			struct mlx5_rxq_priv *rxq;

			for (i = 0; i != priv->rxqs_n; ++i) {
				rxq = mlx5_rxq_get(dev, i);
				if (rxq && rxq->ctrl && rxq->ctrl->rxq.lro) {
					DRV_LOG(ERR, "port %u invalid max LRO size",
						dev->data->port_id);
					rte_errno = EINVAL;
					return -rte_errno;
				}
			}
		}
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_devx_obj_ops_en(priv->sh) &&
	    priv->obj_ops.lb_dummy_queue_create) {
		ret = priv->obj_ops.lb_dummy_queue_create(dev);
		if (ret)
			goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
		if (!priv->sh->dev_cap.vf && !priv->sh->dev_cap.sf &&
		    !priv->representor) {
			ret = mlx5_get_flag_dropless_rq(dev);
			if (ret < 0)
				DRV_LOG(WARNING,
					"port %u cannot query dropless flag",
					dev->data->port_id);
			else if (!ret)
				DRV_LOG(WARNING,
					"port %u dropless_rq OFF, no rearming",
					dev->data->port_id);
		} else {
			DRV_LOG(DEBUG,
				"port %u doesn't support dropless_rq flag",
				dev->data->port_id);
		}
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step will be skipped if there is no hairpin Tx queue
	 * configured with an Rx peer queue from the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	/*
	 * Attach indirection table objects detached on port stop.
	 * They may be needed to create RSS in non-isolated mode.
	 */
	ret = mlx5_action_handle_attach(dev);
	if (ret) {
		DRV_LOG(ERR,
			"port %u failed to attach indirect actions: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2) {
		ret = flow_hw_table_update(dev, NULL);
		if (ret) {
			DRV_LOG(ERR, "port %u failed to update HWS tables",
				dev->data->port_id);
			goto error;
		}
	}
#endif
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set dynamic fields and flags into Rx queues. */
	mlx5_flow_rxq_dynf_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, it only needs to start the default mreg copy
	 * action, since no flow created by an application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (rte_intr_fd_get(priv->sh->intr_handle_nl) >= 0) {
		priv->sh->port[priv->dev_port - 1].nl_ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
	}
	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

#ifdef HAVE_MLX5_HWS_SUPPORT
/**
 * Check if stopping transfer proxy port is allowed.
 *
 * If transfer proxy port is configured for HWS, then it is allowed to stop it
 * if and only if all other representor ports are stopped.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   If stopping transfer proxy port is allowed, then 0 is returned.
 *   Otherwise rte_errno is set, and negative errno value is returned.
 */
static int
mlx5_hw_proxy_port_allowed_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	bool representor_started = false;
	uint16_t port_id;

	MLX5_ASSERT(priv->sh->config.dv_flow_en == 2);
	MLX5_ASSERT(priv->sh->config.dv_esw_en);
	MLX5_ASSERT(priv->master);
	/* If transfer proxy port was not configured for HWS, then stopping it is allowed. */
	if (!priv->dr_ctx)
		return 0;
	MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
		const struct rte_eth_dev *port_dev = &rte_eth_devices[port_id];
		const struct mlx5_priv *port_priv = port_dev->data->dev_private;

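		/* Any other started port in the same switch domain blocks the stop. */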
		if (port_id != dev->data->port_id &&
		    port_priv->domain_id == priv->domain_id &&
		    port_dev->data->dev_started)
			representor_started = true;
	}
	if (representor_started) {
		DRV_LOG(ERR, "Failed to stop port %u: attached representor ports"
			     " must be stopped before stopping transfer proxy port",
			     dev->data->port_id);
		rte_errno = EBUSY;
		return -rte_errno;
	}
	return 0;
}
#endif

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 *   The following error values are defined:
 *
 *   - -EBUSY: If transfer proxy port cannot be stopped,
 *     because other port representors are still running.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2) {
		/* If there is no E-Switch, then there are no start/stop order limitations. */
		if (!priv->sh->config.dv_esw_en)
			goto continue_dev_stop;
		/* If representor is being stopped, then it is always allowed. */
		if (priv->representor)
			goto continue_dev_stop;
		if (mlx5_hw_proxy_port_allowed_stop(dev)) {
			dev->data->dev_started = 1;
			return -rte_errno;
		}
	}
continue_dev_stop:
#endif
	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
	dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
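	/* Give in-flight Rx bursts time to complete before tearing down queues. */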
	rte_delay_us_sleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	if (priv->sh->config.dv_flow_en == 2) {
		if (!__atomic_load_n(&priv->hws_mark_refcnt, __ATOMIC_RELAXED))
			flow_hw_rxq_flag_set(dev, false);
	} else {
		mlx5_flow_stop_default(dev);
	}
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All Rx queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_flow_meter_rxq_flush(dev);
	mlx5_action_handle_detach(dev);
#ifdef HAVE_MLX5_HWS_SUPPORT
	mlx5_flow_hw_cleanup_ctrl_rx_templates(dev);
#endif
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].nl_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev);

	return 0;
}

#ifdef HAVE_MLX5_HWS_SUPPORT

static int
mlx5_traffic_enable_hws(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_sh_config *config = &priv->sh->config;
	uint64_t flags = 0;
	unsigned int i;
	int ret;

	/*
	 * With extended metadata enabled, the Tx metadata copy is handled by default
	 * Tx tagging flow rules, so default Tx flow rule is not needed. It is only
	 * required when representor matching is disabled.
	 */
	if (config->dv_esw_en &&
	    !config->repr_matching &&
	    config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS &&
	    priv->master) {
		if (mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev))
			goto error;
	}
	for (i = 0; i < priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, i);
		uint32_t queue;

		if (!txq)
			continue;
		queue = mlx5_txq_get_sqn(txq);
		if ((priv->representor || priv->master) && config->dv_esw_en) {
			if (mlx5_flow_hw_esw_create_sq_miss_flow(dev, queue, false)) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if (config->dv_esw_en && config->repr_matching) {
			if (mlx5_flow_hw_tx_repr_matching_flow(dev, queue, false)) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (config->fdb_def_rule) {
		if ((priv->master || priv->representor) && config->dv_esw_en) {
			if (!mlx5_flow_hw_esw_create_default_jump_flow(dev))
				priv->fdb_def_rule = 1;
			else
				goto error;
		}
	} else {
		DRV_LOG(INFO, "port %u FDB default rule is disabled", dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (!priv->sh->config.lacp_by_user && priv->pf_bond >= 0)
		if (mlx5_flow_hw_lacp_rx_flow(dev))
			goto error;
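	/* Translate the current Rx mode into control flow flags. */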
	if (dev->data->promiscuous)
		flags |= MLX5_CTRL_PROMISCUOUS;
	if (dev->data->all_multicast)
		flags |= MLX5_CTRL_ALL_MULTICAST;
	else
		flags |= MLX5_CTRL_BROADCAST | MLX5_CTRL_IPV4_MULTICAST | MLX5_CTRL_IPV6_MULTICAST;
	flags |= MLX5_CTRL_DMAC;
	if (priv->vlan_filter_n)
		flags |= MLX5_CTRL_VLAN_FILTER;
	return mlx5_flow_hw_ctrl_flows(dev, flags);
error:
	ret = rte_errno;
	mlx5_flow_hw_flush_ctrl_flows(dev);
	rte_errno = ret;
	return -rte_errno;
}

#endif

/**
 * Enable traffic flows configured by control plane
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.hdr.dst_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.hdr.dst_addr.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.hdr.dst_addr.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.hdr.src_addr.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.hdr.dst_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
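	/* The all-zero MAC marks unused entries in dev->data->mac_addrs. */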
	unsigned int i;
	unsigned int j;
	int ret;

#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2)
		return mlx5_traffic_enable_hws(dev);
#endif
	/*
	 * The hairpin Tx queue default flow should be created no matter if
	 * it is in isolation mode or not. Otherwise, all the packets to be
	 * sent would be sent out directly without the Tx flow actions, e.g.
	 * encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->is_hairpin &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev,
					mlx5_txq_get_sqn(txq_ctrl));
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if (priv->sh->config.dv_esw_en) {
			uint32_t q = mlx5_txq_get_sqn(txq_ctrl);

			if (mlx5_flow_create_devx_sq_miss_flow(dev, q) == 0) {
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR,
					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
					dev->data->port_id, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (priv->sh->config.fdb_def_rule) {
		if (priv->sh->config.dv_esw_en) {
			if (mlx5_flow_create_esw_table_zero_flow(dev))
				priv->fdb_def_rule = 1;
			else
				DRV_LOG(INFO, "port %u FDB default rule cannot be configured - only Eswitch group 0 flows are supported.",
					dev->data->port_id);
		}
	} else {
		DRV_LOG(INFO, "port %u FDB default rule is disabled",
			dev->data->port_id);
	}
	if (!priv->sh->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.hdr.dst_addr.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.hdr.src_addr.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.hdr.ether_type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.hdr.dst_addr.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.hdr.src_addr.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.hdr.ether_type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.hdr.vlan_tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret) {
				/* Do not fail on IPv6 broadcast creation failure. */
				DRV_LOG(WARNING,
					"IPv6 broadcast is not supported");
				ret = 0;
			}
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.hdr.dst_addr.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.hdr.vlan_tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
#ifdef HAVE_MLX5_HWS_SUPPORT
	struct mlx5_priv *priv = dev->data->dev_private;

	if (priv->sh->config.dv_flow_en == 2)
		mlx5_flow_hw_flush_ctrl_flows(dev);
	else
#endif
		mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}

/**
 * Restart traffic flows configured by control plane
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
#ifdef HAVE_MLX5_HWS_SUPPORT
		mlx5_flow_hw_cleanup_ctrl_rx_templates(dev);
#endif
		return mlx5_traffic_enable(dev);
	}
	return 0;
}