xref: /dpdk/drivers/net/mlx5/mlx5_trigger.c (revision 0cedf34da78ff5633184860779c3733c6f8be36b)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5 
6 #include <unistd.h>
7 
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13 
14 #include <mlx5_malloc.h>
15 
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22 
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32 	struct mlx5_priv *priv = dev->data->dev_private;
33 	unsigned int i;
34 
35 	for (i = 0; i != priv->txqs_n; ++i)
36 		mlx5_txq_release(dev, i);
37 }
38 
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51 	struct mlx5_priv *priv = dev->data->dev_private;
52 	unsigned int i;
53 	int ret;
54 
55 	for (i = 0; i != priv->txqs_n; ++i) {
56 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57 		struct mlx5_txq_data *txq_data = txq_ctrl ? &txq_ctrl->txq : NULL;
58 		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59 
60 		if (!txq_ctrl)
61 			continue;
62 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63 			txq_alloc_elts(txq_ctrl);
64 		MLX5_ASSERT(!txq_ctrl->obj);
65 		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66 					    0, txq_ctrl->socket);
67 		if (!txq_ctrl->obj) {
68 			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69 				"memory resources.", dev->data->port_id,
70 				txq_data->idx);
71 			rte_errno = ENOMEM;
72 			goto error;
73 		}
74 		ret = priv->obj_ops.txq_obj_new(dev, i);
75 		if (ret < 0) {
76 			mlx5_free(txq_ctrl->obj);
77 			txq_ctrl->obj = NULL;
78 			goto error;
79 		}
80 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81 			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82 
83 			txq_data->fcqs = mlx5_malloc(flags, size,
84 						     RTE_CACHE_LINE_SIZE,
85 						     txq_ctrl->socket);
86 			if (!txq_data->fcqs) {
87 				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88 					"allocate memory (FCQ).",
89 					dev->data->port_id, i);
90 				rte_errno = ENOMEM;
91 				goto error;
92 			}
93 		}
94 		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95 			dev->data->port_id, i, (void *)&txq_ctrl->obj);
96 		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97 	}
98 	return 0;
99 error:
100 	ret = rte_errno; /* Save rte_errno before cleanup. */
101 	do {
102 		mlx5_txq_release(dev, i);
103 	} while (i-- != 0);
104 	rte_errno = ret; /* Restore rte_errno. */
105 	return -rte_errno;
106 }
107 
108 /**
109  * Translate the chunk address to MR key in order to put it into the cache.
110  */
111 static void
112 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
113 			     struct rte_mempool_memhdr *memhdr,
114 			     unsigned int idx)
115 {
116 	struct mlx5_rxq_data *rxq = opaque;
117 
118 	RTE_SET_USED(mp);
119 	RTE_SET_USED(idx);
120 	mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
121 }
122 
123 /**
124  * Register Rx queue mempools and fill the Rx queue cache.
125  * This function tolerates repeated mempool registration.
126  *
127  * @param[in] rxq_ctrl
128  *   Rx queue control data.
129  *
130  * @return
131  *   0 on success, (-1) on failure and rte_errno is set.
132  */
133 static int
134 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
135 {
136 	struct mlx5_priv *priv = rxq_ctrl->priv;
137 	struct rte_mempool *mp;
138 	uint32_t s;
139 	int ret = 0;
140 
141 	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
142 	/* MPRQ mempool is registered on creation, just fill the cache. */
143 	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
144 		rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
145 				     mlx5_rxq_mempool_register_cb,
146 				     &rxq_ctrl->rxq);
147 		return 0;
148 	}
149 	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
150 		mp = rxq_ctrl->rxq.rxseg[s].mp;
151 		ret = mlx5_mr_mempool_register(&priv->sh->cdev->mr_scache,
152 					       priv->sh->cdev->pd, mp,
153 					       &priv->mp_id);
154 		if (ret < 0 && rte_errno != EEXIST)
155 			return ret;
156 		rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
157 				     &rxq_ctrl->rxq);
158 	}
159 	return 0;
160 }
161 
162 /**
163  * Stop traffic on Rx queues.
164  *
165  * @param dev
166  *   Pointer to Ethernet device structure.
167  */
168 static void
169 mlx5_rxq_stop(struct rte_eth_dev *dev)
170 {
171 	struct mlx5_priv *priv = dev->data->dev_private;
172 	unsigned int i;
173 
174 	for (i = 0; i != priv->rxqs_n; ++i)
175 		mlx5_rxq_release(dev, i);
176 }
177 
178 /**
179  * Start traffic on Rx queues.
180  *
181  * @param dev
182  *   Pointer to Ethernet device structure.
183  *
184  * @return
185  *   0 on success, a negative errno value otherwise and rte_errno is set.
186  */
187 static int
188 mlx5_rxq_start(struct rte_eth_dev *dev)
189 {
190 	struct mlx5_priv *priv = dev->data->dev_private;
191 	unsigned int i;
192 	int ret = 0;
193 
194 	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
195 	if (mlx5_mprq_alloc_mp(dev)) {
196 		/* Should not release Rx queues but return immediately. */
197 		return -rte_errno;
198 	}
199 	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
200 		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
201 	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
202 		dev->data->port_id, priv->sh->device_attr.max_sge);
203 	for (i = 0; i != priv->rxqs_n; ++i) {
204 		struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
205 		struct mlx5_rxq_ctrl *rxq_ctrl;
206 
207 		if (rxq == NULL)
208 			continue;
209 		rxq_ctrl = rxq->ctrl;
210 		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
211 			/*
212 			 * Pre-register the mempools. Regardless of whether
213 			 * the implicit registration is enabled or not,
214 			 * Rx mempool destruction is tracked to free MRs.
215 			 */
216 			if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
217 				goto error;
218 			ret = rxq_alloc_elts(rxq_ctrl);
219 			if (ret)
220 				goto error;
221 		}
222 		MLX5_ASSERT(!rxq_ctrl->obj);
223 		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
224 					    sizeof(*rxq_ctrl->obj), 0,
225 					    rxq_ctrl->socket);
226 		if (!rxq_ctrl->obj) {
227 			DRV_LOG(ERR,
228 				"Port %u Rx queue %u can't allocate resources.",
229 				dev->data->port_id, (*priv->rxqs)[i]->idx);
230 			rte_errno = ENOMEM;
231 			goto error;
232 		}
233 		ret = priv->obj_ops.rxq_obj_new(dev, i);
234 		if (ret) {
235 			mlx5_free(rxq_ctrl->obj);
236 			rxq_ctrl->obj = NULL;
237 			goto error;
238 		}
239 		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
240 			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
241 		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
242 	}
243 	return 0;
244 error:
245 	ret = rte_errno; /* Save rte_errno before cleanup. */
246 	do {
247 		mlx5_rxq_release(dev, i);
248 	} while (i-- != 0);
249 	rte_errno = ret; /* Restore rte_errno. */
250 	return -rte_errno;
251 }
252 
253 /**
254  * Binds Tx queues to Rx queues for hairpin.
255  *
256  * Only auto-bind Tx queues whose peer is the same port are handled.
257  *
258  * @param dev
259  *   Pointer to Ethernet device structure.
260  *
261  * @return
262  *   0 on success, a negative errno value otherwise and rte_errno is set.
263  */
264 static int
265 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
266 {
267 	struct mlx5_priv *priv = dev->data->dev_private;
268 	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
269 	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
270 	struct mlx5_txq_ctrl *txq_ctrl;
271 	struct mlx5_rxq_priv *rxq;
272 	struct mlx5_rxq_ctrl *rxq_ctrl;
273 	struct mlx5_devx_obj *sq;
274 	struct mlx5_devx_obj *rq;
275 	unsigned int i;
276 	int ret = 0;
277 	bool need_auto = false;
278 	uint16_t self_port = dev->data->port_id;
279 
280 	for (i = 0; i != priv->txqs_n; ++i) {
281 		txq_ctrl = mlx5_txq_get(dev, i);
282 		if (!txq_ctrl)
283 			continue;
284 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
285 		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
286 			mlx5_txq_release(dev, i);
287 			continue;
288 		}
289 		if (txq_ctrl->hairpin_conf.manual_bind) {
290 			mlx5_txq_release(dev, i);
291 			return 0;
292 		}
293 		need_auto = true;
294 		mlx5_txq_release(dev, i);
295 	}
296 	if (!need_auto)
297 		return 0;
298 	for (i = 0; i != priv->txqs_n; ++i) {
299 		txq_ctrl = mlx5_txq_get(dev, i);
300 		if (!txq_ctrl)
301 			continue;
302 		/* Skip hairpin queues with other peer ports. */
303 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
304 		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
305 			mlx5_txq_release(dev, i);
306 			continue;
307 		}
308 		if (!txq_ctrl->obj) {
309 			rte_errno = ENOMEM;
310 			DRV_LOG(ERR, "port %u no txq object found: %d",
311 				dev->data->port_id, i);
312 			mlx5_txq_release(dev, i);
313 			return -rte_errno;
314 		}
315 		sq = txq_ctrl->obj->sq;
316 		rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
317 		if (rxq == NULL) {
318 			mlx5_txq_release(dev, i);
319 			rte_errno = EINVAL;
320 			DRV_LOG(ERR, "port %u no rxq object found: %d",
321 				dev->data->port_id,
322 				txq_ctrl->hairpin_conf.peers[0].queue);
323 			return -rte_errno;
324 		}
325 		rxq_ctrl = rxq->ctrl;
326 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
327 		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
328 			rte_errno = ENOMEM;
329 			DRV_LOG(ERR, "port %u Tx queue %d can't be binded to "
330 				"Rx queue %d", dev->data->port_id,
331 				i, txq_ctrl->hairpin_conf.peers[0].queue);
332 			goto error;
333 		}
334 		rq = rxq_ctrl->obj->rq;
335 		if (!rq) {
336 			rte_errno = ENOMEM;
337 			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
338 				dev->data->port_id,
339 				txq_ctrl->hairpin_conf.peers[0].queue);
340 			goto error;
341 		}
342 		sq_attr.state = MLX5_SQC_STATE_RDY;
343 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
344 		sq_attr.hairpin_peer_rq = rq->id;
345 		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
346 		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
347 		if (ret)
348 			goto error;
349 		rq_attr.state = MLX5_SQC_STATE_RDY;
350 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
351 		rq_attr.hairpin_peer_sq = sq->id;
352 		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
353 		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
354 		if (ret)
355 			goto error;
356 		/* Qs with auto-bind will be destroyed directly. */
357 		rxq_ctrl->hairpin_status = 1;
358 		txq_ctrl->hairpin_status = 1;
359 		mlx5_txq_release(dev, i);
360 	}
361 	return 0;
362 error:
363 	mlx5_txq_release(dev, i);
364 	return -rte_errno;
365 }
366 
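/*
 * Editor's illustrative sketch (not part of the driver sources): the
 * auto-bind path above is taken for hairpin queues that the application
 * configured with manual_bind == 0 and the peer on the same port. A minimal
 * application-side setup that would exercise it could look as follows; the
 * port id, queue ids and descriptor count are hypothetical.
 *
 *	#include <rte_ethdev.h>
 *
 *	static int
 *	setup_loopback_hairpin(uint16_t port, uint16_t rxq, uint16_t txq)
 *	{
 *		struct rte_eth_hairpin_conf conf = {
 *			.peer_count = 1,
 *			.manual_bind = 0,	// PMD auto-binds on dev_start
 *			.tx_explicit = 0,	// PMD adds the implicit Tx flow
 *		};
 *		int ret;
 *
 *		conf.peers[0].port = port;	// peer is the same port
 *		conf.peers[0].queue = txq;	// Rx queue's peer Tx queue
 *		ret = rte_eth_rx_hairpin_queue_setup(port, rxq, 512, &conf);
 *		if (ret != 0)
 *			return ret;
 *		conf.peers[0].queue = rxq;	// Tx queue's peer Rx queue
 *		return rte_eth_tx_hairpin_queue_setup(port, txq, 512, &conf);
 *	}
 */
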
367 /*
368  * Fetch the peer queue's SW & HW information.
369  *
370  * @param dev
371  *   Pointer to Ethernet device structure.
372  * @param peer_queue
373  *   Index of the queue to fetch the information.
374  * @param current_info
375  *   Pointer to the input peer information, not used currently.
376  * @param peer_info
377  *   Pointer to the structure to store the information, output.
378  * @param direction
379  *   Positive to get the RxQ information, zero to get the TxQ information.
380  *
381  * @return
382  *   0 on success, a negative errno value otherwise and rte_errno is set.
383  */
384 int
385 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
386 			       struct rte_hairpin_peer_info *current_info,
387 			       struct rte_hairpin_peer_info *peer_info,
388 			       uint32_t direction)
389 {
390 	struct mlx5_priv *priv = dev->data->dev_private;
391 	RTE_SET_USED(current_info);
392 
393 	if (dev->data->dev_started == 0) {
394 		rte_errno = EBUSY;
395 		DRV_LOG(ERR, "peer port %u is not started",
396 			dev->data->port_id);
397 		return -rte_errno;
398 	}
399 	/*
400 	 * Peer port used as egress. In the current design, hairpin Tx queue
401 	 * will be bound to the peer Rx queue. Indeed, only the information of
402 	 * peer Rx queue needs to be fetched.
403 	 */
404 	if (direction == 0) {
405 		struct mlx5_txq_ctrl *txq_ctrl;
406 
407 		txq_ctrl = mlx5_txq_get(dev, peer_queue);
408 		if (txq_ctrl == NULL) {
409 			rte_errno = EINVAL;
410 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
411 				dev->data->port_id, peer_queue);
412 			return -rte_errno;
413 		}
414 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
415 			rte_errno = EINVAL;
416 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
417 				dev->data->port_id, peer_queue);
418 			mlx5_txq_release(dev, peer_queue);
419 			return -rte_errno;
420 		}
421 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
422 			rte_errno = ENOMEM;
423 			DRV_LOG(ERR, "port %u no Txq object found: %d",
424 				dev->data->port_id, peer_queue);
425 			mlx5_txq_release(dev, peer_queue);
426 			return -rte_errno;
427 		}
428 		peer_info->qp_id = txq_ctrl->obj->sq->id;
429 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
430 		/* 1-to-1 mapping, only the first one is used. */
431 		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
432 		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
433 		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
434 		mlx5_txq_release(dev, peer_queue);
435 	} else { /* Peer port used as ingress. */
436 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
437 		struct mlx5_rxq_ctrl *rxq_ctrl;
438 
439 		if (rxq == NULL) {
440 			rte_errno = EINVAL;
441 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
442 				dev->data->port_id, peer_queue);
443 			return -rte_errno;
444 		}
445 		rxq_ctrl = rxq->ctrl;
446 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
447 			rte_errno = EINVAL;
448 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
449 				dev->data->port_id, peer_queue);
450 			return -rte_errno;
451 		}
452 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
453 			rte_errno = ENOMEM;
454 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
455 				dev->data->port_id, peer_queue);
456 			return -rte_errno;
457 		}
458 		peer_info->qp_id = rxq_ctrl->obj->rq->id;
459 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
460 		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
461 		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
462 		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
463 	}
464 	return 0;
465 }
466 
467 /*
468  * Bind the hairpin queue with the peer HW information.
469  * This needs to be called twice, for both the Tx and Rx queues of a pair.
470  * If the queue is already bound, it is considered successful.
471  *
472  * @param dev
473  *   Pointer to Ethernet device structure.
474  * @param cur_queue
475  *   Index of the queue to change the HW configuration to bind.
476  * @param peer_info
477  *   Pointer to information of the peer queue.
478  * @param direction
479  *   Positive to configure the TxQ, zero to configure the RxQ.
480  *
481  * @return
482  *   0 on success, a negative errno value otherwise and rte_errno is set.
483  */
484 int
485 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
486 			     struct rte_hairpin_peer_info *peer_info,
487 			     uint32_t direction)
488 {
489 	int ret = 0;
490 
491 	/*
492 	 * Consistency check of the peer queue: its info was fetched via the
493 	 * ethdev port ID (opposite direction), so the port needs no check.
494 	 */
495 	if (peer_info->peer_q != cur_queue) {
496 		rte_errno = EINVAL;
497 		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
498 			dev->data->port_id, cur_queue, peer_info->peer_q);
499 		return -rte_errno;
500 	}
501 	if (direction != 0) {
502 		struct mlx5_txq_ctrl *txq_ctrl;
503 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
504 
505 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
506 		if (txq_ctrl == NULL) {
507 			rte_errno = EINVAL;
508 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
509 				dev->data->port_id, cur_queue);
510 			return -rte_errno;
511 		}
512 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
513 			rte_errno = EINVAL;
514 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
515 				dev->data->port_id, cur_queue);
516 			mlx5_txq_release(dev, cur_queue);
517 			return -rte_errno;
518 		}
519 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
520 			rte_errno = ENOMEM;
521 			DRV_LOG(ERR, "port %u no Txq object found: %d",
522 				dev->data->port_id, cur_queue);
523 			mlx5_txq_release(dev, cur_queue);
524 			return -rte_errno;
525 		}
526 		if (txq_ctrl->hairpin_status != 0) {
527 			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
528 				dev->data->port_id, cur_queue);
529 			mlx5_txq_release(dev, cur_queue);
530 			return 0;
531 		}
532 		/*
533 		 * Consistency checking across all queues of one port is done in
534 		 * the bind() function, and that is optional.
535 		 */
536 		if (peer_info->tx_explicit !=
537 		    txq_ctrl->hairpin_conf.tx_explicit) {
538 			rte_errno = EINVAL;
539 			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
540 				" mismatch", dev->data->port_id, cur_queue);
541 			mlx5_txq_release(dev, cur_queue);
542 			return -rte_errno;
543 		}
544 		if (peer_info->manual_bind !=
545 		    txq_ctrl->hairpin_conf.manual_bind) {
546 			rte_errno = EINVAL;
547 			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
548 				" mismatch", dev->data->port_id, cur_queue);
549 			mlx5_txq_release(dev, cur_queue);
550 			return -rte_errno;
551 		}
552 		sq_attr.state = MLX5_SQC_STATE_RDY;
553 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
554 		sq_attr.hairpin_peer_rq = peer_info->qp_id;
555 		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
556 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
557 		if (ret == 0)
558 			txq_ctrl->hairpin_status = 1;
559 		mlx5_txq_release(dev, cur_queue);
560 	} else {
561 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
562 		struct mlx5_rxq_ctrl *rxq_ctrl;
563 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
564 
565 		if (rxq == NULL) {
566 			rte_errno = EINVAL;
567 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
568 				dev->data->port_id, cur_queue);
569 			return -rte_errno;
570 		}
571 		rxq_ctrl = rxq->ctrl;
572 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
573 			rte_errno = EINVAL;
574 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
575 				dev->data->port_id, cur_queue);
576 			return -rte_errno;
577 		}
578 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
579 			rte_errno = ENOMEM;
580 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
581 				dev->data->port_id, cur_queue);
582 			return -rte_errno;
583 		}
584 		if (rxq_ctrl->hairpin_status != 0) {
585 			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
586 				dev->data->port_id, cur_queue);
587 			return 0;
588 		}
589 		if (peer_info->tx_explicit !=
590 		    rxq_ctrl->hairpin_conf.tx_explicit) {
591 			rte_errno = EINVAL;
592 			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
593 				" mismatch", dev->data->port_id, cur_queue);
594 			return -rte_errno;
595 		}
596 		if (peer_info->manual_bind !=
597 		    rxq_ctrl->hairpin_conf.manual_bind) {
598 			rte_errno = EINVAL;
599 			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
600 				" mismatch", dev->data->port_id, cur_queue);
601 			return -rte_errno;
602 		}
603 		rq_attr.state = MLX5_SQC_STATE_RDY;
604 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
605 		rq_attr.hairpin_peer_sq = peer_info->qp_id;
606 		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
607 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
608 		if (ret == 0)
609 			rxq_ctrl->hairpin_status = 1;
610 	}
611 	return ret;
612 }
613 
614 /*
615  * Unbind the hairpin queue and reset its HW configuration.
616  * This needs to be called twice, for both the Tx and Rx queues of a pair.
617  * If the queue is already unbound, it is considered successful.
618  *
619  * @param dev
620  *   Pointer to Ethernet device structure.
621  * @param cur_queue
622  *   Index of the queue to change the HW configuration to unbind.
623  * @param direction
624  *   Positive to reset the TxQ, zero to reset the RxQ.
625  *
626  * @return
627  *   0 on success, a negative errno value otherwise and rte_errno is set.
628  */
629 int
630 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
631 			       uint32_t direction)
632 {
633 	int ret = 0;
634 
635 	if (direction != 0) {
636 		struct mlx5_txq_ctrl *txq_ctrl;
637 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
638 
639 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
640 		if (txq_ctrl == NULL) {
641 			rte_errno = EINVAL;
642 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
643 				dev->data->port_id, cur_queue);
644 			return -rte_errno;
645 		}
646 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
647 			rte_errno = EINVAL;
648 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
649 				dev->data->port_id, cur_queue);
650 			mlx5_txq_release(dev, cur_queue);
651 			return -rte_errno;
652 		}
653 		/* Already unbound, return success before obj checking. */
654 		if (txq_ctrl->hairpin_status == 0) {
655 			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
656 				dev->data->port_id, cur_queue);
657 			mlx5_txq_release(dev, cur_queue);
658 			return 0;
659 		}
660 		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
661 			rte_errno = ENOMEM;
662 			DRV_LOG(ERR, "port %u no Txq object found: %d",
663 				dev->data->port_id, cur_queue);
664 			mlx5_txq_release(dev, cur_queue);
665 			return -rte_errno;
666 		}
667 		sq_attr.state = MLX5_SQC_STATE_RST;
668 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
669 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
670 		if (ret == 0)
671 			txq_ctrl->hairpin_status = 0;
672 		mlx5_txq_release(dev, cur_queue);
673 	} else {
674 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
675 		struct mlx5_rxq_ctrl *rxq_ctrl;
676 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
677 
678 		if (rxq == NULL) {
679 			rte_errno = EINVAL;
680 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
681 				dev->data->port_id, cur_queue);
682 			return -rte_errno;
683 		}
684 		rxq_ctrl = rxq->ctrl;
685 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
686 			rte_errno = EINVAL;
687 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
688 				dev->data->port_id, cur_queue);
689 			return -rte_errno;
690 		}
691 		if (rxq_ctrl->hairpin_status == 0) {
692 			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
693 				dev->data->port_id, cur_queue);
694 			return 0;
695 		}
696 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
697 			rte_errno = ENOMEM;
698 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
699 				dev->data->port_id, cur_queue);
700 			return -rte_errno;
701 		}
702 		rq_attr.state = MLX5_SQC_STATE_RST;
703 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
704 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
705 		if (ret == 0)
706 			rxq_ctrl->hairpin_status = 0;
707 	}
708 	return ret;
709 }
710 
711 /*
712  * Bind the hairpin port pairs, from the Tx to the peer Rx.
713  * This function only supports binding the Tx to one Rx.
714  *
715  * @param dev
716  *   Pointer to Ethernet device structure.
717  * @param rx_port
718  *   Port identifier of the Rx port.
719  *
720  * @return
721  *   0 on success, a negative errno value otherwise and rte_errno is set.
722  */
723 static int
724 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
725 {
726 	struct mlx5_priv *priv = dev->data->dev_private;
727 	int ret = 0;
728 	struct mlx5_txq_ctrl *txq_ctrl;
729 	uint32_t i;
730 	struct rte_hairpin_peer_info peer = {0xffffff};
731 	struct rte_hairpin_peer_info cur;
732 	const struct rte_eth_hairpin_conf *conf;
733 	uint16_t num_q = 0;
734 	uint16_t local_port = priv->dev_data->port_id;
735 	uint32_t manual;
736 	uint32_t explicit;
737 	uint16_t rx_queue;
738 
739 	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
740 		rte_errno = ENODEV;
741 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
742 		return -rte_errno;
743 	}
744 	/*
745 	 * Before binding TxQ to peer RxQ, a first pass over the queues checks
746 	 * their configuration consistency. This is a little time consuming
747 	 * but better than having to roll back later.
748 	 */
749 	for (i = 0; i != priv->txqs_n; i++) {
750 		txq_ctrl = mlx5_txq_get(dev, i);
751 		if (txq_ctrl == NULL)
752 			continue;
753 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
754 			mlx5_txq_release(dev, i);
755 			continue;
756 		}
757 		/*
758 		 * All hairpin Tx queues of a single port connected to the
759 		 * same peer Rx port should have the same "auto binding" and
760 		 * "implicit Tx flow" modes.
761 		 * Peer consistency checking will be done in per queue binding.
762 		 */
763 		conf = &txq_ctrl->hairpin_conf;
764 		if (conf->peers[0].port == rx_port) {
765 			if (num_q == 0) {
766 				manual = conf->manual_bind;
767 				explicit = conf->tx_explicit;
768 			} else {
769 				if (manual != conf->manual_bind ||
770 				    explicit != conf->tx_explicit) {
771 					rte_errno = EINVAL;
772 					DRV_LOG(ERR, "port %u queue %d mode"
773 						" mismatch: %u %u, %u %u",
774 						local_port, i, manual,
775 						conf->manual_bind, explicit,
776 						conf->tx_explicit);
777 					mlx5_txq_release(dev, i);
778 					return -rte_errno;
779 				}
780 			}
781 			num_q++;
782 		}
783 		mlx5_txq_release(dev, i);
784 	}
785 	/* If no queue is configured, success is returned directly. */
786 	if (num_q == 0)
787 		return ret;
788 	/* All the hairpin TX queues need to be traversed again. */
789 	for (i = 0; i != priv->txqs_n; i++) {
790 		txq_ctrl = mlx5_txq_get(dev, i);
791 		if (txq_ctrl == NULL)
792 			continue;
793 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
794 			mlx5_txq_release(dev, i);
795 			continue;
796 		}
797 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
798 			mlx5_txq_release(dev, i);
799 			continue;
800 		}
801 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
802 		/*
803 		 * Fetch peer RxQ's information.
804 		 * No need to pass the information of the current queue.
805 		 */
806 		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
807 							NULL, &peer, 1);
808 		if (ret != 0) {
809 			mlx5_txq_release(dev, i);
810 			goto error;
811 		}
812 		/* Accessing its own device, inside mlx5 PMD. */
813 		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
814 		if (ret != 0) {
815 			mlx5_txq_release(dev, i);
816 			goto error;
817 		}
818 		/* Pass TxQ's information to peer RxQ and try binding. */
819 		cur.peer_q = rx_queue;
820 		cur.qp_id = txq_ctrl->obj->sq->id;
821 		cur.vhca_id = priv->config.hca_attr.vhca_id;
822 		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
823 		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
824 		/*
825 		 * In order to access another device in a proper way, RTE level
826 		 * private function is needed.
827 		 */
828 		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
829 						      &cur, 0);
830 		if (ret != 0) {
831 			mlx5_txq_release(dev, i);
832 			goto error;
833 		}
834 		mlx5_txq_release(dev, i);
835 	}
836 	return 0;
837 error:
838 	/*
839 	 * Roll back the queues that were already bound.
840 	 * No need to check the return value of the queue unbind function.
841 	 */
842 	do {
843 		/* No validation is needed here. */
844 		txq_ctrl = mlx5_txq_get(dev, i);
845 		if (txq_ctrl == NULL)
846 			continue;
847 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
848 		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
849 		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
850 		mlx5_txq_release(dev, i);
851 	} while (i--);
852 	return ret;
853 }
854 
855 /*
856  * Unbind the hairpin port pair: the HW configuration of both devices will be
857  * cleared and the status will be reset for all the queues used between them.
858  * This function only supports unbinding the Tx from one Rx.
859  *
860  * @param dev
861  *   Pointer to Ethernet device structure.
862  * @param rx_port
863  *   Port identifier of the Rx port.
864  *
865  * @return
866  *   0 on success, a negative errno value otherwise and rte_errno is set.
867  */
868 static int
869 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
870 {
871 	struct mlx5_priv *priv = dev->data->dev_private;
872 	struct mlx5_txq_ctrl *txq_ctrl;
873 	uint32_t i;
874 	int ret;
875 	uint16_t cur_port = priv->dev_data->port_id;
876 
877 	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
878 		rte_errno = ENODEV;
879 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
880 		return -rte_errno;
881 	}
882 	for (i = 0; i != priv->txqs_n; i++) {
883 		uint16_t rx_queue;
884 
885 		txq_ctrl = mlx5_txq_get(dev, i);
886 		if (txq_ctrl == NULL)
887 			continue;
888 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
889 			mlx5_txq_release(dev, i);
890 			continue;
891 		}
892 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
893 			mlx5_txq_release(dev, i);
894 			continue;
895 		}
896 		/* Indeed, only the first used queue needs to be checked. */
897 		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
898 			if (cur_port != rx_port) {
899 				rte_errno = EINVAL;
900 				DRV_LOG(ERR, "port %u and port %u are in"
901 					" auto-bind mode", cur_port, rx_port);
902 				mlx5_txq_release(dev, i);
903 				return -rte_errno;
904 			} else {
905 				return 0;
906 			}
907 		}
908 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
909 		mlx5_txq_release(dev, i);
910 		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
911 		if (ret) {
912 			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
913 				rx_port, rx_queue);
914 			return ret;
915 		}
916 		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
917 		if (ret) {
918 			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
919 				cur_port, i);
920 			return ret;
921 		}
922 	}
923 	return 0;
924 }
925 
926 /*
927  * Bind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
928  * @see mlx5_hairpin_bind_single_port()
929  */
930 int
931 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
932 {
933 	int ret = 0;
934 	uint16_t p, pp;
935 
936 	/*
937 	 * If the Rx port has no hairpin configuration with the current port,
938 	 * the binding will be skipped in the single-port bind function.
939 	 * The device started status is checked only before the queue
940 	 * information is updated.
941 	 */
942 	if (rx_port == RTE_MAX_ETHPORTS) {
943 		MLX5_ETH_FOREACH_DEV(p, dev->device) {
944 			ret = mlx5_hairpin_bind_single_port(dev, p);
945 			if (ret != 0)
946 				goto unbind;
947 		}
948 		return ret;
949 	} else {
950 		return mlx5_hairpin_bind_single_port(dev, rx_port);
951 	}
952 unbind:
953 	MLX5_ETH_FOREACH_DEV(pp, dev->device)
954 		if (pp < p)
955 			mlx5_hairpin_unbind_single_port(dev, pp);
956 	return ret;
957 }
958 
959 /*
960  * Unbind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is used.
961  * @see mlx5_hairpin_unbind_single_port()
962  */
963 int
964 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
965 {
966 	int ret = 0;
967 	uint16_t p;
968 
969 	if (rx_port == RTE_MAX_ETHPORTS)
970 		MLX5_ETH_FOREACH_DEV(p, dev->device) {
971 			ret = mlx5_hairpin_unbind_single_port(dev, p);
972 			if (ret != 0)
973 				return ret;
974 		}
975 	else
976 		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
977 	return ret;
978 }
979 
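/*
 * Editor's illustrative sketch (not part of the driver sources): when the
 * hairpin queues were configured with manual_bind == 1, the application
 * drives the binding itself through the generic ethdev API after both ports
 * are started, and unbinds them again before stopping; the helpers below
 * are hypothetical.
 *
 *	#include <rte_ethdev.h>
 *
 *	static int
 *	manual_hairpin_bind(uint16_t tx_port)
 *	{
 *		// RTE_MAX_ETHPORTS means "bind to every configured peer port".
 *		return rte_eth_hairpin_bind(tx_port, RTE_MAX_ETHPORTS);
 *	}
 *
 *	static int
 *	manual_hairpin_unbind(uint16_t tx_port)
 *	{
 *		// Undo the binding before the ports are stopped.
 *		return rte_eth_hairpin_unbind(tx_port, RTE_MAX_ETHPORTS);
 *	}
 */
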
980 /*
981  * DPDK callback to get the hairpin peer ports list.
982  * This will return the actual number of peer ports and save the identifiers
983  * into the array (sorted; the order may differ from the one used when
984  * setting up the hairpin peer queues).
985  * The peer port ID could be the same as the port ID of the current device.
986  *
987  * @param dev
988  *   Pointer to Ethernet device structure.
989  * @param peer_ports
990  *   Pointer to array to save the port identifiers.
991  * @param len
992  *   The length of the array.
993  * @param direction
994  *   Current port to peer port direction.
995  *   positive - current used as Tx to get all peer Rx ports.
996  *   zero - current used as Rx to get all peer Tx ports.
997  *
998  * @return
999  *   0 or a positive value on success: the actual number of peer ports;
1000  *   a negative errno value otherwise and rte_errno is set.
1001  */
1002 int
1003 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1004 			    size_t len, uint32_t direction)
1005 {
1006 	struct mlx5_priv *priv = dev->data->dev_private;
1007 	struct mlx5_txq_ctrl *txq_ctrl;
1008 	uint32_t i;
1009 	uint16_t pp;
1010 	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1011 	int ret = 0;
1012 
1013 	if (direction) {
1014 		for (i = 0; i < priv->txqs_n; i++) {
1015 			txq_ctrl = mlx5_txq_get(dev, i);
1016 			if (!txq_ctrl)
1017 				continue;
1018 			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1019 				mlx5_txq_release(dev, i);
1020 				continue;
1021 			}
1022 			pp = txq_ctrl->hairpin_conf.peers[0].port;
1023 			if (pp >= RTE_MAX_ETHPORTS) {
1024 				rte_errno = ERANGE;
1025 				mlx5_txq_release(dev, i);
1026 				DRV_LOG(ERR, "port %hu queue %u peer port "
1027 					"out of range %hu",
1028 					priv->dev_data->port_id, i, pp);
1029 				return -rte_errno;
1030 			}
1031 			bits[pp / 32] |= 1 << (pp % 32);
1032 			mlx5_txq_release(dev, i);
1033 		}
1034 	} else {
1035 		for (i = 0; i < priv->rxqs_n; i++) {
1036 			struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1037 			struct mlx5_rxq_ctrl *rxq_ctrl;
1038 
1039 			if (rxq == NULL)
1040 				continue;
1041 			rxq_ctrl = rxq->ctrl;
1042 			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
1043 				continue;
1044 			pp = rxq_ctrl->hairpin_conf.peers[0].port;
1045 			if (pp >= RTE_MAX_ETHPORTS) {
1046 				rte_errno = ERANGE;
1047 				DRV_LOG(ERR, "port %hu queue %u peer port "
1048 					"out of range %hu",
1049 					priv->dev_data->port_id, i, pp);
1050 				return -rte_errno;
1051 			}
1052 			bits[pp / 32] |= 1 << (pp % 32);
1053 		}
1054 	}
1055 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1056 		if (bits[i / 32] & (1 << (i % 32))) {
1057 			if ((size_t)ret >= len) {
1058 				rte_errno = E2BIG;
1059 				return -rte_errno;
1060 			}
1061 			peer_ports[ret++] = i;
1062 		}
1063 	}
1064 	return ret;
1065 }
1066 
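/*
 * Editor's illustrative sketch (not part of the driver sources): the
 * callback above backs rte_eth_hairpin_get_peer_ports(). An application
 * doing manual binding can use it to discover which peer ports to bind;
 * the helper below is hypothetical.
 *
 *	#include <rte_ethdev.h>
 *
 *	static int
 *	bind_tx_peers(uint16_t port_id)
 *	{
 *		uint16_t peers[RTE_MAX_ETHPORTS];
 *		int n, i, ret;
 *
 *		// direction == 1: port_id is the Tx side, list peer Rx ports.
 *		n = rte_eth_hairpin_get_peer_ports(port_id, peers,
 *						   RTE_DIM(peers), 1);
 *		if (n < 0)
 *			return n;
 *		for (i = 0; i < n; i++) {
 *			ret = rte_eth_hairpin_bind(port_id, peers[i]);
 *			if (ret != 0)
 *				return ret;
 *		}
 *		return 0;
 *	}
 */
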
1067 /**
1068  * DPDK callback to start the device.
1069  *
1070  * Simulate device start by attaching all configured flows.
1071  *
1072  * @param dev
1073  *   Pointer to Ethernet device structure.
1074  *
1075  * @return
1076  *   0 on success, a negative errno value otherwise and rte_errno is set.
1077  */
1078 int
1079 mlx5_dev_start(struct rte_eth_dev *dev)
1080 {
1081 	struct mlx5_priv *priv = dev->data->dev_private;
1082 	int ret;
1083 	int fine_inline;
1084 
1085 	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1086 	fine_inline = rte_mbuf_dynflag_lookup
1087 		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1088 	if (fine_inline >= 0)
1089 		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1090 	else
1091 		rte_net_mlx5_dynf_inline_mask = 0;
1092 	if (dev->data->nb_rx_queues > 0) {
1093 		ret = mlx5_dev_configure_rss_reta(dev);
1094 		if (ret) {
1095 			DRV_LOG(ERR, "port %u reta config failed: %s",
1096 				dev->data->port_id, strerror(rte_errno));
1097 			return -rte_errno;
1098 		}
1099 	}
1100 	ret = mlx5_txpp_start(dev);
1101 	if (ret) {
1102 		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1103 			dev->data->port_id, strerror(rte_errno));
1104 		goto error;
1105 	}
1106 	if ((priv->sh->devx && priv->config.dv_flow_en &&
1107 	    priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1108 		ret = priv->obj_ops.lb_dummy_queue_create(dev);
1109 		if (ret)
1110 			goto error;
1111 	}
1112 	ret = mlx5_txq_start(dev);
1113 	if (ret) {
1114 		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1115 			dev->data->port_id, strerror(rte_errno));
1116 		goto error;
1117 	}
1118 	ret = mlx5_rxq_start(dev);
1119 	if (ret) {
1120 		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1121 			dev->data->port_id, strerror(rte_errno));
1122 		goto error;
1123 	}
1124 	/*
1125 	 * This step will be skipped if there is no hairpin Tx queue configured
1126 	 * with an Rx peer queue from the same device.
1127 	 */
1128 	ret = mlx5_hairpin_auto_bind(dev);
1129 	if (ret) {
1130 		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1131 			dev->data->port_id, strerror(rte_errno));
1132 		goto error;
1133 	}
1134 	/* Set started flag here for the following steps like control flow. */
1135 	dev->data->dev_started = 1;
1136 	ret = mlx5_rx_intr_vec_enable(dev);
1137 	if (ret) {
1138 		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1139 			dev->data->port_id);
1140 		goto error;
1141 	}
1142 	mlx5_os_stats_init(dev);
1143 	ret = mlx5_traffic_enable(dev);
1144 	if (ret) {
1145 		DRV_LOG(ERR, "port %u failed to set defaults flows",
1146 			dev->data->port_id);
1147 		goto error;
1148 	}
1149 	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
1150 	mlx5_flow_rxq_dynf_metadata_set(dev);
1151 	/* Set flags and context to convert Rx timestamps. */
1152 	mlx5_rxq_timestamp_set(dev);
1153 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
1154 	mlx5_txq_dynf_timestamp_set(dev);
1155 	/* Attach indirection table objects detached on port stop. */
1156 	ret = mlx5_action_handle_attach(dev);
1157 	if (ret) {
1158 		DRV_LOG(ERR,
1159 			"port %u failed to attach indirect actions: %s",
1160 			dev->data->port_id, rte_strerror(rte_errno));
1161 		goto error;
1162 	}
1163 	/*
1164 	 * In non-cached mode, only the default mreg copy action needs to be
1165 	 * started, since no flow created by the application exists anymore.
1166 	 * But it is worth wrapping the interface for further usage.
1167 	 */
1168 	ret = mlx5_flow_start_default(dev);
1169 	if (ret) {
1170 		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1171 			dev->data->port_id, strerror(rte_errno));
1172 		goto error;
1173 	}
1174 	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1175 		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1176 			dev->data->port_id, rte_strerror(rte_errno));
1177 		goto error;
1178 	}
1179 	rte_wmb();
1180 	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1181 	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1182 	/* Enable datapath on secondary process. */
1183 	mlx5_mp_os_req_start_rxtx(dev);
1184 	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1185 		priv->sh->port[priv->dev_port - 1].ih_port_id =
1186 					(uint32_t)dev->data->port_id;
1187 	} else {
1188 		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1189 			dev->data->port_id);
1190 		dev->data->dev_conf.intr_conf.lsc = 0;
1191 		dev->data->dev_conf.intr_conf.rmv = 0;
1192 	}
1193 	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1194 		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1195 					(uint32_t)dev->data->port_id;
1196 	return 0;
1197 error:
1198 	ret = rte_errno; /* Save rte_errno before cleanup. */
1199 	/* Rollback. */
1200 	dev->data->dev_started = 0;
1201 	mlx5_flow_stop_default(dev);
1202 	mlx5_traffic_disable(dev);
1203 	mlx5_txq_stop(dev);
1204 	mlx5_rxq_stop(dev);
1205 	if (priv->obj_ops.lb_dummy_queue_release)
1206 		priv->obj_ops.lb_dummy_queue_release(dev);
1207 	mlx5_txpp_stop(dev); /* Stop last. */
1208 	rte_errno = ret; /* Restore rte_errno. */
1209 	return -rte_errno;
1210 }
1211 
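/*
 * Editor's illustrative sketch (not part of the driver sources):
 * mlx5_dev_start() above only queries the fine granularity inline dynamic
 * flag with rte_mbuf_dynflag_lookup(), so an application that wants to use
 * it has to register the flag before rte_eth_dev_start(). A minimal,
 * assumed registration helper (the per-packet semantics of the flag are
 * outside the scope of this sketch):
 *
 *	#include <rte_mbuf_dyn.h>
 *	#include <rte_pmd_mlx5.h>
 *
 *	static uint64_t
 *	register_fine_inline_flag(void)
 *	{
 *		const struct rte_mbuf_dynflag desc = {
 *			.name = RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
 *		};
 *		int bit = rte_mbuf_dynflag_register(&desc);
 *
 *		// ol_flags bit to set in mbufs, or 0 if registration failed.
 *		return bit < 0 ? 0 : 1ULL << bit;
 *	}
 */
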
1212 /**
1213  * DPDK callback to stop the device.
1214  *
1215  * Simulate device stop by detaching all configured flows.
1216  *
1217  * @param dev
1218  *   Pointer to Ethernet device structure.
1219  */
1220 int
1221 mlx5_dev_stop(struct rte_eth_dev *dev)
1222 {
1223 	struct mlx5_priv *priv = dev->data->dev_private;
1224 
1225 	dev->data->dev_started = 0;
1226 	/* Prevent crashes when queues are still in use. */
1227 	dev->rx_pkt_burst = removed_rx_burst;
1228 	dev->tx_pkt_burst = removed_tx_burst;
1229 	rte_wmb();
1230 	/* Disable datapath on secondary process. */
1231 	mlx5_mp_os_req_stop_rxtx(dev);
1232 	rte_delay_us_sleep(1000 * priv->rxqs_n);
1233 	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1234 	mlx5_flow_stop_default(dev);
1235 	/* Control flows for default traffic can be removed first. */
1236 	mlx5_traffic_disable(dev);
1237 	/* All RX queue flags will be cleared in the flush interface. */
1238 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1239 	mlx5_flow_meter_rxq_flush(dev);
1240 	mlx5_action_handle_detach(dev);
1241 	mlx5_rx_intr_vec_disable(dev);
1242 	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1243 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1244 	mlx5_txq_stop(dev);
1245 	mlx5_rxq_stop(dev);
1246 	if (priv->obj_ops.lb_dummy_queue_release)
1247 		priv->obj_ops.lb_dummy_queue_release(dev);
1248 	mlx5_txpp_stop(dev);
1249 
1250 	return 0;
1251 }
1252 
1253 /**
1254  * Enable traffic flows configured by control plane
1255  *
1256  * @param dev
1257  *   Pointer to Ethernet device structure.
1260  *
1261  * @return
1262  *   0 on success, a negative errno value otherwise and rte_errno is set.
1263  */
1264 int
1265 mlx5_traffic_enable(struct rte_eth_dev *dev)
1266 {
1267 	struct mlx5_priv *priv = dev->data->dev_private;
1268 	struct rte_flow_item_eth bcast = {
1269 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1270 	};
1271 	struct rte_flow_item_eth ipv6_multi_spec = {
1272 		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1273 	};
1274 	struct rte_flow_item_eth ipv6_multi_mask = {
1275 		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1276 	};
1277 	struct rte_flow_item_eth unicast = {
1278 		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1279 	};
1280 	struct rte_flow_item_eth unicast_mask = {
1281 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1282 	};
1283 	const unsigned int vlan_filter_n = priv->vlan_filter_n;
1284 	const struct rte_ether_addr cmp = {
1285 		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1286 	};
1287 	unsigned int i;
1288 	unsigned int j;
1289 	int ret;
1290 
1291 	/*
1292 	 * The hairpin Tx queue default flow should be created regardless of
1293 	 * isolated mode. Otherwise, all packets to be sent would go out
1294 	 * directly without the Tx flow actions, e.g. encapsulation.
1295 	 */
1296 	for (i = 0; i != priv->txqs_n; ++i) {
1297 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1298 		if (!txq_ctrl)
1299 			continue;
1300 		/* Only Tx implicit mode requires the default Tx flow. */
1301 		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1302 		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1303 		    txq_ctrl->hairpin_conf.peers[0].port ==
1304 		    priv->dev_data->port_id) {
1305 			ret = mlx5_ctrl_flow_source_queue(dev, i);
1306 			if (ret) {
1307 				mlx5_txq_release(dev, i);
1308 				goto error;
1309 			}
1310 		}
1311 		if ((priv->representor || priv->master) &&
1312 		    priv->config.dv_esw_en) {
1313 			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1314 				DRV_LOG(ERR,
1315 					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
1316 					dev->data->port_id, i);
1317 				goto error;
1318 			}
1319 		}
1320 		mlx5_txq_release(dev, i);
1321 	}
1322 	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1323 		if (mlx5_flow_create_esw_table_zero_flow(dev))
1324 			priv->fdb_def_rule = 1;
1325 		else
1326 			DRV_LOG(INFO, "port %u FDB default rule cannot be"
1327 				" configured - only Eswitch group 0 flows are"
1328 				" supported.", dev->data->port_id);
1329 	}
1330 	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1331 		ret = mlx5_flow_lacp_miss(dev);
1332 		if (ret)
1333 			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1334 				"forward LACP to kernel.", dev->data->port_id);
1335 		else
1336 			DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1337 				, dev->data->port_id);
1338 	}
1339 	if (priv->isolated)
1340 		return 0;
1341 	if (dev->data->promiscuous) {
1342 		struct rte_flow_item_eth promisc = {
1343 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1344 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1345 			.type = 0,
1346 		};
1347 
1348 		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1349 		if (ret)
1350 			goto error;
1351 	}
1352 	if (dev->data->all_multicast) {
1353 		struct rte_flow_item_eth multicast = {
1354 			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1355 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1356 			.type = 0,
1357 		};
1358 
1359 		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1360 		if (ret)
1361 			goto error;
1362 	} else {
1363 		/* Add broadcast/multicast flows. */
1364 		for (i = 0; i != vlan_filter_n; ++i) {
1365 			uint16_t vlan = priv->vlan_filter[i];
1366 
1367 			struct rte_flow_item_vlan vlan_spec = {
1368 				.tci = rte_cpu_to_be_16(vlan),
1369 			};
1370 			struct rte_flow_item_vlan vlan_mask =
1371 				rte_flow_item_vlan_mask;
1372 
1373 			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1374 						  &vlan_spec, &vlan_mask);
1375 			if (ret)
1376 				goto error;
1377 			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1378 						  &ipv6_multi_mask,
1379 						  &vlan_spec, &vlan_mask);
1380 			if (ret)
1381 				goto error;
1382 		}
1383 		if (!vlan_filter_n) {
1384 			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1385 			if (ret)
1386 				goto error;
1387 			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1388 					     &ipv6_multi_mask);
1389 			if (ret) {
1390 				/* Do not fail on IPv6 broadcast creation failure. */
1391 				DRV_LOG(WARNING,
1392 					"IPv6 broadcast is not supported");
1393 				ret = 0;
1394 			}
1395 		}
1396 	}
1397 	/* Add MAC address flows. */
1398 	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1399 		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1400 
1401 		if (!memcmp(mac, &cmp, sizeof(*mac)))
1402 			continue;
1403 		memcpy(&unicast.dst.addr_bytes,
1404 		       mac->addr_bytes,
1405 		       RTE_ETHER_ADDR_LEN);
1406 		for (j = 0; j != vlan_filter_n; ++j) {
1407 			uint16_t vlan = priv->vlan_filter[j];
1408 
1409 			struct rte_flow_item_vlan vlan_spec = {
1410 				.tci = rte_cpu_to_be_16(vlan),
1411 			};
1412 			struct rte_flow_item_vlan vlan_mask =
1413 				rte_flow_item_vlan_mask;
1414 
1415 			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1416 						  &unicast_mask,
1417 						  &vlan_spec,
1418 						  &vlan_mask);
1419 			if (ret)
1420 				goto error;
1421 		}
1422 		if (!vlan_filter_n) {
1423 			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1424 			if (ret)
1425 				goto error;
1426 		}
1427 	}
1428 	return 0;
1429 error:
1430 	ret = rte_errno; /* Save rte_errno before cleanup. */
1431 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1432 	rte_errno = ret; /* Restore rte_errno. */
1433 	return -rte_errno;
1434 }
1435 
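/*
 * Editor's illustrative sketch (not part of the driver sources): the early
 * "if (priv->isolated)" return above means none of the broadcast, multicast
 * or MAC control flows are installed once the application has put the port
 * into flow isolated mode, e.g.:
 *
 *	#include <rte_flow.h>
 *
 *	static int
 *	enter_isolated_mode(uint16_t port_id)
 *	{
 *		struct rte_flow_error err;
 *
 *		// Recommended before rte_eth_dev_configure(); afterwards only
 *		// traffic matching application flow rules reaches the queues.
 *		return rte_flow_isolate(port_id, 1, &err);
 *	}
 */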
1436 
1437 /**
1438  * Disable traffic flows configured by control plane
1439  *
1440  * @param dev
1441  *   Pointer to Ethernet device private data.
1442  */
1443 void
1444 mlx5_traffic_disable(struct rte_eth_dev *dev)
1445 {
1446 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1447 }
1448 
1449 /**
1450  * Restart traffic flows configured by control plane
1451  *
1452  * @param dev
1453  *   Pointer to Ethernet device private data.
1454  *
1455  * @return
1456  *   0 on success, a negative errno value otherwise and rte_errno is set.
1457  */
1458 int
1459 mlx5_traffic_restart(struct rte_eth_dev *dev)
1460 {
1461 	if (dev->data->dev_started) {
1462 		mlx5_traffic_disable(dev);
1463 		return mlx5_traffic_enable(dev);
1464 	}
1465 	return 0;
1466 }
1467