1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5 
6 #include <unistd.h>
7 
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13 
14 #include <mlx5_malloc.h>
15 
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22 
23 /**
24  * Stop traffic on Tx queues.
25  *
26  * @param dev
27  *   Pointer to Ethernet device structure.
28  */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32 	struct mlx5_priv *priv = dev->data->dev_private;
33 	unsigned int i;
34 
35 	for (i = 0; i != priv->txqs_n; ++i)
36 		mlx5_txq_release(dev, i);
37 }
38 
39 /**
40  * Start traffic on Tx queues.
41  *
42  * @param dev
43  *   Pointer to Ethernet device structure.
44  *
45  * @return
46  *   0 on success, a negative errno value otherwise and rte_errno is set.
47  */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51 	struct mlx5_priv *priv = dev->data->dev_private;
52 	unsigned int i;
53 	int ret;
54 
55 	for (i = 0; i != priv->txqs_n; ++i) {
56 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57 		struct mlx5_txq_data *txq_data;
58 		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59 		if (!txq_ctrl)
60 			continue;
61 		txq_data = &txq_ctrl->txq;
62 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
63 			txq_alloc_elts(txq_ctrl);
64 		MLX5_ASSERT(!txq_ctrl->obj);
65 		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66 					    0, txq_ctrl->socket);
67 		if (!txq_ctrl->obj) {
68 			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69 				"memory resources.", dev->data->port_id,
70 				txq_data->idx);
71 			rte_errno = ENOMEM;
72 			goto error;
73 		}
74 		ret = priv->obj_ops.txq_obj_new(dev, i);
75 		if (ret < 0) {
76 			mlx5_free(txq_ctrl->obj);
77 			txq_ctrl->obj = NULL;
78 			goto error;
79 		}
80 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
81 			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82 
83 			txq_data->fcqs = mlx5_malloc(flags, size,
84 						     RTE_CACHE_LINE_SIZE,
85 						     txq_ctrl->socket);
86 			if (!txq_data->fcqs) {
87 				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88 					"allocate memory (FCQ).",
89 					dev->data->port_id, i);
90 				rte_errno = ENOMEM;
91 				goto error;
92 			}
93 		}
94 		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95 			dev->data->port_id, i, (void *)&txq_ctrl->obj);
96 		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97 	}
98 	return 0;
99 error:
100 	ret = rte_errno; /* Save rte_errno before cleanup. */
101 	do {
102 		mlx5_txq_release(dev, i);
103 	} while (i-- != 0);
104 	rte_errno = ret; /* Restore rte_errno. */
105 	return -rte_errno;
106 }
107 
108 /**
109  * Translate the chunk address to MR key in order to put in into the cache.
110  * Translate the chunk address to MR key in order to put it into the cache.
111 static void
112 mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
113 			     struct rte_mempool_memhdr *memhdr,
114 			     unsigned int idx)
115 {
116 	struct mlx5_rxq_data *rxq = opaque;
117 
118 	RTE_SET_USED(mp);
119 	RTE_SET_USED(idx);
120 	mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
121 }
122 
123 /**
124  * Register Rx queue mempools and fill the Rx queue cache.
125  * This function tolerates repeated mempool registration.
126  *
127  * @param[in] rxq_ctrl
128  *   Rx queue control data.
129  *
130  * @return
131  *   0 on success, (-1) on failure and rte_errno is set.
132  */
133 static int
134 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
135 {
136 	struct rte_mempool *mp;
137 	uint32_t s;
138 	int ret = 0;
139 
140 	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
141 	/* MPRQ mempool is registered on creation, just fill the cache. */
142 	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
143 		rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
144 				     mlx5_rxq_mempool_register_cb,
145 				     &rxq_ctrl->rxq);
146 		return 0;
147 	}
148 	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
149 		mp = rxq_ctrl->rxq.rxseg[s].mp;
150 		ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp);
151 		if (ret < 0 && rte_errno != EEXIST)
152 			return ret;
153 		rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
154 				     &rxq_ctrl->rxq);
155 	}
156 	return 0;
157 }
158 
159 /**
160  * Stop traffic on Rx queues.
161  *
162  * @param dev
163  *   Pointer to Ethernet device structure.
164  */
165 static void
166 mlx5_rxq_stop(struct rte_eth_dev *dev)
167 {
168 	struct mlx5_priv *priv = dev->data->dev_private;
169 	unsigned int i;
170 
171 	for (i = 0; i != priv->rxqs_n; ++i)
172 		mlx5_rxq_release(dev, i);
173 }
174 
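/**
 * Prepare a single Rx queue before creating its HW object. For standard
 * queues, register the mempools and allocate the SW elements; then allocate
 * the queue object container.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rxq_ctrl
 *   Rx queue control data.
 * @param idx
 *   Rx queue index within the port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */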
175 static int
176 mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
177 		      unsigned int idx)
178 {
179 	int ret = 0;
180 
181 	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
182 		/*
183 		 * Pre-register the mempools. Regardless of whether
184 		 * the implicit registration is enabled or not,
185 		 * Rx mempool destruction is tracked to free MRs.
186 		 */
187 		if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
188 			return -rte_errno;
189 		ret = rxq_alloc_elts(rxq_ctrl);
190 		if (ret)
191 			return ret;
192 	}
193 	MLX5_ASSERT(!rxq_ctrl->obj);
194 	rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
195 				    sizeof(*rxq_ctrl->obj), 0,
196 				    rxq_ctrl->socket);
197 	if (!rxq_ctrl->obj) {
198 		DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
199 			dev->data->port_id, idx);
200 		rte_errno = ENOMEM;
201 		return -rte_errno;
202 	}
203 	DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
204 		idx, (void *)&rxq_ctrl->obj);
205 	return 0;
206 }
207 
208 /**
209  * Start traffic on Rx queues.
210  *
211  * @param dev
212  *   Pointer to Ethernet device structure.
213  *
214  * @return
215  *   0 on success, a negative errno value otherwise and rte_errno is set.
216  */
217 static int
218 mlx5_rxq_start(struct rte_eth_dev *dev)
219 {
220 	struct mlx5_priv *priv = dev->data->dev_private;
221 	unsigned int i;
222 	int ret = 0;
223 
224 	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
225 	if (mlx5_mprq_alloc_mp(dev)) {
226 		/* Should not release Rx queues but return immediately. */
227 		return -rte_errno;
228 	}
229 	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
230 		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
231 	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
232 		dev->data->port_id, priv->sh->device_attr.max_sge);
233 	for (i = 0; i != priv->rxqs_n; ++i) {
234 		struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
235 		struct mlx5_rxq_ctrl *rxq_ctrl;
236 
237 		if (rxq == NULL)
238 			continue;
239 		rxq_ctrl = rxq->ctrl;
240 		if (!rxq_ctrl->started) {
241 			if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
242 				goto error;
243 			LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
244 		}
245 		ret = priv->obj_ops.rxq_obj_new(rxq);
246 		if (ret) {
247 			mlx5_free(rxq_ctrl->obj);
248 			rxq_ctrl->obj = NULL;
249 			goto error;
250 		}
251 		rxq_ctrl->started = true;
252 	}
253 	return 0;
254 error:
255 	ret = rte_errno; /* Save rte_errno before cleanup. */
256 	do {
257 		mlx5_rxq_release(dev, i);
258 	} while (i-- != 0);
259 	rte_errno = ret; /* Restore rte_errno. */
260 	return -rte_errno;
261 }
262 
263 /**
264  * Auto-bind Tx queues to Rx queues for hairpin.
265  *
266  * Binds each hairpin Tx queue to its target Rx queue on the same port.
267  *
268  * @param dev
269  *   Pointer to Ethernet device structure.
270  *
271  * @return
272  *   0 on success, a negative errno value otherwise and rte_errno is set.
273  */
274 static int
275 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
276 {
277 	struct mlx5_priv *priv = dev->data->dev_private;
278 	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
279 	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
280 	struct mlx5_txq_ctrl *txq_ctrl;
281 	struct mlx5_rxq_priv *rxq;
282 	struct mlx5_rxq_ctrl *rxq_ctrl;
283 	struct mlx5_devx_obj *sq;
284 	struct mlx5_devx_obj *rq;
285 	unsigned int i;
286 	int ret = 0;
287 	bool need_auto = false;
288 	uint16_t self_port = dev->data->port_id;
289 
290 	for (i = 0; i != priv->txqs_n; ++i) {
291 		txq_ctrl = mlx5_txq_get(dev, i);
292 		if (!txq_ctrl)
293 			continue;
294 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
295 		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
296 			mlx5_txq_release(dev, i);
297 			continue;
298 		}
299 		if (txq_ctrl->hairpin_conf.manual_bind) {
300 			mlx5_txq_release(dev, i);
301 			return 0;
302 		}
303 		need_auto = true;
304 		mlx5_txq_release(dev, i);
305 	}
306 	if (!need_auto)
307 		return 0;
308 	for (i = 0; i != priv->txqs_n; ++i) {
309 		txq_ctrl = mlx5_txq_get(dev, i);
310 		if (!txq_ctrl)
311 			continue;
312 		/* Skip hairpin queues with other peer ports. */
313 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
314 		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
315 			mlx5_txq_release(dev, i);
316 			continue;
317 		}
318 		if (!txq_ctrl->obj) {
319 			rte_errno = ENOMEM;
320 			DRV_LOG(ERR, "port %u no txq object found: %d",
321 				dev->data->port_id, i);
322 			mlx5_txq_release(dev, i);
323 			return -rte_errno;
324 		}
325 		sq = txq_ctrl->obj->sq;
326 		rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
327 		if (rxq == NULL) {
328 			mlx5_txq_release(dev, i);
329 			rte_errno = EINVAL;
330 			DRV_LOG(ERR, "port %u no rxq object found: %d",
331 				dev->data->port_id,
332 				txq_ctrl->hairpin_conf.peers[0].queue);
333 			return -rte_errno;
334 		}
335 		rxq_ctrl = rxq->ctrl;
336 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
337 		    rxq->hairpin_conf.peers[0].queue != i) {
338 			rte_errno = ENOMEM;
339 			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
340 				"Rx queue %d", dev->data->port_id,
341 				i, txq_ctrl->hairpin_conf.peers[0].queue);
342 			goto error;
343 		}
344 		rq = rxq_ctrl->obj->rq;
345 		if (!rq) {
346 			rte_errno = ENOMEM;
347 			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
348 				dev->data->port_id,
349 				txq_ctrl->hairpin_conf.peers[0].queue);
350 			goto error;
351 		}
352 		sq_attr.state = MLX5_SQC_STATE_RDY;
353 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
354 		sq_attr.hairpin_peer_rq = rq->id;
355 		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
356 		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
357 		if (ret)
358 			goto error;
359 		rq_attr.state = MLX5_SQC_STATE_RDY;
360 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
361 		rq_attr.hairpin_peer_sq = sq->id;
362 		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
363 		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
364 		if (ret)
365 			goto error;
366 		/* Qs with auto-bind will be destroyed directly. */
367 		rxq->hairpin_status = 1;
368 		txq_ctrl->hairpin_status = 1;
369 		mlx5_txq_release(dev, i);
370 	}
371 	return 0;
372 error:
373 	mlx5_txq_release(dev, i);
374 	return -rte_errno;
375 }
376 
377 /*
378  * Fetch the peer queue's SW & HW information.
379  *
380  * @param dev
381  *   Pointer to Ethernet device structure.
382  * @param peer_queue
383  *   Index of the queue whose information is fetched.
384  * @param current_info
385  *   Pointer to the input peer information, not used currently.
386  * @param peer_info
387  *   Pointer to the structure to store the information, output.
388  * @param direction
389  *   Positive to get the RxQ information, zero to get the TxQ information.
390  *
391  * @return
392  *   0 on success, a negative errno value otherwise and rte_errno is set.
393  */
394 int
395 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
396 			       struct rte_hairpin_peer_info *current_info,
397 			       struct rte_hairpin_peer_info *peer_info,
398 			       uint32_t direction)
399 {
400 	struct mlx5_priv *priv = dev->data->dev_private;
401 	RTE_SET_USED(current_info);
402 
403 	if (dev->data->dev_started == 0) {
404 		rte_errno = EBUSY;
405 		DRV_LOG(ERR, "peer port %u is not started",
406 			dev->data->port_id);
407 		return -rte_errno;
408 	}
409 	/*
410 	 * Peer port used as egress. In the current design, hairpin Tx queue
411 	 * will be bound to the peer Rx queue. Indeed, only the information of
412 	 * peer Rx queue needs to be fetched.
413 	 */
414 	if (direction == 0) {
415 		struct mlx5_txq_ctrl *txq_ctrl;
416 
417 		txq_ctrl = mlx5_txq_get(dev, peer_queue);
418 		if (txq_ctrl == NULL) {
419 			rte_errno = EINVAL;
420 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
421 				dev->data->port_id, peer_queue);
422 			return -rte_errno;
423 		}
424 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
425 			rte_errno = EINVAL;
426 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
427 				dev->data->port_id, peer_queue);
428 			mlx5_txq_release(dev, peer_queue);
429 			return -rte_errno;
430 		}
431 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
432 			rte_errno = ENOMEM;
433 			DRV_LOG(ERR, "port %u no Txq object found: %d",
434 				dev->data->port_id, peer_queue);
435 			mlx5_txq_release(dev, peer_queue);
436 			return -rte_errno;
437 		}
438 		peer_info->qp_id = txq_ctrl->obj->sq->id;
439 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
440 		/* 1-to-1 mapping, only the first one is used. */
441 		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
442 		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
443 		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
444 		mlx5_txq_release(dev, peer_queue);
445 	} else { /* Peer port used as ingress. */
446 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
447 		struct mlx5_rxq_ctrl *rxq_ctrl;
448 
449 		if (rxq == NULL) {
450 			rte_errno = EINVAL;
451 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
452 				dev->data->port_id, peer_queue);
453 			return -rte_errno;
454 		}
455 		rxq_ctrl = rxq->ctrl;
456 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
457 			rte_errno = EINVAL;
458 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
459 				dev->data->port_id, peer_queue);
460 			return -rte_errno;
461 		}
462 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
463 			rte_errno = ENOMEM;
464 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
465 				dev->data->port_id, peer_queue);
466 			return -rte_errno;
467 		}
468 		peer_info->qp_id = rxq_ctrl->obj->rq->id;
469 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
470 		peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
471 		peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
472 		peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
473 	}
474 	return 0;
475 }
476 
477 /*
478  * Bind the hairpin queue with the peer HW information.
479  * This needs to be called twice, for both the Tx and Rx queues of a pair.
480  * If the queue is already bound, it is considered successful.
481  *
482  * @param dev
483  *   Pointer to Ethernet device structure.
484  * @param cur_queue
485  *   Index of the queue to change the HW configuration to bind.
486  * @param peer_info
487  *   Pointer to information of the peer queue.
488  * @param direction
489  *   Positive to configure the TxQ, zero to configure the RxQ.
490  *
491  * @return
492  *   0 on success, a negative errno value otherwise and rte_errno is set.
493  */
494 int
495 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
496 			     struct rte_hairpin_peer_info *peer_info,
497 			     uint32_t direction)
498 {
499 	int ret = 0;
500 
501 	/*
502 	 * Consistency checking of the peer queue: opposite direction is used
503 	 * to get the peer queue info with ethdev port ID, no need to check.
504 	 */
505 	if (peer_info->peer_q != cur_queue) {
506 		rte_errno = EINVAL;
507 		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
508 			dev->data->port_id, cur_queue, peer_info->peer_q);
509 		return -rte_errno;
510 	}
511 	if (direction != 0) {
512 		struct mlx5_txq_ctrl *txq_ctrl;
513 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
514 
515 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
516 		if (txq_ctrl == NULL) {
517 			rte_errno = EINVAL;
518 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
519 				dev->data->port_id, cur_queue);
520 			return -rte_errno;
521 		}
522 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
523 			rte_errno = EINVAL;
524 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
525 				dev->data->port_id, cur_queue);
526 			mlx5_txq_release(dev, cur_queue);
527 			return -rte_errno;
528 		}
529 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
530 			rte_errno = ENOMEM;
531 			DRV_LOG(ERR, "port %u no Txq object found: %d",
532 				dev->data->port_id, cur_queue);
533 			mlx5_txq_release(dev, cur_queue);
534 			return -rte_errno;
535 		}
536 		if (txq_ctrl->hairpin_status != 0) {
537 			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
538 				dev->data->port_id, cur_queue);
539 			mlx5_txq_release(dev, cur_queue);
540 			return 0;
541 		}
542 		/*
543 		 * Consistency checking of all queues of one port is done in the
544 		 * bind() function, and that is optional.
545 		 */
546 		if (peer_info->tx_explicit !=
547 		    txq_ctrl->hairpin_conf.tx_explicit) {
548 			rte_errno = EINVAL;
549 			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
550 				" mismatch", dev->data->port_id, cur_queue);
551 			mlx5_txq_release(dev, cur_queue);
552 			return -rte_errno;
553 		}
554 		if (peer_info->manual_bind !=
555 		    txq_ctrl->hairpin_conf.manual_bind) {
556 			rte_errno = EINVAL;
557 			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
558 				" mismatch", dev->data->port_id, cur_queue);
559 			mlx5_txq_release(dev, cur_queue);
560 			return -rte_errno;
561 		}
562 		sq_attr.state = MLX5_SQC_STATE_RDY;
563 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
564 		sq_attr.hairpin_peer_rq = peer_info->qp_id;
565 		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
566 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
567 		if (ret == 0)
568 			txq_ctrl->hairpin_status = 1;
569 		mlx5_txq_release(dev, cur_queue);
570 	} else {
571 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
572 		struct mlx5_rxq_ctrl *rxq_ctrl;
573 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
574 
575 		if (rxq == NULL) {
576 			rte_errno = EINVAL;
577 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
578 				dev->data->port_id, cur_queue);
579 			return -rte_errno;
580 		}
581 		rxq_ctrl = rxq->ctrl;
582 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
583 			rte_errno = EINVAL;
584 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
585 				dev->data->port_id, cur_queue);
586 			return -rte_errno;
587 		}
588 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
589 			rte_errno = ENOMEM;
590 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
591 				dev->data->port_id, cur_queue);
592 			return -rte_errno;
593 		}
594 		if (rxq->hairpin_status != 0) {
595 			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
596 				dev->data->port_id, cur_queue);
597 			return 0;
598 		}
599 		if (peer_info->tx_explicit !=
600 		    rxq->hairpin_conf.tx_explicit) {
601 			rte_errno = EINVAL;
602 			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
603 				" mismatch", dev->data->port_id, cur_queue);
604 			return -rte_errno;
605 		}
606 		if (peer_info->manual_bind !=
607 		    rxq->hairpin_conf.manual_bind) {
608 			rte_errno = EINVAL;
609 			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
610 				" mismatch", dev->data->port_id, cur_queue);
611 			return -rte_errno;
612 		}
613 		rq_attr.state = MLX5_SQC_STATE_RDY;
614 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
615 		rq_attr.hairpin_peer_sq = peer_info->qp_id;
616 		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
617 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
618 		if (ret == 0)
619 			rxq->hairpin_status = 1;
620 	}
621 	return ret;
622 }
623 
624 /*
625  * Unbind the hairpin queue and reset its HW configuration.
626  * This needs to be called twice, for both the Tx and Rx queues of a pair.
627  * If the queue is already unbound, it is considered successful.
628  *
629  * @param dev
630  *   Pointer to Ethernet device structure.
631  * @param cur_queue
632  *   Index of the queue to change the HW configuration to unbind.
633  * @param direction
634  *   Positive to reset the TxQ, zero to reset the RxQ.
635  *
636  * @return
637  *   0 on success, a negative errno value otherwise and rte_errno is set.
638  */
639 int
640 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
641 			       uint32_t direction)
642 {
643 	int ret = 0;
644 
645 	if (direction != 0) {
646 		struct mlx5_txq_ctrl *txq_ctrl;
647 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
648 
649 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
650 		if (txq_ctrl == NULL) {
651 			rte_errno = EINVAL;
652 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
653 				dev->data->port_id, cur_queue);
654 			return -rte_errno;
655 		}
656 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
657 			rte_errno = EINVAL;
658 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
659 				dev->data->port_id, cur_queue);
660 			mlx5_txq_release(dev, cur_queue);
661 			return -rte_errno;
662 		}
663 		/* Already unbound, return success before obj checking. */
664 		if (txq_ctrl->hairpin_status == 0) {
665 			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
666 				dev->data->port_id, cur_queue);
667 			mlx5_txq_release(dev, cur_queue);
668 			return 0;
669 		}
670 		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
671 			rte_errno = ENOMEM;
672 			DRV_LOG(ERR, "port %u no Txq object found: %d",
673 				dev->data->port_id, cur_queue);
674 			mlx5_txq_release(dev, cur_queue);
675 			return -rte_errno;
676 		}
677 		sq_attr.state = MLX5_SQC_STATE_RST;
678 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
679 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
680 		if (ret == 0)
681 			txq_ctrl->hairpin_status = 0;
682 		mlx5_txq_release(dev, cur_queue);
683 	} else {
684 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
685 		struct mlx5_rxq_ctrl *rxq_ctrl;
686 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
687 
688 		if (rxq == NULL) {
689 			rte_errno = EINVAL;
690 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
691 				dev->data->port_id, cur_queue);
692 			return -rte_errno;
693 		}
694 		rxq_ctrl = rxq->ctrl;
695 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
696 			rte_errno = EINVAL;
697 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
698 				dev->data->port_id, cur_queue);
699 			return -rte_errno;
700 		}
701 		if (rxq->hairpin_status == 0) {
702 			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
703 				dev->data->port_id, cur_queue);
704 			return 0;
705 		}
706 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
707 			rte_errno = ENOMEM;
708 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
709 				dev->data->port_id, cur_queue);
710 			return -rte_errno;
711 		}
712 		rq_attr.state = MLX5_SQC_STATE_RST;
713 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
714 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
715 		if (ret == 0)
716 			rxq->hairpin_status = 0;
717 	}
718 	return ret;
719 }
720 
721 /*
722  * Bind the hairpin port pairs, from the Tx to the peer Rx.
723  * This function only supports binding the Tx port to one Rx port.
724  *
725  * @param dev
726  *   Pointer to Ethernet device structure.
727  * @param rx_port
728  *   Port identifier of the Rx port.
729  *
730  * @return
731  *   0 on success, a negative errno value otherwise and rte_errno is set.
732  */
733 static int
734 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
735 {
736 	struct mlx5_priv *priv = dev->data->dev_private;
737 	int ret = 0;
738 	struct mlx5_txq_ctrl *txq_ctrl;
739 	uint32_t i;
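	/* Dummy initial value; overwritten by the peer update call below. */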
740 	struct rte_hairpin_peer_info peer = {0xffffff};
741 	struct rte_hairpin_peer_info cur;
742 	const struct rte_eth_hairpin_conf *conf;
743 	uint16_t num_q = 0;
744 	uint16_t local_port = priv->dev_data->port_id;
745 	uint32_t manual;
746 	uint32_t explicit;
747 	uint16_t rx_queue;
748 
749 	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
750 		rte_errno = ENODEV;
751 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
752 		return -rte_errno;
753 	}
754 	/*
755 	 * Before binding TxQ to peer RxQ, a first pass over the queues checks
756 	 * their configuration consistency. This is a little time consuming but
757 	 * better than having to roll back afterwards.
758 	 */
759 	for (i = 0; i != priv->txqs_n; i++) {
760 		txq_ctrl = mlx5_txq_get(dev, i);
761 		if (txq_ctrl == NULL)
762 			continue;
763 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
764 			mlx5_txq_release(dev, i);
765 			continue;
766 		}
767 		/*
768 		 * All hairpin Tx queues of a single port connected to the
769 		 * same peer Rx port should have the same "auto binding" and
770 		 * "implicit Tx flow" modes.
771 		 * Peer consistency checking will be done in per queue binding.
772 		 */
773 		conf = &txq_ctrl->hairpin_conf;
774 		if (conf->peers[0].port == rx_port) {
775 			if (num_q == 0) {
776 				manual = conf->manual_bind;
777 				explicit = conf->tx_explicit;
778 			} else {
779 				if (manual != conf->manual_bind ||
780 				    explicit != conf->tx_explicit) {
781 					rte_errno = EINVAL;
782 					DRV_LOG(ERR, "port %u queue %d mode"
783 						" mismatch: %u %u, %u %u",
784 						local_port, i, manual,
785 						conf->manual_bind, explicit,
786 						conf->tx_explicit);
787 					mlx5_txq_release(dev, i);
788 					return -rte_errno;
789 				}
790 			}
791 			num_q++;
792 		}
793 		mlx5_txq_release(dev, i);
794 	}
795 	/* If no queue is configured, success is returned directly. */
796 	if (num_q == 0)
797 		return ret;
798 	/* All the hairpin TX queues need to be traversed again. */
799 	for (i = 0; i != priv->txqs_n; i++) {
800 		txq_ctrl = mlx5_txq_get(dev, i);
801 		if (txq_ctrl == NULL)
802 			continue;
803 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
804 			mlx5_txq_release(dev, i);
805 			continue;
806 		}
807 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
808 			mlx5_txq_release(dev, i);
809 			continue;
810 		}
811 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
812 		/*
813 		 * Fetch peer RxQ's information.
814 		 * No need to pass the information of the current queue.
815 		 */
816 		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
817 							NULL, &peer, 1);
818 		if (ret != 0) {
819 			mlx5_txq_release(dev, i);
820 			goto error;
821 		}
822 		/* Accessing its own device, inside mlx5 PMD. */
823 		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
824 		if (ret != 0) {
825 			mlx5_txq_release(dev, i);
826 			goto error;
827 		}
828 		/* Pass TxQ's information to peer RxQ and try binding. */
829 		cur.peer_q = rx_queue;
830 		cur.qp_id = txq_ctrl->obj->sq->id;
831 		cur.vhca_id = priv->config.hca_attr.vhca_id;
832 		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
833 		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
834 		/*
835 		 * In order to access another device in a proper way, the RTE-level
836 		 * private function is needed.
837 		 */
838 		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
839 						      &cur, 0);
840 		if (ret != 0) {
841 			mlx5_txq_release(dev, i);
842 			goto error;
843 		}
844 		mlx5_txq_release(dev, i);
845 	}
846 	return 0;
847 error:
848 	/*
849 	 * Roll back the queues that were already bound.
850 	 * No need to check the return value of the queue unbind function.
851 	 */
852 	do {
853 		/* No validation is needed here. */
854 		txq_ctrl = mlx5_txq_get(dev, i);
855 		if (txq_ctrl == NULL)
856 			continue;
857 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
858 		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
859 		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
860 		mlx5_txq_release(dev, i);
861 	} while (i--);
862 	return ret;
863 }
864 
865 /*
866  * Unbind the hairpin port pair; the HW configuration of both devices will be
867  * cleared and the status will be reset for all the queues used between them.
868  * This function only supports unbinding the Tx port from one Rx port.
869  *
870  * @param dev
871  *   Pointer to Ethernet device structure.
872  * @param rx_port
873  *   Port identifier of the Rx port.
874  *
875  * @return
876  *   0 on success, a negative errno value otherwise and rte_errno is set.
877  */
878 static int
879 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
880 {
881 	struct mlx5_priv *priv = dev->data->dev_private;
882 	struct mlx5_txq_ctrl *txq_ctrl;
883 	uint32_t i;
884 	int ret;
885 	uint16_t cur_port = priv->dev_data->port_id;
886 
887 	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
888 		rte_errno = ENODEV;
889 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
890 		return -rte_errno;
891 	}
892 	for (i = 0; i != priv->txqs_n; i++) {
893 		uint16_t rx_queue;
894 
895 		txq_ctrl = mlx5_txq_get(dev, i);
896 		if (txq_ctrl == NULL)
897 			continue;
898 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
899 			mlx5_txq_release(dev, i);
900 			continue;
901 		}
902 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
903 			mlx5_txq_release(dev, i);
904 			continue;
905 		}
906 		/* Only the first used queue needs to be checked. */
907 		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
908 			if (cur_port != rx_port) {
909 				rte_errno = EINVAL;
910 				DRV_LOG(ERR, "port %u and port %u are in"
911 					" auto-bind mode", cur_port, rx_port);
912 				mlx5_txq_release(dev, i);
913 				return -rte_errno;
914 			} else {
915 				return 0;
916 			}
917 		}
918 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
919 		mlx5_txq_release(dev, i);
920 		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
921 		if (ret) {
922 			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
923 				rx_port, rx_queue);
924 			return ret;
925 		}
926 		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
927 		if (ret) {
928 			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
929 				cur_port, i);
930 			return ret;
931 		}
932 	}
933 	return 0;
934 }
935 
936 /*
937  * Bind hairpin ports; Rx can mean all ports when RTE_MAX_ETHPORTS is passed.
938  * @see mlx5_hairpin_bind_single_port()
939  */
940 int
941 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
942 {
943 	int ret = 0;
944 	uint16_t p, pp;
945 
946 	/*
947 	 * If the Rx port has no hairpin configuration with the current port,
948 	 * the binding will be skipped inside the single-port bind function.
949 	 * Device started status will be checked only before the queue
950 	 * information updating.
951 	 */
952 	if (rx_port == RTE_MAX_ETHPORTS) {
953 		MLX5_ETH_FOREACH_DEV(p, dev->device) {
954 			ret = mlx5_hairpin_bind_single_port(dev, p);
955 			if (ret != 0)
956 				goto unbind;
957 		}
958 		return ret;
959 	} else {
960 		return mlx5_hairpin_bind_single_port(dev, rx_port);
961 	}
962 unbind:
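	/* Roll back: unbind the ports that were bound before the failure. */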
963 	MLX5_ETH_FOREACH_DEV(pp, dev->device)
964 		if (pp < p)
965 			mlx5_hairpin_unbind_single_port(dev, pp);
966 	return ret;
967 }
968 
969 /*
970  * Unbind hairpin ports; Rx can mean all ports when RTE_MAX_ETHPORTS is passed.
971  * @see mlx5_hairpin_unbind_single_port()
972  */
973 int
974 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
975 {
976 	int ret = 0;
977 	uint16_t p;
978 
979 	if (rx_port == RTE_MAX_ETHPORTS)
980 		MLX5_ETH_FOREACH_DEV(p, dev->device) {
981 			ret = mlx5_hairpin_unbind_single_port(dev, p);
982 			if (ret != 0)
983 				return ret;
984 		}
985 	else
986 		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
987 	return ret;
988 }
989 
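/*
 * Illustrative usage sketch (not part of the driver): an application drives
 * the two functions above through the generic ethdev hairpin API. The port
 * numbers below are hypothetical and the dispatch to this PMD is assumed to
 * go through the hairpin_bind/hairpin_unbind dev_ops callbacks.
 *
 *     int ret = rte_eth_hairpin_bind(0, 1);     // Tx port 0 -> Rx port 1
 *     if (ret < 0)
 *         printf("hairpin bind failed: %s\n", rte_strerror(-ret));
 *     ...
 *     ret = rte_eth_hairpin_unbind(0, 1);       // tear the pair down
 */
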
990 /*
991  * DPDK callback to get the hairpin peer ports list.
992  * This will return the actual number of peer ports and save the identifiers
993  * into the array (sorted; the order may differ from the order used when
994  * setting up the hairpin peer queues).
995  * The peer port ID could be the same as the port ID of the current device.
996  *
997  * @param dev
998  *   Pointer to Ethernet device structure.
999  * @param peer_ports
1000  *   Pointer to array to save the port identifiers.
1001  * @param len
1002  *   The length of the array.
1003  * @param direction
1004  *   Current port to peer port direction.
1005  *   positive - current used as Tx to get all peer Rx ports.
1006  *   zero - current used as Rx to get all peer Tx ports.
1007  *
1008  * @return
1009  *   0 or a positive value on success: the actual number of peer ports;
1010  *   a negative errno value otherwise and rte_errno is set.
1011  */
1012 int
1013 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1014 			    size_t len, uint32_t direction)
1015 {
1016 	struct mlx5_priv *priv = dev->data->dev_private;
1017 	struct mlx5_txq_ctrl *txq_ctrl;
1018 	uint32_t i;
1019 	uint16_t pp;
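	/* Bitmap of peer port IDs found so far, one bit per possible ethdev port. */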
1020 	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1021 	int ret = 0;
1022 
1023 	if (direction) {
1024 		for (i = 0; i < priv->txqs_n; i++) {
1025 			txq_ctrl = mlx5_txq_get(dev, i);
1026 			if (!txq_ctrl)
1027 				continue;
1028 			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
1029 				mlx5_txq_release(dev, i);
1030 				continue;
1031 			}
1032 			pp = txq_ctrl->hairpin_conf.peers[0].port;
1033 			if (pp >= RTE_MAX_ETHPORTS) {
1034 				rte_errno = ERANGE;
1035 				mlx5_txq_release(dev, i);
1036 				DRV_LOG(ERR, "port %hu queue %u peer port "
1037 					"out of range %hu",
1038 					priv->dev_data->port_id, i, pp);
1039 				return -rte_errno;
1040 			}
1041 			bits[pp / 32] |= 1 << (pp % 32);
1042 			mlx5_txq_release(dev, i);
1043 		}
1044 	} else {
1045 		for (i = 0; i < priv->rxqs_n; i++) {
1046 			struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1047 			struct mlx5_rxq_ctrl *rxq_ctrl;
1048 
1049 			if (rxq == NULL)
1050 				continue;
1051 			rxq_ctrl = rxq->ctrl;
1052 			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
1053 				continue;
1054 			pp = rxq->hairpin_conf.peers[0].port;
1055 			if (pp >= RTE_MAX_ETHPORTS) {
1056 				rte_errno = ERANGE;
1057 				DRV_LOG(ERR, "port %hu queue %u peer port "
1058 					"out of range %hu",
1059 					priv->dev_data->port_id, i, pp);
1060 				return -rte_errno;
1061 			}
1062 			bits[pp / 32] |= 1 << (pp % 32);
1063 		}
1064 	}
1065 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1066 		if (bits[i / 32] & (1 << (i % 32))) {
1067 			if ((size_t)ret >= len) {
1068 				rte_errno = E2BIG;
1069 				return -rte_errno;
1070 			}
1071 			peer_ports[ret++] = i;
1072 		}
1073 	}
1074 	return ret;
1075 }
1076 
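/*
 * Illustrative usage sketch (not part of the driver), assuming port 0 is an
 * mlx5 port with hairpin queues configured:
 *
 *     uint16_t peers[RTE_MAX_ETHPORTS];
 *     // direction 1: port 0 acts as Tx, list all of its peer Rx ports.
 *     int n = rte_eth_hairpin_get_peer_ports(0, peers, RTE_DIM(peers), 1);
 *     for (int i = 0; i < n; i++)
 *         printf("peer Rx port: %u\n", peers[i]);
 */
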
1077 /**
1078  * DPDK callback to start the device.
1079  *
1080  * Simulate device start by attaching all configured flows.
1081  *
1082  * @param dev
1083  *   Pointer to Ethernet device structure.
1084  *
1085  * @return
1086  *   0 on success, a negative errno value otherwise and rte_errno is set.
1087  */
1088 int
1089 mlx5_dev_start(struct rte_eth_dev *dev)
1090 {
1091 	struct mlx5_priv *priv = dev->data->dev_private;
1092 	int ret;
1093 	int fine_inline;
1094 
1095 	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1096 	fine_inline = rte_mbuf_dynflag_lookup
1097 		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1098 	if (fine_inline >= 0)
1099 		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1100 	else
1101 		rte_net_mlx5_dynf_inline_mask = 0;
1102 	if (dev->data->nb_rx_queues > 0) {
1103 		ret = mlx5_dev_configure_rss_reta(dev);
1104 		if (ret) {
1105 			DRV_LOG(ERR, "port %u reta config failed: %s",
1106 				dev->data->port_id, strerror(rte_errno));
1107 			return -rte_errno;
1108 		}
1109 	}
1110 	ret = mlx5_txpp_start(dev);
1111 	if (ret) {
1112 		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1113 			dev->data->port_id, strerror(rte_errno));
1114 		goto error;
1115 	}
1116 	if ((priv->sh->devx && priv->config.dv_flow_en &&
1117 	    priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
1118 		ret = priv->obj_ops.lb_dummy_queue_create(dev);
1119 		if (ret)
1120 			goto error;
1121 	}
1122 	ret = mlx5_txq_start(dev);
1123 	if (ret) {
1124 		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1125 			dev->data->port_id, strerror(rte_errno));
1126 		goto error;
1127 	}
1128 	if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
1129 		if (!priv->config.vf && !priv->config.sf &&
1130 		    !priv->representor) {
1131 			ret = mlx5_get_flag_dropless_rq(dev);
1132 			if (ret < 0)
1133 				DRV_LOG(WARNING,
1134 					"port %u cannot query dropless flag",
1135 					dev->data->port_id);
1136 			else if (!ret)
1137 				DRV_LOG(WARNING,
1138 					"port %u dropless_rq OFF, no rearming",
1139 					dev->data->port_id);
1140 		} else {
1141 			DRV_LOG(DEBUG,
1142 				"port %u doesn't support dropless_rq flag",
1143 				dev->data->port_id);
1144 		}
1145 	}
1146 	ret = mlx5_rxq_start(dev);
1147 	if (ret) {
1148 		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1149 			dev->data->port_id, strerror(rte_errno));
1150 		goto error;
1151 	}
1152 	/*
1153 	 * This step will be skipped if there is no hairpin Tx queue configured
1154 	 * with an Rx peer queue from the same device.
1155 	 */
1156 	ret = mlx5_hairpin_auto_bind(dev);
1157 	if (ret) {
1158 		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1159 			dev->data->port_id, strerror(rte_errno));
1160 		goto error;
1161 	}
1162 	/* Set started flag here for the following steps like control flow. */
1163 	dev->data->dev_started = 1;
1164 	ret = mlx5_rx_intr_vec_enable(dev);
1165 	if (ret) {
1166 		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1167 			dev->data->port_id);
1168 		goto error;
1169 	}
1170 	mlx5_os_stats_init(dev);
1171 	ret = mlx5_traffic_enable(dev);
1172 	if (ret) {
1173 		DRV_LOG(ERR, "port %u failed to set defaults flows",
1174 			dev->data->port_id);
1175 		goto error;
1176 	}
1177 	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
1178 	mlx5_flow_rxq_dynf_metadata_set(dev);
1179 	/* Set flags and context to convert Rx timestamps. */
1180 	mlx5_rxq_timestamp_set(dev);
1181 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
1182 	mlx5_txq_dynf_timestamp_set(dev);
1183 	/* Attach indirection table objects detached on port stop. */
1184 	ret = mlx5_action_handle_attach(dev);
1185 	if (ret) {
1186 		DRV_LOG(ERR,
1187 			"port %u failed to attach indirect actions: %s",
1188 			dev->data->port_id, rte_strerror(rte_errno));
1189 		goto error;
1190 	}
1191 	/*
1192 	 * In non-cached mode, only the default mreg copy action needs to be
1193 	 * started, since no flow created by the application exists anymore.
1194 	 * But it is worth wrapping the interface for further usage.
1195 	 */
1196 	ret = mlx5_flow_start_default(dev);
1197 	if (ret) {
1198 		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1199 			dev->data->port_id, strerror(rte_errno));
1200 		goto error;
1201 	}
1202 	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1203 		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1204 			dev->data->port_id, rte_strerror(rte_errno));
1205 		goto error;
1206 	}
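	/* Make all the configuration above visible before enabling the datapath. */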
1207 	rte_wmb();
1208 	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1209 	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1210 	/* Enable datapath on secondary process. */
1211 	mlx5_mp_os_req_start_rxtx(dev);
1212 	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1213 		priv->sh->port[priv->dev_port - 1].ih_port_id =
1214 					(uint32_t)dev->data->port_id;
1215 	} else {
1216 		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1217 			dev->data->port_id);
1218 		dev->data->dev_conf.intr_conf.lsc = 0;
1219 		dev->data->dev_conf.intr_conf.rmv = 0;
1220 	}
1221 	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1222 		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1223 					(uint32_t)dev->data->port_id;
1224 	return 0;
1225 error:
1226 	ret = rte_errno; /* Save rte_errno before cleanup. */
1227 	/* Rollback. */
1228 	dev->data->dev_started = 0;
1229 	mlx5_flow_stop_default(dev);
1230 	mlx5_traffic_disable(dev);
1231 	mlx5_txq_stop(dev);
1232 	mlx5_rxq_stop(dev);
1233 	if (priv->obj_ops.lb_dummy_queue_release)
1234 		priv->obj_ops.lb_dummy_queue_release(dev);
1235 	mlx5_txpp_stop(dev); /* Stop last. */
1236 	rte_errno = ret; /* Restore rte_errno. */
1237 	return -rte_errno;
1238 }
1239 
1240 /**
1241  * DPDK callback to stop the device.
1242  *
1243  * Simulate device stop by detaching all configured flows.
1244  *
1245  * @param dev
1246  *   Pointer to Ethernet device structure.
1247  */
1248 int
1249 mlx5_dev_stop(struct rte_eth_dev *dev)
1250 {
1251 	struct mlx5_priv *priv = dev->data->dev_private;
1252 
1253 	dev->data->dev_started = 0;
1254 	/* Prevent crashes when queues are still in use. */
1255 	dev->rx_pkt_burst = removed_rx_burst;
1256 	dev->tx_pkt_burst = removed_tx_burst;
1257 	rte_wmb();
1258 	/* Disable datapath on secondary process. */
1259 	mlx5_mp_os_req_stop_rxtx(dev);
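	/* Let in-flight Rx/Tx bursts drain (roughly 1 ms per Rx queue). */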
1260 	rte_delay_us_sleep(1000 * priv->rxqs_n);
1261 	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1262 	mlx5_flow_stop_default(dev);
1263 	/* Control flows for default traffic can be removed first. */
1264 	mlx5_traffic_disable(dev);
1265 	/* All RX queue flags will be cleared in the flush interface. */
1266 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1267 	mlx5_flow_meter_rxq_flush(dev);
1268 	mlx5_action_handle_detach(dev);
1269 	mlx5_rx_intr_vec_disable(dev);
1270 	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1271 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1272 	mlx5_txq_stop(dev);
1273 	mlx5_rxq_stop(dev);
1274 	if (priv->obj_ops.lb_dummy_queue_release)
1275 		priv->obj_ops.lb_dummy_queue_release(dev);
1276 	mlx5_txpp_stop(dev);
1277 
1278 	return 0;
1279 }
1280 
1281 /**
1282  * Enable traffic flows configured by control plane
1283  *
1284  * @param dev
1285  *   Pointer to Ethernet device structure.
1288  *
1289  * @return
1290  *   0 on success, a negative errno value otherwise and rte_errno is set.
1291  */
1292 int
1293 mlx5_traffic_enable(struct rte_eth_dev *dev)
1294 {
1295 	struct mlx5_priv *priv = dev->data->dev_private;
1296 	struct rte_flow_item_eth bcast = {
1297 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1298 	};
1299 	struct rte_flow_item_eth ipv6_multi_spec = {
1300 		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1301 	};
1302 	struct rte_flow_item_eth ipv6_multi_mask = {
1303 		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1304 	};
1305 	struct rte_flow_item_eth unicast = {
1306 		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1307 	};
1308 	struct rte_flow_item_eth unicast_mask = {
1309 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1310 	};
1311 	const unsigned int vlan_filter_n = priv->vlan_filter_n;
1312 	const struct rte_ether_addr cmp = {
1313 		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1314 	};
1315 	unsigned int i;
1316 	unsigned int j;
1317 	int ret;
1318 
1319 	/*
1320 	 * The hairpin Tx queue default flow should be created regardless of
1321 	 * isolation mode. Otherwise, all the packets to be sent will go out
1322 	 * directly without the Tx flow actions, e.g. encapsulation.
1323 	 */
1324 	for (i = 0; i != priv->txqs_n; ++i) {
1325 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1326 		if (!txq_ctrl)
1327 			continue;
1328 		/* Only Tx implicit mode requires the default Tx flow. */
1329 		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1330 		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1331 		    txq_ctrl->hairpin_conf.peers[0].port ==
1332 		    priv->dev_data->port_id) {
1333 			ret = mlx5_ctrl_flow_source_queue(dev, i);
1334 			if (ret) {
1335 				mlx5_txq_release(dev, i);
1336 				goto error;
1337 			}
1338 		}
1339 		if ((priv->representor || priv->master) &&
1340 		    priv->config.dv_esw_en) {
1341 			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1342 				DRV_LOG(ERR,
1343 					"Port %u Tx queue %u: failed to create the representor DevX SQ default miss rule.",
1344 					dev->data->port_id, i);
1345 				goto error;
1346 			}
1347 		}
1348 		mlx5_txq_release(dev, i);
1349 	}
1350 	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
1351 		if (mlx5_flow_create_esw_table_zero_flow(dev))
1352 			priv->fdb_def_rule = 1;
1353 		else
1354 			DRV_LOG(INFO, "port %u FDB default rule cannot be"
1355 				" configured - only Eswitch group 0 flows are"
1356 				" supported.", dev->data->port_id);
1357 	}
1358 	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1359 		ret = mlx5_flow_lacp_miss(dev);
1360 		if (ret)
1361 			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1362 				"forward LACP to kernel.", dev->data->port_id);
1363 		else
1364 			DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1365 				, dev->data->port_id);
1366 	}
1367 	if (priv->isolated)
1368 		return 0;
1369 	if (dev->data->promiscuous) {
1370 		struct rte_flow_item_eth promisc = {
1371 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1372 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1373 			.type = 0,
1374 		};
1375 
1376 		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1377 		if (ret)
1378 			goto error;
1379 	}
1380 	if (dev->data->all_multicast) {
1381 		struct rte_flow_item_eth multicast = {
1382 			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1383 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1384 			.type = 0,
1385 		};
1386 
1387 		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1388 		if (ret)
1389 			goto error;
1390 	} else {
1391 		/* Add broadcast/multicast flows. */
1392 		for (i = 0; i != vlan_filter_n; ++i) {
1393 			uint16_t vlan = priv->vlan_filter[i];
1394 
1395 			struct rte_flow_item_vlan vlan_spec = {
1396 				.tci = rte_cpu_to_be_16(vlan),
1397 			};
1398 			struct rte_flow_item_vlan vlan_mask =
1399 				rte_flow_item_vlan_mask;
1400 
1401 			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1402 						  &vlan_spec, &vlan_mask);
1403 			if (ret)
1404 				goto error;
1405 			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1406 						  &ipv6_multi_mask,
1407 						  &vlan_spec, &vlan_mask);
1408 			if (ret)
1409 				goto error;
1410 		}
1411 		if (!vlan_filter_n) {
1412 			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1413 			if (ret)
1414 				goto error;
1415 			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1416 					     &ipv6_multi_mask);
1417 			if (ret) {
1418 				/* Do not fail on IPv6 multicast flow creation failure. */
1419 				DRV_LOG(WARNING,
1420 					"IPv6 multicast is not supported");
1421 				ret = 0;
1422 			}
1423 		}
1424 	}
1425 	/* Add MAC address flows. */
1426 	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1427 		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1428 
1429 		if (!memcmp(mac, &cmp, sizeof(*mac)))
1430 			continue;
1431 		memcpy(&unicast.dst.addr_bytes,
1432 		       mac->addr_bytes,
1433 		       RTE_ETHER_ADDR_LEN);
1434 		for (j = 0; j != vlan_filter_n; ++j) {
1435 			uint16_t vlan = priv->vlan_filter[j];
1436 
1437 			struct rte_flow_item_vlan vlan_spec = {
1438 				.tci = rte_cpu_to_be_16(vlan),
1439 			};
1440 			struct rte_flow_item_vlan vlan_mask =
1441 				rte_flow_item_vlan_mask;
1442 
1443 			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1444 						  &unicast_mask,
1445 						  &vlan_spec,
1446 						  &vlan_mask);
1447 			if (ret)
1448 				goto error;
1449 		}
1450 		if (!vlan_filter_n) {
1451 			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1452 			if (ret)
1453 				goto error;
1454 		}
1455 	}
1456 	return 0;
1457 error:
1458 	ret = rte_errno; /* Save rte_errno before cleanup. */
1459 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1460 	rte_errno = ret; /* Restore rte_errno. */
1461 	return -rte_errno;
1462 }
1463 
1464 
1465 /**
1466  * Disable traffic flows configured by control plane
1467  *
1468  * @param dev
1469  *   Pointer to Ethernet device structure.
1470  */
1471 void
1472 mlx5_traffic_disable(struct rte_eth_dev *dev)
1473 {
1474 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1475 }
1476 
1477 /**
1478  * Restart traffic flows configured by control plane
1479  *
1480  * @param dev
1481  *   Pointer to Ethernet device structure.
1482  *
1483  * @return
1484  *   0 on success, a negative errno value otherwise and rte_errno is set.
1485  */
1486 int
1487 mlx5_traffic_restart(struct rte_eth_dev *dev)
1488 {
1489 	if (dev->data->dev_started) {
1490 		mlx5_traffic_disable(dev);
1491 		return mlx5_traffic_enable(dev);
1492 	}
1493 	return 0;
1494 }
1495