xref: /dpdk/drivers/net/mlx5/mlx5_trigger.c (revision 6956a48cabbb5c98098aaf0116c255209b8f4e87)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2015 6WIND S.A.
3  * Copyright 2015 Mellanox Technologies, Ltd
4  */
5 
6 #include <unistd.h>
7 
8 #include <rte_ether.h>
9 #include <rte_ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13 
14 #include <mlx5_malloc.h>
15 
16 #include "mlx5.h"
17 #include "mlx5_mr.h"
18 #include "mlx5_rxtx.h"
19 #include "mlx5_utils.h"
20 #include "rte_pmd_mlx5.h"
21 
22 /**
23  * Stop traffic on Tx queues.
24  *
25  * @param dev
26  *   Pointer to Ethernet device structure.
27  */
28 static void
29 mlx5_txq_stop(struct rte_eth_dev *dev)
30 {
31 	struct mlx5_priv *priv = dev->data->dev_private;
32 	unsigned int i;
33 
34 	for (i = 0; i != priv->txqs_n; ++i)
35 		mlx5_txq_release(dev, i);
36 }
37 
38 /**
39  * Start traffic on Tx queues.
40  *
41  * @param dev
42  *   Pointer to Ethernet device structure.
43  *
44  * @return
45  *   0 on success, a negative errno value otherwise and rte_errno is set.
46  */
47 static int
48 mlx5_txq_start(struct rte_eth_dev *dev)
49 {
50 	struct mlx5_priv *priv = dev->data->dev_private;
51 	unsigned int i;
52 	int ret;
53 
54 	for (i = 0; i != priv->txqs_n; ++i) {
55 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
56 		struct mlx5_txq_data *txq_data;
57 		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
58 
59 		if (!txq_ctrl)
60 			continue;
		txq_data = &txq_ctrl->txq;
61 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
62 			txq_alloc_elts(txq_ctrl);
63 		MLX5_ASSERT(!txq_ctrl->obj);
64 		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
65 					    0, txq_ctrl->socket);
66 		if (!txq_ctrl->obj) {
67 			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
68 				"memory resources.", dev->data->port_id,
69 				txq_data->idx);
70 			rte_errno = ENOMEM;
71 			goto error;
72 		}
73 		ret = priv->obj_ops.txq_obj_new(dev, i);
74 		if (ret < 0) {
75 			mlx5_free(txq_ctrl->obj);
76 			txq_ctrl->obj = NULL;
77 			goto error;
78 		}
79 		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
80 			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
81 
82 			txq_data->fcqs = mlx5_malloc(flags, size,
83 						     RTE_CACHE_LINE_SIZE,
84 						     txq_ctrl->socket);
85 			if (!txq_data->fcqs) {
86 				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
87 					"allocate memory (FCQ).",
88 					dev->data->port_id, i);
89 				rte_errno = ENOMEM;
90 				goto error;
91 			}
92 		}
93 		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
94 			dev->data->port_id, i, (void *)&txq_ctrl->obj);
95 		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
96 	}
97 	return 0;
98 error:
99 	ret = rte_errno; /* Save rte_errno before cleanup. */
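	/* Release the failing queue and every queue started before it. */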
100 	do {
101 		mlx5_txq_release(dev, i);
102 	} while (i-- != 0);
103 	rte_errno = ret; /* Restore rte_errno. */
104 	return -rte_errno;
105 }
106 
107 /**
108  * Stop traffic on Rx queues.
109  *
110  * @param dev
111  *   Pointer to Ethernet device structure.
112  */
113 static void
114 mlx5_rxq_stop(struct rte_eth_dev *dev)
115 {
116 	struct mlx5_priv *priv = dev->data->dev_private;
117 	unsigned int i;
118 
119 	for (i = 0; i != priv->rxqs_n; ++i)
120 		mlx5_rxq_release(dev, i);
121 }
122 
123 /**
124  * Start traffic on Rx queues.
125  *
126  * @param dev
127  *   Pointer to Ethernet device structure.
128  *
129  * @return
130  *   0 on success, a negative errno value otherwise and rte_errno is set.
131  */
132 static int
133 mlx5_rxq_start(struct rte_eth_dev *dev)
134 {
135 	struct mlx5_priv *priv = dev->data->dev_private;
136 	unsigned int i;
137 	int ret = 0;
138 
139 	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
140 	if (mlx5_mprq_alloc_mp(dev)) {
141 		/* Should not release Rx queues but return immediately. */
142 		return -rte_errno;
143 	}
144 	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
145 		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
146 	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
147 		dev->data->port_id, priv->sh->device_attr.max_sge);
148 	for (i = 0; i != priv->rxqs_n; ++i) {
149 		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
150 
151 		if (!rxq_ctrl)
152 			continue;
153 		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
154 			/* Pre-register Rx mempools. */
155 			if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
156 				mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl,
157 						  rxq_ctrl->rxq.mprq_mp);
158 			} else {
159 				uint32_t s;
160 
161 				for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++)
162 					mlx5_mr_update_mp
163 						(dev, &rxq_ctrl->rxq.mr_ctrl,
164 						rxq_ctrl->rxq.rxseg[s].mp);
165 			}
166 			ret = rxq_alloc_elts(rxq_ctrl);
167 			if (ret)
168 				goto error;
169 		}
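		/*
		 * The queue object holds the backend-specific resources and is
		 * created through the obj_ops callbacks (Verbs or DevX based,
		 * selected at device probe time).
		 */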
170 		MLX5_ASSERT(!rxq_ctrl->obj);
171 		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
172 					    sizeof(*rxq_ctrl->obj), 0,
173 					    rxq_ctrl->socket);
174 		if (!rxq_ctrl->obj) {
175 			DRV_LOG(ERR,
176 				"Port %u Rx queue %u can't allocate resources.",
177 				dev->data->port_id, (*priv->rxqs)[i]->idx);
178 			rte_errno = ENOMEM;
179 			goto error;
180 		}
181 		ret = priv->obj_ops.rxq_obj_new(dev, i);
182 		if (ret) {
183 			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
184 			goto error;
185 		}
186 		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
187 			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
188 		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
189 	}
190 	return 0;
191 error:
192 	ret = rte_errno; /* Save rte_errno before cleanup. */
193 	do {
194 		mlx5_rxq_release(dev, i);
195 	} while (i-- != 0);
196 	rte_errno = ret; /* Restore rte_errno. */
197 	return -rte_errno;
198 }
199 
200 /**
201  * Binds Tx queues to Rx queues for hairpin.
202  *
203  * Only Tx queues peered with Rx queues of the same device are bound.
204  *
205  * @param dev
206  *   Pointer to Ethernet device structure.
207  *
208  * @return
209  *   0 on success, a negative errno value otherwise and rte_errno is set.
210  */
211 static int
212 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
213 {
214 	struct mlx5_priv *priv = dev->data->dev_private;
215 	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
216 	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
217 	struct mlx5_txq_ctrl *txq_ctrl;
218 	struct mlx5_rxq_ctrl *rxq_ctrl;
219 	struct mlx5_devx_obj *sq;
220 	struct mlx5_devx_obj *rq;
221 	unsigned int i;
222 	int ret = 0;
223 	bool need_auto = false;
224 	uint16_t self_port = dev->data->port_id;
225 
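	/*
	 * First pass: only check whether any hairpin Tx queue peered with
	 * this same port still requires automatic binding.
	 */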
226 	for (i = 0; i != priv->txqs_n; ++i) {
227 		txq_ctrl = mlx5_txq_get(dev, i);
228 		if (!txq_ctrl)
229 			continue;
230 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
231 			mlx5_txq_release(dev, i);
232 			continue;
233 		}
234 		if (txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
235 			continue;
		}
236 		if (txq_ctrl->hairpin_conf.manual_bind) {
237 			mlx5_txq_release(dev, i);
238 			return 0;
239 		}
240 		need_auto = true;
241 		mlx5_txq_release(dev, i);
242 	}
243 	if (!need_auto)
244 		return 0;
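	/*
	 * Second pass: move each SQ/RQ pair from RESET to READY with the peer
	 * object attached, binding the Tx queue to its Rx peer.
	 */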
245 	for (i = 0; i != priv->txqs_n; ++i) {
246 		txq_ctrl = mlx5_txq_get(dev, i);
247 		if (!txq_ctrl)
248 			continue;
249 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
250 			mlx5_txq_release(dev, i);
251 			continue;
252 		}
253 		/* Skip hairpin queues with other peer ports. */
254 		if (txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
255 			continue;
		}
256 		if (!txq_ctrl->obj) {
257 			rte_errno = ENOMEM;
258 			DRV_LOG(ERR, "port %u no txq object found: %d",
259 				dev->data->port_id, i);
260 			mlx5_txq_release(dev, i);
261 			return -rte_errno;
262 		}
263 		sq = txq_ctrl->obj->sq;
264 		rxq_ctrl = mlx5_rxq_get(dev,
265 					txq_ctrl->hairpin_conf.peers[0].queue);
266 		if (!rxq_ctrl) {
267 			mlx5_txq_release(dev, i);
268 			rte_errno = EINVAL;
269 			DRV_LOG(ERR, "port %u no rxq object found: %d",
270 				dev->data->port_id,
271 				txq_ctrl->hairpin_conf.peers[0].queue);
272 			return -rte_errno;
273 		}
274 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
275 		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
276 			rte_errno = ENOMEM;
277 			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
278 				"Rx queue %d", dev->data->port_id,
279 				i, txq_ctrl->hairpin_conf.peers[0].queue);
280 			goto error;
281 		}
282 		rq = rxq_ctrl->obj->rq;
283 		if (!rq) {
284 			rte_errno = ENOMEM;
285 			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
286 				dev->data->port_id,
287 				txq_ctrl->hairpin_conf.peers[0].queue);
288 			goto error;
289 		}
290 		sq_attr.state = MLX5_SQC_STATE_RDY;
291 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
292 		sq_attr.hairpin_peer_rq = rq->id;
293 		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
294 		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
295 		if (ret)
296 			goto error;
297 		rq_attr.state = MLX5_SQC_STATE_RDY;
298 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
299 		rq_attr.hairpin_peer_sq = sq->id;
300 		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
301 		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
302 		if (ret)
303 			goto error;
304 		/* Queues with auto-bind will be destroyed directly. */
305 		rxq_ctrl->hairpin_status = 1;
306 		txq_ctrl->hairpin_status = 1;
307 		mlx5_txq_release(dev, i);
308 		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
309 	}
310 	return 0;
311 error:
312 	mlx5_txq_release(dev, i);
313 	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
314 	return -rte_errno;
315 }
316 
317 /*
318  * Fetch the peer queue's SW & HW information.
319  *
320  * @param dev
321  *   Pointer to Ethernet device structure.
322  * @param peer_queue
323  *   Index of the queue to fetch the information from.
324  * @param current_info
325  *   Pointer to the input peer information, not used currently.
326  * @param peer_info
327  *   Pointer to the structure to store the information, output.
328  * @param direction
329  *   Positive to get the RxQ information, zero to get the TxQ information.
330  *
331  * @return
332  *   0 on success, a negative errno value otherwise and rte_errno is set.
333  */
334 int
335 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
336 			       struct rte_hairpin_peer_info *current_info,
337 			       struct rte_hairpin_peer_info *peer_info,
338 			       uint32_t direction)
339 {
340 	struct mlx5_priv *priv = dev->data->dev_private;
341 	RTE_SET_USED(current_info);
342 
343 	if (dev->data->dev_started == 0) {
344 		rte_errno = EBUSY;
345 		DRV_LOG(ERR, "peer port %u is not started",
346 			dev->data->port_id);
347 		return -rte_errno;
348 	}
349 	/*
350 	 * Peer port used as egress. In the current design, hairpin Tx queue
351 	 * will be bound to the peer Rx queue. Indeed, only the information of
352 	 * peer Rx queue needs to be fetched.
353 	 */
354 	if (direction == 0) {
355 		struct mlx5_txq_ctrl *txq_ctrl;
356 
357 		txq_ctrl = mlx5_txq_get(dev, peer_queue);
358 		if (txq_ctrl == NULL) {
359 			rte_errno = EINVAL;
360 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
361 				dev->data->port_id, peer_queue);
362 			return -rte_errno;
363 		}
364 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
365 			rte_errno = EINVAL;
366 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
367 				dev->data->port_id, peer_queue);
368 			mlx5_txq_release(dev, peer_queue);
369 			return -rte_errno;
370 		}
371 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
372 			rte_errno = ENOMEM;
373 			DRV_LOG(ERR, "port %u no Txq object found: %d",
374 				dev->data->port_id, peer_queue);
375 			mlx5_txq_release(dev, peer_queue);
376 			return -rte_errno;
377 		}
378 		peer_info->qp_id = txq_ctrl->obj->sq->id;
379 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
380 		/* 1-to-1 mapping, only the first one is used. */
381 		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
382 		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
383 		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
384 		mlx5_txq_release(dev, peer_queue);
385 	} else { /* Peer port used as ingress. */
386 		struct mlx5_rxq_ctrl *rxq_ctrl;
387 
388 		rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
389 		if (rxq_ctrl == NULL) {
390 			rte_errno = EINVAL;
391 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
392 				dev->data->port_id, peer_queue);
393 			return -rte_errno;
394 		}
395 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
396 			rte_errno = EINVAL;
397 			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
398 				dev->data->port_id, peer_queue);
399 			mlx5_rxq_release(dev, peer_queue);
400 			return -rte_errno;
401 		}
402 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
403 			rte_errno = ENOMEM;
404 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
405 				dev->data->port_id, peer_queue);
406 			mlx5_rxq_release(dev, peer_queue);
407 			return -rte_errno;
408 		}
409 		peer_info->qp_id = rxq_ctrl->obj->rq->id;
410 		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
411 		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
412 		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
413 		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
414 		mlx5_rxq_release(dev, peer_queue);
415 	}
416 	return 0;
417 }
418 
419 /*
420  * Bind the hairpin queue with the peer HW information.
421  * This needs to be called twice, for both the Tx and Rx queues of a pair.
422  * If the queue is already bound, it is considered successful.
423  *
424  * @param dev
425  *   Pointer to Ethernet device structure.
426  * @param cur_queue
427  *   Index of the queue to change the HW configuration to bind.
428  * @param peer_info
429  *   Pointer to information of the peer queue.
430  * @param direction
431  *   Positive to configure the TxQ, zero to configure the RxQ.
432  *
433  * @return
434  *   0 on success, a negative errno value otherwise and rte_errno is set.
435  */
436 int
437 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
438 			     struct rte_hairpin_peer_info *peer_info,
439 			     uint32_t direction)
440 {
441 	int ret = 0;
442 
443 	/*
444 	 * Consistency checking of the peer queue: opposite direction is used
445 	 * to get the peer queue info with ethdev port ID, no need to check.
446 	 */
447 	if (peer_info->peer_q != cur_queue) {
448 		rte_errno = EINVAL;
449 		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
450 			dev->data->port_id, cur_queue, peer_info->peer_q);
451 		return -rte_errno;
452 	}
453 	if (direction != 0) {
454 		struct mlx5_txq_ctrl *txq_ctrl;
455 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
456 
457 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
458 		if (txq_ctrl == NULL) {
459 			rte_errno = EINVAL;
460 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
461 				dev->data->port_id, cur_queue);
462 			return -rte_errno;
463 		}
464 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
465 			rte_errno = EINVAL;
466 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
467 				dev->data->port_id, cur_queue);
468 			mlx5_txq_release(dev, cur_queue);
469 			return -rte_errno;
470 		}
471 		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
472 			rte_errno = ENOMEM;
473 			DRV_LOG(ERR, "port %u no Txq object found: %d",
474 				dev->data->port_id, cur_queue);
475 			mlx5_txq_release(dev, cur_queue);
476 			return -rte_errno;
477 		}
478 		if (txq_ctrl->hairpin_status != 0) {
479 			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
480 				dev->data->port_id, cur_queue);
481 			mlx5_txq_release(dev, cur_queue);
482 			return 0;
483 		}
484 		/*
485 		 * Consistency checking of all queues of one port is done in
486 		 * the bind() function, and that check is optional.
487 		 */
488 		if (peer_info->tx_explicit !=
489 		    txq_ctrl->hairpin_conf.tx_explicit) {
490 			rte_errno = EINVAL;
491 			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
492 				" mismatch", dev->data->port_id, cur_queue);
493 			mlx5_txq_release(dev, cur_queue);
494 			return -rte_errno;
495 		}
496 		if (peer_info->manual_bind !=
497 		    txq_ctrl->hairpin_conf.manual_bind) {
498 			rte_errno = EINVAL;
499 			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
500 				" mismatch", dev->data->port_id, cur_queue);
501 			mlx5_txq_release(dev, cur_queue);
502 			return -rte_errno;
503 		}
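		/* Move the SQ from RESET to READY with the peer RQ attached. */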
504 		sq_attr.state = MLX5_SQC_STATE_RDY;
505 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
506 		sq_attr.hairpin_peer_rq = peer_info->qp_id;
507 		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
508 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
509 		if (ret == 0)
510 			txq_ctrl->hairpin_status = 1;
511 		mlx5_txq_release(dev, cur_queue);
512 	} else {
513 		struct mlx5_rxq_ctrl *rxq_ctrl;
514 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
515 
516 		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
517 		if (rxq_ctrl == NULL) {
518 			rte_errno = EINVAL;
519 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
520 				dev->data->port_id, cur_queue);
521 			return -rte_errno;
522 		}
523 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
524 			rte_errno = EINVAL;
525 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
526 				dev->data->port_id, cur_queue);
527 			mlx5_rxq_release(dev, cur_queue);
528 			return -rte_errno;
529 		}
530 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
531 			rte_errno = ENOMEM;
532 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
533 				dev->data->port_id, cur_queue);
534 			mlx5_rxq_release(dev, cur_queue);
535 			return -rte_errno;
536 		}
537 		if (rxq_ctrl->hairpin_status != 0) {
538 			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
539 				dev->data->port_id, cur_queue);
540 			mlx5_rxq_release(dev, cur_queue);
541 			return 0;
542 		}
543 		if (peer_info->tx_explicit !=
544 		    rxq_ctrl->hairpin_conf.tx_explicit) {
545 			rte_errno = EINVAL;
546 			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
547 				" mismatch", dev->data->port_id, cur_queue);
548 			mlx5_rxq_release(dev, cur_queue);
549 			return -rte_errno;
550 		}
551 		if (peer_info->manual_bind !=
552 		    rxq_ctrl->hairpin_conf.manual_bind) {
553 			rte_errno = EINVAL;
554 			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
555 				" mismatch", dev->data->port_id, cur_queue);
556 			mlx5_rxq_release(dev, cur_queue);
557 			return -rte_errno;
558 		}
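		/* Move the RQ from RESET to READY with the peer SQ attached. */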
559 		rq_attr.state = MLX5_SQC_STATE_RDY;
560 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
561 		rq_attr.hairpin_peer_sq = peer_info->qp_id;
562 		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
563 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
564 		if (ret == 0)
565 			rxq_ctrl->hairpin_status = 1;
566 		mlx5_rxq_release(dev, cur_queue);
567 	}
568 	return ret;
569 }
570 
571 /*
572  * Unbind the hairpin queue and reset its HW configuration.
573  * This needs to be called twice, for both the Tx and Rx queues of a pair.
574  * If the queue is already unbound, it is considered successful.
575  *
576  * @param dev
577  *   Pointer to Ethernet device structure.
578  * @param cur_queue
579  *   Index of the queue to change the HW configuration to unbind.
580  * @param direction
581  *   Positive to reset the TxQ, zero to reset the RxQ.
582  *
583  * @return
584  *   0 on success, a negative errno value otherwise and rte_errno is set.
585  */
586 int
587 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
588 			       uint32_t direction)
589 {
590 	int ret = 0;
591 
592 	if (direction != 0) {
593 		struct mlx5_txq_ctrl *txq_ctrl;
594 		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
595 
596 		txq_ctrl = mlx5_txq_get(dev, cur_queue);
597 		if (txq_ctrl == NULL) {
598 			rte_errno = EINVAL;
599 			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
600 				dev->data->port_id, cur_queue);
601 			return -rte_errno;
602 		}
603 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
604 			rte_errno = EINVAL;
605 			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
606 				dev->data->port_id, cur_queue);
607 			mlx5_txq_release(dev, cur_queue);
608 			return -rte_errno;
609 		}
610 		/* Already unbound, return success before obj checking. */
611 		if (txq_ctrl->hairpin_status == 0) {
612 			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
613 				dev->data->port_id, cur_queue);
614 			mlx5_txq_release(dev, cur_queue);
615 			return 0;
616 		}
617 		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
618 			rte_errno = ENOMEM;
619 			DRV_LOG(ERR, "port %u no Txq object found: %d",
620 				dev->data->port_id, cur_queue);
621 			mlx5_txq_release(dev, cur_queue);
622 			return -rte_errno;
623 		}
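		/* Return the SQ to the RESET state to drop the peer binding. */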
624 		sq_attr.state = MLX5_SQC_STATE_RST;
625 		sq_attr.sq_state = MLX5_SQC_STATE_RST;
626 		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
627 		if (ret == 0)
628 			txq_ctrl->hairpin_status = 0;
629 		mlx5_txq_release(dev, cur_queue);
630 	} else {
631 		struct mlx5_rxq_ctrl *rxq_ctrl;
632 		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
633 
634 		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
635 		if (rxq_ctrl == NULL) {
636 			rte_errno = EINVAL;
637 			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
638 				dev->data->port_id, cur_queue);
639 			return -rte_errno;
640 		}
641 		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
642 			rte_errno = EINVAL;
643 			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
644 				dev->data->port_id, cur_queue);
645 			mlx5_rxq_release(dev, cur_queue);
646 			return -rte_errno;
647 		}
648 		if (rxq_ctrl->hairpin_status == 0) {
649 			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
650 				dev->data->port_id, cur_queue);
651 			mlx5_rxq_release(dev, cur_queue);
652 			return 0;
653 		}
654 		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
655 			rte_errno = ENOMEM;
656 			DRV_LOG(ERR, "port %u no Rxq object found: %d",
657 				dev->data->port_id, cur_queue);
658 			mlx5_rxq_release(dev, cur_queue);
659 			return -rte_errno;
660 		}
661 		rq_attr.state = MLX5_SQC_STATE_RST;
662 		rq_attr.rq_state = MLX5_SQC_STATE_RST;
663 		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
664 		if (ret == 0)
665 			rxq_ctrl->hairpin_status = 0;
666 		mlx5_rxq_release(dev, cur_queue);
667 	}
668 	return ret;
669 }
670 
671 /*
672  * Bind the hairpin port pairs, from the Tx to the peer Rx.
673  * This function only supports binding the Tx side to a single Rx port.
674  *
675  * @param dev
676  *   Pointer to Ethernet device structure.
677  * @param rx_port
678  *   Port identifier of the Rx port.
679  *
680  * @return
681  *   0 on success, a negative errno value otherwise and rte_errno is set.
682  */
683 static int
684 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
685 {
686 	struct mlx5_priv *priv = dev->data->dev_private;
687 	int ret = 0;
688 	struct mlx5_txq_ctrl *txq_ctrl;
689 	uint32_t i;
690 	struct rte_hairpin_peer_info peer = {0xffffff};
691 	struct rte_hairpin_peer_info cur;
692 	const struct rte_eth_hairpin_conf *conf;
693 	uint16_t num_q = 0;
694 	uint16_t local_port = priv->dev_data->port_id;
695 	uint32_t manual;
696 	uint32_t explicit;
697 	uint16_t rx_queue;
698 
699 	if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
700 		rte_errno = ENODEV;
701 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
702 		return -rte_errno;
703 	}
704 	/*
705 	 * Before binding TxQ to peer RxQ, a first-round loop checks the
706 	 * queues' configuration consistency. This is a little time consuming
707 	 * but better than having to roll back afterwards.
708 	 */
709 	for (i = 0; i != priv->txqs_n; i++) {
710 		txq_ctrl = mlx5_txq_get(dev, i);
711 		if (txq_ctrl == NULL)
712 			continue;
713 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
714 			mlx5_txq_release(dev, i);
715 			continue;
716 		}
717 		/*
718 		 * All hairpin Tx queues of a single port that connected to the
719 		 * same peer Rx port should have the same "auto binding" and
720 		 * "implicit Tx flow" modes.
721 		 * Peer consistency checking will be done in per queue binding.
722 		 */
723 		conf = &txq_ctrl->hairpin_conf;
724 		if (conf->peers[0].port == rx_port) {
725 			if (num_q == 0) {
726 				manual = conf->manual_bind;
727 				explicit = conf->tx_explicit;
728 			} else {
729 				if (manual != conf->manual_bind ||
730 				    explicit != conf->tx_explicit) {
731 					rte_errno = EINVAL;
732 					DRV_LOG(ERR, "port %u queue %d mode"
733 						" mismatch: %u %u, %u %u",
734 						local_port, i, manual,
735 						conf->manual_bind, explicit,
736 						conf->tx_explicit);
737 					mlx5_txq_release(dev, i);
738 					return -rte_errno;
739 				}
740 			}
741 			num_q++;
742 		}
743 		mlx5_txq_release(dev, i);
744 	}
745 	/* If no hairpin queue is configured, return success directly. */
746 	if (num_q == 0)
747 		return ret;
748 	/* All the hairpin TX queues need to be traversed again. */
749 	for (i = 0; i != priv->txqs_n; i++) {
750 		txq_ctrl = mlx5_txq_get(dev, i);
751 		if (txq_ctrl == NULL)
752 			continue;
753 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
754 			mlx5_txq_release(dev, i);
755 			continue;
756 		}
757 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
758 			mlx5_txq_release(dev, i);
759 			continue;
760 		}
761 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
762 		/*
763 		 * Fetch peer RxQ's information.
764 		 * No need to pass the information of the current queue.
765 		 */
766 		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
767 							NULL, &peer, 1);
768 		if (ret != 0) {
769 			mlx5_txq_release(dev, i);
770 			goto error;
771 		}
772 		/* Accessing its own device, inside mlx5 PMD. */
773 		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
774 		if (ret != 0) {
775 			mlx5_txq_release(dev, i);
776 			goto error;
777 		}
778 		/* Pass TxQ's information to peer RxQ and try binding. */
779 		cur.peer_q = rx_queue;
780 		cur.qp_id = txq_ctrl->obj->sq->id;
781 		cur.vhca_id = priv->config.hca_attr.vhca_id;
782 		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
783 		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
784 		/*
785 		 * To access another device properly, the RTE-level private
786 		 * function is needed.
787 		 */
788 		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
789 						      &cur, 0);
790 		if (ret != 0) {
791 			mlx5_txq_release(dev, i);
792 			goto error;
793 		}
794 		mlx5_txq_release(dev, i);
795 	}
796 	return 0;
797 error:
798 	/*
799 	 * Do roll-back process for the queues already bound.
800 	 * No need to check the return value of the queue unbind function.
801 	 */
802 	do {
803 		/* No validation is needed here. */
804 		txq_ctrl = mlx5_txq_get(dev, i);
805 		if (txq_ctrl == NULL)
806 			continue;
807 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
808 		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
809 		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
810 		mlx5_txq_release(dev, i);
811 	} while (i--);
812 	return ret;
813 }
814 
815 /*
816  * Unbind the hairpin port pair. The HW configuration of both devices will
817  * be cleared and the status reset for all the queues used between them.
818  * This function only supports unbinding the Tx side from a single Rx port.
819  *
820  * @param dev
821  *   Pointer to Ethernet device structure.
822  * @param rx_port
823  *   Port identifier of the Rx port.
824  *
825  * @return
826  *   0 on success, a negative errno value otherwise and rte_errno is set.
827  */
828 static int
829 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
830 {
831 	struct mlx5_priv *priv = dev->data->dev_private;
832 	struct mlx5_txq_ctrl *txq_ctrl;
833 	uint32_t i;
834 	int ret;
835 	uint16_t cur_port = priv->dev_data->port_id;
836 
837 	if (mlx5_eth_find_next(rx_port, priv->pci_dev) != rx_port) {
838 		rte_errno = ENODEV;
839 		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
840 		return -rte_errno;
841 	}
842 	for (i = 0; i != priv->txqs_n; i++) {
843 		uint16_t rx_queue;
844 
845 		txq_ctrl = mlx5_txq_get(dev, i);
846 		if (txq_ctrl == NULL)
847 			continue;
848 		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
849 			mlx5_txq_release(dev, i);
850 			continue;
851 		}
852 		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
853 			mlx5_txq_release(dev, i);
854 			continue;
855 		}
856 		/* Indeed, only the first used queue needs to be checked. */
857 		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
858 			if (cur_port != rx_port) {
859 				rte_errno = EINVAL;
860 				DRV_LOG(ERR, "port %u and port %u are in"
861 					" auto-bind mode", cur_port, rx_port);
862 				mlx5_txq_release(dev, i);
863 				return -rte_errno;
864 			} else {
865 				return 0;
866 			}
867 		}
868 		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
869 		mlx5_txq_release(dev, i);
870 		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
871 		if (ret) {
872 			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
873 				rx_port, rx_queue);
874 			return ret;
875 		}
876 		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
877 		if (ret) {
878 			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
879 				cur_port, i);
880 			return ret;
881 		}
882 	}
883 	return 0;
884 }
885 
886 /*
887  * Bind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is passed.
888  * @see mlx5_hairpin_bind_single_port()
889  */
890 int
891 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
892 {
893 	int ret = 0;
894 	uint16_t p, pp;
895 	struct mlx5_priv *priv = dev->data->dev_private;
896 
897 	/*
898 	 * If the Rx port has no hairpin configuration with the current port,
899 	 * the binding will be skipped in the single-port function called below.
900 	 * The device started status is checked only right before the queue
901 	 * information is updated.
902 	 */
903 	if (rx_port == RTE_MAX_ETHPORTS) {
904 		MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
905 			ret = mlx5_hairpin_bind_single_port(dev, p);
906 			if (ret != 0)
907 				goto unbind;
908 		}
909 		return ret;
910 	} else {
911 		return mlx5_hairpin_bind_single_port(dev, rx_port);
912 	}
913 unbind:
914 	MLX5_ETH_FOREACH_DEV(pp, priv->pci_dev)
915 		if (pp < p)
916 			mlx5_hairpin_unbind_single_port(dev, pp);
917 	return ret;
918 }
919 
920 /*
921  * Unbind hairpin ports; Rx can be all ports when RTE_MAX_ETHPORTS is passed.
922  * @see mlx5_hairpin_unbind_single_port()
923  */
924 int
925 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
926 {
927 	int ret = 0;
928 	uint16_t p;
929 	struct mlx5_priv *priv = dev->data->dev_private;
930 
931 	if (rx_port == RTE_MAX_ETHPORTS)
932 		MLX5_ETH_FOREACH_DEV(p, priv->pci_dev) {
933 			ret = mlx5_hairpin_unbind_single_port(dev, p);
934 			if (ret != 0)
935 				return ret;
936 		}
937 	else
938 		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
939 	return ret;
940 }
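
/*
 * For illustration only (not part of the driver code): an application reaches
 * mlx5_hairpin_bind()/mlx5_hairpin_unbind() above through the generic ethdev
 * API, e.g. with hypothetical port numbers tx_port and rx_port:
 *
 *	ret = rte_eth_hairpin_bind(tx_port, rx_port);
 *	if (ret == 0) {
 *		... run hairpin traffic ...
 *		rte_eth_hairpin_unbind(tx_port, rx_port);
 *	}
 *
 * Passing RTE_MAX_ETHPORTS as the Rx port applies the operation to all
 * configured peer ports, as handled in the functions above.
 */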
941 
942 /*
943  * DPDK callback to get the hairpin peer ports list.
944  * This will return the actual number of peer ports and save the identifiers
945  * into the array (sorted, so the order may differ from the one used when
946  * setting up the hairpin peer queues).
947  * The peer port ID could be the same as the port ID of the current device.
948  *
949  * @param dev
950  *   Pointer to Ethernet device structure.
951  * @param peer_ports
952  *   Pointer to array to save the port identifiers.
953  * @param len
954  *   The length of the array.
955  * @param direction
956  *   Current port to peer port direction.
957  *   positive - current used as Tx to get all peer Rx ports.
958  *   zero - current used as Rx to get all peer Tx ports.
959  *
960  * @return
961  *   0 or a positive value on success, the actual number of peer ports;
962  *   a negative errno value otherwise and rte_errno is set.
963  */
964 int
965 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
966 			    size_t len, uint32_t direction)
967 {
968 	struct mlx5_priv *priv = dev->data->dev_private;
969 	struct mlx5_txq_ctrl *txq_ctrl;
970 	struct mlx5_rxq_ctrl *rxq_ctrl;
971 	uint32_t i;
972 	uint16_t pp;
973 	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
974 	int ret = 0;
975 
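	/*
	 * Collect peer port IDs in a bitmap so that each port is reported
	 * only once and the resulting list comes out sorted by port ID.
	 */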
976 	if (direction) {
977 		for (i = 0; i < priv->txqs_n; i++) {
978 			txq_ctrl = mlx5_txq_get(dev, i);
979 			if (!txq_ctrl)
980 				continue;
981 			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
982 				mlx5_txq_release(dev, i);
983 				continue;
984 			}
985 			pp = txq_ctrl->hairpin_conf.peers[0].port;
986 			if (pp >= RTE_MAX_ETHPORTS) {
987 				rte_errno = ERANGE;
988 				mlx5_txq_release(dev, i);
989 				DRV_LOG(ERR, "port %hu queue %u peer port "
990 					"out of range %hu",
991 					priv->dev_data->port_id, i, pp);
992 				return -rte_errno;
993 			}
994 			bits[pp / 32] |= 1 << (pp % 32);
995 			mlx5_txq_release(dev, i);
996 		}
997 	} else {
998 		for (i = 0; i < priv->rxqs_n; i++) {
999 			rxq_ctrl = mlx5_rxq_get(dev, i);
1000 			if (!rxq_ctrl)
1001 				continue;
1002 			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
1003 				mlx5_rxq_release(dev, i);
1004 				continue;
1005 			}
1006 			pp = rxq_ctrl->hairpin_conf.peers[0].port;
1007 			if (pp >= RTE_MAX_ETHPORTS) {
1008 				rte_errno = ERANGE;
1009 				mlx5_rxq_release(dev, i);
1010 				DRV_LOG(ERR, "port %hu queue %u peer port "
1011 					"out of range %hu",
1012 					priv->dev_data->port_id, i, pp);
1013 				return -rte_errno;
1014 			}
1015 			bits[pp / 32] |= 1 << (pp % 32);
1016 			mlx5_rxq_release(dev, i);
1017 		}
1018 	}
1019 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1020 		if (bits[i / 32] & (1 << (i % 32))) {
1021 			if ((size_t)ret >= len) {
1022 				rte_errno = E2BIG;
1023 				return -rte_errno;
1024 			}
1025 			peer_ports[ret++] = i;
1026 		}
1027 	}
1028 	return ret;
1029 }
1030 
1031 /**
1032  * DPDK callback to start the device.
1033  *
1034  * Simulate device start by attaching all configured flows.
1035  *
1036  * @param dev
1037  *   Pointer to Ethernet device structure.
1038  *
1039  * @return
1040  *   0 on success, a negative errno value otherwise and rte_errno is set.
1041  */
1042 int
1043 mlx5_dev_start(struct rte_eth_dev *dev)
1044 {
1045 	struct mlx5_priv *priv = dev->data->dev_private;
1046 	int ret;
1047 	int fine_inline;
1048 
1049 	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
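	/*
	 * Look up the optional dynamic mbuf flag for fine-granularity inline;
	 * if the application registered it, cache its bit mask for Tx bursts.
	 */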
1050 	fine_inline = rte_mbuf_dynflag_lookup
1051 		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1052 	if (fine_inline >= 0)
1053 		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1054 	else
1055 		rte_net_mlx5_dynf_inline_mask = 0;
1056 	if (dev->data->nb_rx_queues > 0) {
1057 		ret = mlx5_dev_configure_rss_reta(dev);
1058 		if (ret) {
1059 			DRV_LOG(ERR, "port %u reta config failed: %s",
1060 				dev->data->port_id, strerror(rte_errno));
1061 			return -rte_errno;
1062 		}
1063 	}
1064 	ret = mlx5_txpp_start(dev);
1065 	if (ret) {
1066 		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1067 			dev->data->port_id, strerror(rte_errno));
1068 		goto error;
1069 	}
1070 	ret = mlx5_txq_start(dev);
1071 	if (ret) {
1072 		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1073 			dev->data->port_id, strerror(rte_errno));
1074 		goto error;
1075 	}
1076 	ret = mlx5_rxq_start(dev);
1077 	if (ret) {
1078 		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1079 			dev->data->port_id, strerror(rte_errno));
1080 		goto error;
1081 	}
1082 	/*
1083 	 * This step will be skipped if there is no hairpin Tx queue configured
1084 	 * with an Rx peer queue from the same device.
1085 	 */
1086 	ret = mlx5_hairpin_auto_bind(dev);
1087 	if (ret) {
1088 		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1089 			dev->data->port_id, strerror(rte_errno));
1090 		goto error;
1091 	}
1092 	/* Set started flag here for the following steps like control flow. */
1093 	dev->data->dev_started = 1;
1094 	ret = mlx5_rx_intr_vec_enable(dev);
1095 	if (ret) {
1096 		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1097 			dev->data->port_id);
1098 		goto error;
1099 	}
1100 	mlx5_os_stats_init(dev);
1101 	ret = mlx5_traffic_enable(dev);
1102 	if (ret) {
1103 		DRV_LOG(ERR, "port %u failed to set default flows",
1104 			dev->data->port_id);
1105 		goto error;
1106 	}
1107 	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
1108 	mlx5_flow_rxq_dynf_metadata_set(dev);
1109 	/* Set flags and context to convert Rx timestamps. */
1110 	mlx5_rxq_timestamp_set(dev);
1111 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
1112 	mlx5_txq_dynf_timestamp_set(dev);
1113 	/*
1114 	 * In non-cached mode, only the default mreg copy action needs to be
1115 	 * started, since no flow created by an application exists anymore.
1116 	 * But it is worth wrapping the interface for further usage.
1117 	 */
1118 	ret = mlx5_flow_start_default(dev);
1119 	if (ret) {
1120 		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1121 			dev->data->port_id, strerror(rte_errno));
1122 		goto error;
1123 	}
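	/*
	 * Make sure all the queue and flow configuration done above is
	 * committed before the burst functions are set and datapath starts.
	 */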
1124 	rte_wmb();
1125 	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1126 	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1127 	/* Enable datapath on secondary process. */
1128 	mlx5_mp_os_req_start_rxtx(dev);
1129 	if (priv->sh->intr_handle.fd >= 0) {
1130 		priv->sh->port[priv->dev_port - 1].ih_port_id =
1131 					(uint32_t)dev->data->port_id;
1132 	} else {
1133 		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
1134 			dev->data->port_id);
1135 		dev->data->dev_conf.intr_conf.lsc = 0;
1136 		dev->data->dev_conf.intr_conf.rmv = 0;
1137 	}
1138 	if (priv->sh->intr_handle_devx.fd >= 0)
1139 		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1140 					(uint32_t)dev->data->port_id;
1141 	return 0;
1142 error:
1143 	ret = rte_errno; /* Save rte_errno before cleanup. */
1144 	/* Rollback. */
1145 	dev->data->dev_started = 0;
1146 	mlx5_flow_stop_default(dev);
1147 	mlx5_traffic_disable(dev);
1148 	mlx5_txq_stop(dev);
1149 	mlx5_rxq_stop(dev);
1150 	mlx5_txpp_stop(dev); /* Stop last. */
1151 	rte_errno = ret; /* Restore rte_errno. */
1152 	return -rte_errno;
1153 }
1154 
1155 /**
1156  * DPDK callback to stop the device.
1157  *
1158  * Simulate device stop by detaching all configured flows.
1159  *
1160  * @param dev
1161  *   Pointer to Ethernet device structure.
1162  */
1163 int
1164 mlx5_dev_stop(struct rte_eth_dev *dev)
1165 {
1166 	struct mlx5_priv *priv = dev->data->dev_private;
1167 
1168 	dev->data->dev_started = 0;
1169 	/* Prevent crashes when queues are still in use. */
1170 	dev->rx_pkt_burst = removed_rx_burst;
1171 	dev->tx_pkt_burst = removed_tx_burst;
1172 	rte_wmb();
1173 	/* Disable datapath on secondary process. */
1174 	mlx5_mp_os_req_stop_rxtx(dev);
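	/*
	 * Give data plane threads time to notice the removed burst functions
	 * and to complete in-flight bursts (roughly 1 ms per Rx queue).
	 */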
1175 	rte_delay_us_sleep(1000 * priv->rxqs_n);
1176 	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1177 	mlx5_flow_stop_default(dev);
1178 	/* Control flows for default traffic can be removed first. */
1179 	mlx5_traffic_disable(dev);
1180 	/* All RX queue flags will be cleared in the flush interface. */
1181 	mlx5_flow_list_flush(dev, &priv->flows, true);
1182 	mlx5_rx_intr_vec_disable(dev);
1183 	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1184 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1185 	mlx5_txq_stop(dev);
1186 	mlx5_rxq_stop(dev);
1187 	mlx5_txpp_stop(dev);
1188 
1189 	return 0;
1190 }
1191 
1192 /**
1193  * Enable traffic flows configured by control plane
1194  *
1197  * @param dev
1198  *   Pointer to Ethernet device structure.
1199  *
1200  * @return
1201  *   0 on success, a negative errno value otherwise and rte_errno is set.
1202  */
1203 int
1204 mlx5_traffic_enable(struct rte_eth_dev *dev)
1205 {
1206 	struct mlx5_priv *priv = dev->data->dev_private;
1207 	struct rte_flow_item_eth bcast = {
1208 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1209 	};
1210 	struct rte_flow_item_eth ipv6_multi_spec = {
1211 		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1212 	};
1213 	struct rte_flow_item_eth ipv6_multi_mask = {
1214 		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1215 	};
1216 	struct rte_flow_item_eth unicast = {
1217 		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1218 	};
1219 	struct rte_flow_item_eth unicast_mask = {
1220 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1221 	};
1222 	const unsigned int vlan_filter_n = priv->vlan_filter_n;
1223 	const struct rte_ether_addr cmp = {
1224 		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1225 	};
1226 	unsigned int i;
1227 	unsigned int j;
1228 	int ret;
1229 
1230 	/*
1231 	 * The hairpin Tx queue default flow should be created regardless of
1232 	 * isolation mode. Otherwise all the packets to be sent would go out
1233 	 * directly without the Tx flow actions, e.g. encapsulation.
1234 	 */
1235 	for (i = 0; i != priv->txqs_n; ++i) {
1236 		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1237 		if (!txq_ctrl)
1238 			continue;
1239 		/* Only Tx implicit mode requires the default Tx flow. */
1240 		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
1241 		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1242 		    txq_ctrl->hairpin_conf.peers[0].port ==
1243 		    priv->dev_data->port_id) {
1244 			ret = mlx5_ctrl_flow_source_queue(dev, i);
1245 			if (ret) {
1246 				mlx5_txq_release(dev, i);
1247 				goto error;
1248 			}
1249 		}
1250 		mlx5_txq_release(dev, i);
1251 	}
1252 	if (priv->config.dv_esw_en && !priv->config.vf) {
1253 		if (mlx5_flow_create_esw_table_zero_flow(dev))
1254 			priv->fdb_def_rule = 1;
1255 		else
1256 			DRV_LOG(INFO, "port %u FDB default rule cannot be"
1257 				" configured - only Eswitch group 0 flows are"
1258 				" supported.", dev->data->port_id);
1259 	}
1260 	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
1261 		ret = mlx5_flow_lacp_miss(dev);
1262 		if (ret)
1263 			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1264 				"forward LACP to kernel.", dev->data->port_id);
1265 		else
1266 			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
1267 				dev->data->port_id);
1268 	}
1269 	if (priv->isolated)
1270 		return 0;
1271 	if (dev->data->promiscuous) {
1272 		struct rte_flow_item_eth promisc = {
1273 			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1274 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1275 			.type = 0,
1276 		};
1277 
1278 		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1279 		if (ret)
1280 			goto error;
1281 	}
1282 	if (dev->data->all_multicast) {
1283 		struct rte_flow_item_eth multicast = {
1284 			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1285 			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1286 			.type = 0,
1287 		};
1288 
1289 		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1290 		if (ret)
1291 			goto error;
1292 	} else {
1293 		/* Add broadcast/multicast flows. */
1294 		for (i = 0; i != vlan_filter_n; ++i) {
1295 			uint16_t vlan = priv->vlan_filter[i];
1296 
1297 			struct rte_flow_item_vlan vlan_spec = {
1298 				.tci = rte_cpu_to_be_16(vlan),
1299 			};
1300 			struct rte_flow_item_vlan vlan_mask =
1301 				rte_flow_item_vlan_mask;
1302 
1303 			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1304 						  &vlan_spec, &vlan_mask);
1305 			if (ret)
1306 				goto error;
1307 			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1308 						  &ipv6_multi_mask,
1309 						  &vlan_spec, &vlan_mask);
1310 			if (ret)
1311 				goto error;
1312 		}
1313 		if (!vlan_filter_n) {
1314 			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1315 			if (ret)
1316 				goto error;
1317 			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1318 					     &ipv6_multi_mask);
1319 			if (ret) {
1320 				/* Do not fail on IPv6 multicast creation failure. */
1321 				DRV_LOG(WARNING,
1322 					"IPv6 multicast is not supported");
1323 				ret = 0;
1324 			}
1325 		}
1326 	}
1327 	/* Add MAC address flows. */
1328 	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1329 		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1330 
1331 		if (!memcmp(mac, &cmp, sizeof(*mac)))
1332 			continue;
1333 		memcpy(&unicast.dst.addr_bytes,
1334 		       mac->addr_bytes,
1335 		       RTE_ETHER_ADDR_LEN);
1336 		for (j = 0; j != vlan_filter_n; ++j) {
1337 			uint16_t vlan = priv->vlan_filter[j];
1338 
1339 			struct rte_flow_item_vlan vlan_spec = {
1340 				.tci = rte_cpu_to_be_16(vlan),
1341 			};
1342 			struct rte_flow_item_vlan vlan_mask =
1343 				rte_flow_item_vlan_mask;
1344 
1345 			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1346 						  &unicast_mask,
1347 						  &vlan_spec,
1348 						  &vlan_mask);
1349 			if (ret)
1350 				goto error;
1351 		}
1352 		if (!vlan_filter_n) {
1353 			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1354 			if (ret)
1355 				goto error;
1356 		}
1357 	}
1358 	return 0;
1359 error:
1360 	ret = rte_errno; /* Save rte_errno before cleanup. */
1361 	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
1362 	rte_errno = ret; /* Restore rte_errno. */
1363 	return -rte_errno;
1364 }
1365 
1367 /**
1368  * Disable traffic flows configured by control plane
1369  *
1370  * @param dev
1371  *   Pointer to Ethernet device private data.
1372  *   Pointer to Ethernet device structure.
1373 void
1374 mlx5_traffic_disable(struct rte_eth_dev *dev)
1375 {
1376 	struct mlx5_priv *priv = dev->data->dev_private;
1377 
1378 	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
1379 }
1380 
1381 /**
1382  * Restart traffic flows configured by control plane
1383  *
1384  * @param dev
1385  *   Pointer to Ethernet device private data.
1386  *
1387  * @return
1388  *   0 on success, a negative errno value otherwise and rte_errno is set.
1389  */
1390 int
1391 mlx5_traffic_restart(struct rte_eth_dev *dev)
1392 {
1393 	if (dev->data->dev_started) {
1394 		mlx5_traffic_disable(dev);
1395 		return mlx5_traffic_enable(dev);
1396 	}
1397 	return 0;
1398 }
1399