/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Translate the chunk address to MR key in order to put it into the cache.
 */
static void
mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
			     struct rte_mempool_memhdr *memhdr,
			     unsigned int idx)
{
	struct mlx5_rxq_data *rxq = opaque;

	RTE_SET_USED(mp);
	RTE_SET_USED(idx);
	mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
}

/**
 * Register Rx queue mempools and fill the Rx queue cache.
 * This function tolerates repeated mempool registration.
 *
 * @param[in] rxq_ctrl
 *   Rx queue control data.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_priv *priv = rxq_ctrl->priv;
	struct rte_mempool *mp;
	uint32_t s;
	int ret = 0;

	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
	/* MPRQ mempool is registered on creation, just fill the cache. */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
		rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
				     mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
		return 0;
	}
	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
		mp = rxq_ctrl->rxq.rxseg[s].mp;
		ret = mlx5_mr_mempool_register(&priv->sh->cdev->mr_scache,
					       priv->sh->cdev->pd, mp,
					       &priv->mp_id);
		if (ret < 0 && rte_errno != EEXIST)
			return ret;
		rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
	}
	return 0;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

		if (!rxq_ctrl)
			continue;
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
			/*
			 * Pre-register the mempools. Regardless of whether
			 * the implicit registration is enabled or not,
			 * Rx mempool destruction is tracked to free MRs.
			 */
			if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
				goto error;
			ret = rxq_alloc_elts(rxq_ctrl);
			if (ret)
				goto error;
		}
		MLX5_ASSERT(!rxq_ctrl->obj);
		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
					    sizeof(*rxq_ctrl->obj), 0,
					    rxq_ctrl->socket);
		if (!rxq_ctrl->obj) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u can't allocate resources.",
				dev->data->port_id, (*priv->rxqs)[i]->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.rxq_obj_new(dev, i);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds Tx queues to the target Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Qs with auto-bind will be destroyed directly. */
		rxq_ctrl->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, hairpin Tx queue
	 * will be bound to the peer Rx queue. Indeed, only the information of
	 * peer Rx queue needs to be fetched.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = txq_ctrl->obj->sq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_ctrl *rxq_ctrl;

		rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
		mlx5_rxq_release(dev, peer_queue);
	}
	return 0;
}

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
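 * The peer information is expected to be filled in beforehand, e.g. by
 * mlx5_hairpin_queue_peer_update() called on the peer port.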
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: opposite direction is used
	 * to get the peer queue info with ethdev port ID, no need to check.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and it is optional.
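		 * Only the Tx explicit rule mode and the manual binding mode
		 * are checked against the peer here.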
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 1;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RST;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 0;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx side to one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding TxQ to the peer RxQ, a first pass over the queues
	 * checks their configuration consistency. This costs a little time
	 * but is better than having to roll back.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port that are connected to
		 * the same peer Rx port should have the same "auto binding"
		 * and "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* If no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin Tx queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = txq_ctrl->obj->sq->id;
		cur.vhca_id = priv->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * In order to access another device in a proper way, RTE level
		 * private function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Do roll-back process for the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}

/*
 * Unbind the hairpin port pair. The HW configuration of both devices will be
 * cleared and the status will be reset for all the queues used between them.
 * This function only supports unbinding the Tx side from one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Indeed, only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				mlx5_txq_release(dev, i);
				return -rte_errno;
			} else {
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}

/*
 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the called function of single port.
	 * The device-started status is checked only before updating the queue
	 * information.
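	 * If binding fails for one of the ports, the ports already bound are
	 * unbound again in the rollback path below.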
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, dev->device)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}

/*
 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, may be different from that when setting up the
 * hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or positive value on success, actual number of peer ports.
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			rxq_ctrl = mlx5_rxq_get(dev, i);
			if (!rxq_ctrl)
				continue;
			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
				mlx5_rxq_release(dev, i);
				continue;
			}
			pp = rxq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_rxq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_rxq_release(dev, i);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if ((priv->sh->devx && priv->config.dv_flow_en &&
	     priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
		ret = priv->obj_ops.lb_dummy_queue_create(dev);
		if (ret)
			goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step is skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue from the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/* Attach indirection table objects detached on port stop. */
	ret = mlx5_action_handle_attach(dev);
	if (ret) {
		DRV_LOG(ERR,
			"port %u failed to attach indirect actions: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	/*
	 * In non-cached mode, only the default mreg copy action needs to be
	 * started, and no flow created by an application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	rte_delay_us_sleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All Rx queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_flow_meter_rxq_flush(dev);
	mlx5_action_handle_detach(dev);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev);

	return 0;
}

/**
 * Enable traffic flows configured by control plane
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The default flow of a hairpin Tx queue should be created regardless
	 * of isolation mode. Otherwise, all packets to be sent would go out
	 * directly without the Tx flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if ((priv->representor || priv->master) &&
		    priv->config.dv_esw_en) {
			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
				DRV_LOG(ERR,
					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
					dev->data->port_id, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret) {
				/* Do not fail on IPv6 broadcast creation failure. */
				DRV_LOG(WARNING,
					"IPv6 broadcast is not supported");
				ret = 0;
			}
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}

/**
 * Restart traffic flows configured by control plane
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}