/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		txq_data = &txq_ctrl->txq;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Release the failed queue and all queues set up before it. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Translate the chunk address to MR key in order to put it into the cache.
 */
static void
mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
			     struct rte_mempool_memhdr *memhdr,
			     unsigned int idx)
{
	struct mlx5_rxq_data *rxq = opaque;

	RTE_SET_USED(mp);
	RTE_SET_USED(idx);
	mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
}

/**
 * Register Rx queue mempools and fill the Rx queue cache.
 * This function tolerates repeated mempool registration.
 *
 * @param[in] rxq_ctrl
 *   Rx queue control data.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_priv *priv = rxq_ctrl->priv;
	struct rte_mempool *mp;
	uint32_t s;
	int ret = 0;

	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
	/* MPRQ mempool is registered on creation, just fill the cache. */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
		rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
				     mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
		return 0;
	}
	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
		mp = rxq_ctrl->rxq.rxseg[s].mp;
		ret = mlx5_mr_mempool_register(&priv->sh->cdev->mr_scache,
					       priv->sh->cdev->pd, mp,
					       &priv->mp_id);
		if (ret < 0 && rte_errno != EEXIST)
			return ret;
		rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
	}
	return 0;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

		if (!rxq_ctrl)
			continue;
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
			/*
			 * Pre-register the mempools. Regardless of whether
			 * the implicit registration is enabled or not,
			 * Rx mempool destruction is tracked to free MRs.
			 */
			if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
				goto error;
			ret = rxq_alloc_elts(rxq_ctrl);
			if (ret)
				goto error;
		}
		MLX5_ASSERT(!rxq_ctrl->obj);
		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
					    sizeof(*rxq_ctrl->obj), 0,
					    rxq_ctrl->socket);
		if (!rxq_ctrl->obj) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u can't allocate resources.",
				dev->data->port_id, (*priv->rxqs)[i]->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.rxq_obj_new(dev, i);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Release the failed queue and all queues set up before it. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
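
/*
 * Illustrative sketch only, not part of the driver: the hairpin queues
 * handled below are created beforehand by the application through the
 * generic ethdev hairpin API, for example a 1:1 Tx->Rx pair on the same
 * port:
 *
 *	struct rte_eth_hairpin_conf conf = {
 *		.peer_count = 1,
 *		.peers[0] = { .port = port_id, .queue = rx_queue },
 *	};
 *	ret = rte_eth_tx_hairpin_queue_setup(port_id, tx_queue,
 *					     nb_desc, &conf);
 *
 * (with the mirrored rte_eth_rx_hairpin_queue_setup() call on the Rx side).
 * The manual_bind and tx_explicit bits of this configuration select the
 * binding and Tx flow modes checked by the functions below.
 */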

/**
 * Binds Tx queues to the target Rx queues for hairpin.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Qs with auto-bind will be destroyed directly. */
		rxq_ctrl->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, hairpin Tx queue
	 * will be bound to the peer Rx queue. Indeed, only the information of
	 * peer Rx queue needs to be fetched.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = txq_ctrl->obj->sq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_ctrl *rxq_ctrl;

		rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
		mlx5_rxq_release(dev, peer_queue);
	}
	return 0;
}

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: opposite direction is used
	 * to get the peer queue info with ethdev port ID, no need to check.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all the queues of one port is done
		 * in the bind() function, and that check is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 1;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RST;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 0;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}
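
/*
 * Overview of the helpers above: a manually bound hairpin pair is set up per
 * queue and in both directions. One side first exposes its SQ/RQ id and VHCA
 * id through mlx5_hairpin_queue_peer_update(), the other side programs that
 * information into its own queue with mlx5_hairpin_queue_peer_bind(), and
 * mlx5_hairpin_queue_peer_unbind() moves a queue back to the RESET state on
 * teardown or rollback. The port-level functions below drive this sequence
 * for every hairpin Tx queue connected to the given peer Rx port.
 */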

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx side to one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding TxQ to peer RxQ, first round loop will be used for
	 * checking the queues' configuration consistency. This would be a
	 * little time consuming but better than doing the rollback.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port connected to the
		 * same peer Rx port should have the same "auto binding" and
		 * "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* If no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin TX queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = txq_ctrl->obj->sq->id;
		cur.vhca_id = priv->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * In order to access another device in a proper way, RTE level
		 * private function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Do roll-back process for the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}

/*
 * Unbind the hairpin port pair. The HW configuration of both devices will be
 * cleared and the status will be reset for all the queues used between them.
 * This function only supports unbinding the Tx side from one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Indeed, only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			/* Release the queue reference before returning. */
			mlx5_txq_release(dev, i);
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				return -rte_errno;
			} else {
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}

/*
 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the called function of single port.
	 * Device started status will be checked only before the queue
	 * information updating.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, dev->device)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}

/*
 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}
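
/*
 * Illustrative sketch only, not part of the driver: the port-level bind and
 * unbind above are normally reached through the generic ethdev API, e.g.:
 *
 *	ret = rte_eth_hairpin_bind(tx_port_id, rx_port_id);
 *	...
 *	ret = rte_eth_hairpin_unbind(tx_port_id, rx_port_id);
 *
 * Passing RTE_MAX_ETHPORTS as the peer port applies the operation to all
 * peer ports, as handled above.
 */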

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, may be different from that when setting up the
 * hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or a positive value on success (the actual number of peer ports),
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			rxq_ctrl = mlx5_rxq_get(dev, i);
			if (!rxq_ctrl)
				continue;
			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
				mlx5_rxq_release(dev, i);
				continue;
			}
			pp = rxq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_rxq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_rxq_release(dev, i);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}
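
/*
 * Illustrative sketch only, not part of the driver: applications query the
 * peer list through the ethdev wrapper, e.g. all peer Rx ports of tx_port:
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int n = rte_eth_hairpin_get_peer_ports(tx_port, peers,
 *					       RTE_DIM(peers), 1);
 *
 * A negative return value is an error; otherwise n is the number of peer
 * ports stored in the array.
 */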

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if ((priv->sh->devx && priv->config.dv_flow_en &&
	    priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
		ret = priv->obj_ops.lb_dummy_queue_create(dev);
		if (ret)
			goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step is skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue from the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, it only needs to start the default mreg copy
	 * action and no flow created by application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
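	/*
	 * Note: the rte_wmb() above orders all queue and flow state writes
	 * before the burst callback swap just done and before the secondary
	 * processes are requested (below) to start their datapath.
	 */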
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (priv->sh->intr_handle.fd >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (priv->sh->intr_handle_devx.fd >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	/* Give in-flight Rx bursts time to complete: 1 ms per Rx queue. */
	rte_delay_us_sleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All RX queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_flow_meter_rxq_flush(dev);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev);

	return 0;
}
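
/*
 * Illustrative sketch only, not part of the driver: mlx5_dev_start() and
 * mlx5_dev_stop() above are the dev_ops callbacks reached through the
 * generic ethdev API:
 *
 *	ret = rte_eth_dev_start(port_id);	-> mlx5_dev_start()
 *	...
 *	ret = rte_eth_dev_stop(port_id);	-> mlx5_dev_stop()
 */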

/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow must be created regardless of
	 * isolation mode; otherwise the packets to be sent would go out
	 * directly without the Tx flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if ((priv->representor || priv->master) &&
		    priv->config.dv_esw_en) {
			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
				DRV_LOG(ERR,
					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
					dev->data->port_id, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
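		/*
		 * With VLAN filters configured, one broadcast and one IPv6
		 * multicast rule is created per VLAN ID below; without any
		 * filter, a single untagged rule of each kind is enough.
		 */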
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret) {
				/* Do not fail on IPv6 broadcast creation failure. */
				DRV_LOG(WARNING,
					"IPv6 broadcast is not supported");
				ret = 0;
			}
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}