/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		txq_data = &txq_ctrl->txq;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
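
/*
 * Editorial note (hedged): mlx5_txq_start() above and mlx5_rxq_start() below
 * share the same rollback idiom - save rte_errno, release every queue up to
 * and including the failing index, then restore rte_errno for the caller.
 * A minimal sketch of the pattern, with hypothetical resource_start() and
 * resource_release() helpers standing in for the queue operations:
 *
 *	unsigned int i;
 *	int ret;
 *
 *	for (i = 0; i != n; ++i)
 *		if (resource_start(i) != 0)
 *			goto error;
 *	return 0;
 * error:
 *	ret = rte_errno;	// Save rte_errno before cleanup.
 *	do {
 *		resource_release(i);	// Entry i and all previous entries.
 *	} while (i-- != 0);
 *	rte_errno = ret;	// Restore rte_errno.
 *	return -rte_errno;
 */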

/**
 * Translate the chunk address to MR key in order to put it into the cache.
 */
static void
mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
			     struct rte_mempool_memhdr *memhdr,
			     unsigned int idx)
{
	struct mlx5_rxq_data *rxq = opaque;

	RTE_SET_USED(mp);
	RTE_SET_USED(idx);
	mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
}

/**
 * Register Rx queue mempools and fill the Rx queue cache.
 * This function tolerates repeated mempool registration.
 *
 * @param[in] rxq_ctrl
 *   Rx queue control data.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct mlx5_priv *priv = rxq_ctrl->priv;
	struct rte_mempool *mp;
	uint32_t s;
	int ret = 0;

	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
	/* MPRQ mempool is registered on creation, just fill the cache. */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
		rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
				     mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
		return 0;
	}
	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
		mp = rxq_ctrl->rxq.rxseg[s].mp;
		ret = mlx5_mr_mempool_register(&priv->sh->share_cache,
					       priv->sh->pd, mp, &priv->mp_id);
		if (ret < 0 && rte_errno != EEXIST)
			return ret;
		rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
	}
	return 0;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);

		if (!rxq_ctrl)
			continue;
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
			/*
			 * Pre-register the mempools. Regardless of whether
			 * the implicit registration is enabled or not,
			 * Rx mempool destruction is tracked to free MRs.
			 */
			if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
				goto error;
			ret = rxq_alloc_elts(rxq_ctrl);
			if (ret)
				goto error;
		}
		MLX5_ASSERT(!rxq_ctrl->obj);
		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
					    sizeof(*rxq_ctrl->obj), 0,
					    rxq_ctrl->socket);
		if (!rxq_ctrl->obj) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u can't allocate resources.",
				dev->data->port_id, (*priv->rxqs)[i]->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.rxq_obj_new(dev, i);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Binds Tx queues to the target Rx queues for hairpin.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Queues with auto-bind will be destroyed directly. */
		rxq_ctrl->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}
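
/*
 * Editorial note (hedged): the auto-bind path above is only taken when both
 * sides of a hairpin pair live on the same port and manual binding was not
 * requested at queue setup time. A minimal application-side sketch of such a
 * configuration (names and sizes are illustrative only):
 *
 *	struct rte_eth_hairpin_conf conf = {
 *		.peer_count = 1,
 *		.manual_bind = 0,	// Let the PMD bind at device start.
 *		.tx_explicit = 0,	// PMD inserts the implicit Tx flow.
 *	};
 *
 *	conf.peers[0].port = port_id;	// Same port: auto-bind candidate.
 *	conf.peers[0].queue = rxq_id;
 *	ret = rte_eth_tx_hairpin_queue_setup(port_id, txq_id, nb_desc, &conf);
 */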

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, the hairpin Tx
	 * queue will be bound to the peer Rx queue. Indeed, only the
	 * information of the peer Rx queue needs to be fetched.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = txq_ctrl->obj->sq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_ctrl *rxq_ctrl;

		rxq_ctrl = mlx5_rxq_get(dev, peer_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_rxq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq_ctrl->hairpin_conf.manual_bind;
		mlx5_rxq_release(dev, peer_queue);
	}
	return 0;
}
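
/*
 * Editorial note (hedged): this callback is normally reached through the
 * ethdev-internal helper rte_eth_hairpin_queue_peer_update(), as done later
 * in mlx5_hairpin_bind_single_port(). A minimal sketch from the Tx side,
 * asking the peer Rx port for its queue information (rx_port/rx_queue and
 * tx_queue are placeholders):
 *
 *	struct rte_hairpin_peer_info peer;
 *	int ret;
 *
 *	// direction == 1: the peer is used as the ingress (Rx) side.
 *	ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
 *						NULL, &peer, 1);
 *	if (ret == 0)
 *		ret = mlx5_hairpin_queue_peer_bind(dev, tx_queue, &peer, 1);
 */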

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: opposite direction is used
	 * to get the peer queue info with ethdev port ID, no need to check.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and that is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 1;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		rxq_ctrl = mlx5_rxq_get(dev, cur_queue);
		if (rxq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_rxq_release(dev, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RST;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq_ctrl->hairpin_status = 0;
		mlx5_rxq_release(dev, cur_queue);
	}
	return ret;
}
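
/*
 * Editorial note (hedged): the three per-queue callbacks above are expected
 * to be exposed through the device operation table so that the generic
 * rte_eth_hairpin_queue_peer_*() helpers can dispatch to them, roughly:
 *
 *	static const struct eth_dev_ops mlx5_dev_ops = {
 *		// ...
 *		.hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update,
 *		.hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
 *		.hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
 *		// ...
 *	};
 */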

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx to one Rx.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding a TxQ to the peer RxQ, the first round of the loop
	 * checks the queues' configuration consistency. This is a little time
	 * consuming but better than doing a rollback afterwards.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port that are connected
		 * to the same peer Rx port should have the same "auto
		 * binding" and "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per-queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* If no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin Tx queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = txq_ctrl->obj->sq->id;
		cur.vhca_id = priv->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * In order to access another device in a proper way, an RTE
		 * level private function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Do the roll-back process for the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}

/*
 * Unbind the hairpin port pair; the HW configuration of both devices will be
 * cleared and the status reset for all the queues used between them.
 * This function only supports unbinding the Tx from one Rx.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Indeed, only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				mlx5_txq_release(dev, i);
				return -rte_errno;
			} else {
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}

/*
 * Bind hairpin ports; Rx can be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the single-port bind function.
	 * The device started status is checked only before updating the
	 * queue information.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, dev->device)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}

/*
 * Unbind hairpin ports; Rx can be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}
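
/*
 * Editorial note (hedged): mlx5_hairpin_bind()/mlx5_hairpin_unbind() above
 * back the public rte_eth_hairpin_bind()/rte_eth_hairpin_unbind() calls used
 * by applications in manual-bind mode, e.g. after both ports are started:
 *
 *	// Bind Tx of port0 to its peer Rx on port1, then the reverse side.
 *	ret = rte_eth_hairpin_bind(port0, port1);
 *	if (ret == 0)
 *		ret = rte_eth_hairpin_bind(port1, port0);
 *	// ... datapath ...
 *	// Unbind before stopping the ports.
 *	rte_eth_hairpin_unbind(port0, port1);
 *	rte_eth_hairpin_unbind(port1, port0);
 */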

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, and possibly in a different order from the one used
 * when setting up the hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or positive value on success, actual number of peer ports.
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			rxq_ctrl = mlx5_rxq_get(dev, i);
			if (!rxq_ctrl)
				continue;
			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
				mlx5_rxq_release(dev, i);
				continue;
			}
			pp = rxq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_rxq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_rxq_release(dev, i);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}
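
/*
 * Editorial note (hedged): the callback above serves
 * rte_eth_hairpin_get_peer_ports(). A minimal caller-side sketch listing the
 * Rx peer ports of a Tx port (array size is illustrative):
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int nb_peers;
 *
 *	// direction = 1: the queried port acts as Tx, peers act as Rx.
 *	nb_peers = rte_eth_hairpin_get_peer_ports(port_id, peers,
 *						  RTE_MAX_ETHPORTS, 1);
 *	if (nb_peers < 0)
 *		rte_exit(EXIT_FAILURE, "cannot get hairpin peers\n");
 */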

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if ((priv->config.devx && priv->config.dv_flow_en &&
	     priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
		ret = priv->obj_ops.lb_dummy_queue_create(dev);
		if (ret)
			goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step will be skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue from the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, it only needs to start the default mreg copy
	 * action and no flow created by an application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (priv->sh->intr_handle.fd >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (priv->sh->intr_handle_devx.fd >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	rte_delay_us_sleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All RX queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_flow_meter_rxq_flush(dev);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev);

	return 0;
}
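
/*
 * Editorial note (hedged): mlx5_dev_start()/mlx5_dev_stop() are reached
 * through the generic ethdev start/stop calls. A minimal application-side
 * lifecycle sketch (queue setup parameters are illustrative):
 *
 *	struct rte_eth_conf conf = { 0 };
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *	for (q = 0; q < nb_rxq; q++)
 *		rte_eth_rx_queue_setup(port_id, q, 1024, socket, NULL, mp);
 *	for (q = 0; q < nb_txq; q++)
 *		rte_eth_tx_queue_setup(port_id, q, 1024, socket, NULL);
 *	rte_eth_dev_start(port_id);	// Ends up in mlx5_dev_start().
 *	// ... datapath ...
 *	rte_eth_dev_stop(port_id);	// Ends up in mlx5_dev_stop().
 */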

/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow should be created regardless of
	 * isolation mode; otherwise all the packets to be sent will be sent
	 * out directly without the Tx flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (priv->config.dv_esw_en && !priv->config.vf && !priv->config.sf) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret) {
				/* Do not fail on IPv6 broadcast creation failure. */
				DRV_LOG(WARNING,
					"IPv6 broadcast is not supported");
				ret = 0;
			}
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}
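
/*
 * Editorial note (hedged): control-path handlers elsewhere in this PMD (for
 * example MAC address, VLAN filter or promiscuous/all-multicast updates) are
 * expected to refresh the control flows through the wrapper above, roughly:
 *
 *	// After updating dev->data (MAC table, VLAN filter, mode flags...):
 *	return mlx5_traffic_restart(dev);
 *
 * The dev_started check inside mlx5_traffic_restart() makes the call a no-op
 * while the port is stopped.
 */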