/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		txq_data = &txq_ctrl->txq;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Register Rx queue mempools and fill the Rx queue cache.
 * This function tolerates repeated mempool registration.
 *
 * @param[in] rxq_ctrl
 *   Rx queue control data.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct rte_mempool *mp;
	uint32_t s;
	int ret = 0;

	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
	/* MPRQ mempool is registered on creation, just fill the cache. */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
		return mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
						      rxq_ctrl->rxq.mprq_mp);
	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
		bool is_extmem;

		mp = rxq_ctrl->rxq.rxseg[s].mp;
		is_extmem = (rte_pktmbuf_priv_flags(mp) &
			     RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
		ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
					       is_extmem);
		if (ret < 0 && rte_errno != EEXIST)
			return ret;
		ret = mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
						     mp);
		if (ret < 0)
			return ret;
	}
	return 0;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

static int
mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
		      unsigned int idx)
{
	int ret = 0;

	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
		/*
		 * Pre-register the mempools. Regardless of whether
		 * the implicit registration is enabled or not,
		 * Rx mempool destruction is tracked to free MRs.
		 */
		if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
			return -rte_errno;
		ret = rxq_alloc_elts(rxq_ctrl);
		if (ret)
			return ret;
	}
	MLX5_ASSERT(!rxq_ctrl->obj);
	rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				    sizeof(*rxq_ctrl->obj), 0,
				    rxq_ctrl->socket);
	if (!rxq_ctrl->obj) {
		DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
		idx, (void *)&rxq_ctrl->obj);
	return 0;
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (rxq == NULL)
			continue;
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->started) {
			if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
				goto error;
			LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
		}
		ret = priv->obj_ops.rxq_obj_new(rxq);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
			goto error;
		}
		rxq_ctrl->started = true;
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

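/*
 * Illustrative sketch, not driver logic: the auto-bind performed below only
 * applies to hairpin Tx queues whose peer is an Rx queue on the same port
 * and which were set up without manual binding. On the application side such
 * a queue would be configured roughly as follows (queue and descriptor
 * numbers are arbitrary examples, "port_id" is the port's own ID):
 *
 *	struct rte_eth_hairpin_conf hairpin_conf = {
 *		.peer_count = 1,
 *		.manual_bind = 0,	// let the PMD bind on device start
 *		.tx_explicit = 0,	// implicit Tx flow, see mlx5_traffic_enable()
 *		.peers[0] = { .port = port_id, .queue = rx_queue_id },
 *	};
 *	rte_eth_tx_hairpin_queue_setup(port_id, tx_queue_id,
 *				       nb_hairpin_desc, &hairpin_conf);
 */
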
/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds Tx queues to the target Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_priv *rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
		if (rxq == NULL) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Qs with auto-bind will be destroyed directly. */
		rxq->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	return -rte_errno;
}

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, hairpin Tx queue
	 * will be bound to the peer Rx queue. Indeed, only the information of
	 * peer Rx queue needs to be fetched.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = txq_ctrl->obj->sq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
	}
	return 0;
}

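/*
 * Note (descriptive only): the rte_hairpin_peer_info filled above and
 * consumed by mlx5_hairpin_queue_peer_bind() below carries everything one
 * side needs to wire up the other end: the DevX object ID of the peer SQ/RQ
 * (qp_id), the VHCA owning it (vhca_id), the expected peer queue index used
 * for cross-checking (peer_q), and the tx_explicit/manual_bind attributes
 * that must match on both sides of the pair.
 */
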
/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: opposite direction is used
	 * to get the peer queue info with ethdev port ID, no need to check.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and that check is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq->hairpin_status = 1;
	}
	return ret;
}

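/*
 * Summary of the bind/unbind state machine implemented by the helpers above
 * and below (descriptive only, no new behavior): binding moves the hairpin
 * SQ/RQ from RST to RDY with the peer object ID and VHCA ID programmed and
 * sets hairpin_status to 1; unbinding moves the object back to RST and
 * clears hairpin_status. Both operations are idempotent: an already bound
 * (or already unbound) queue returns success immediately.
 */
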
/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice both for Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RST;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq->hairpin_status = 0;
	}
	return ret;
}

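/*
 * The three helpers above (peer update, peer bind, peer unbind) are the
 * building blocks of the port-level binding below. The per-queue sequence
 * driven from the Tx side is roughly (a sketch of the calls actually made in
 * mlx5_hairpin_bind_single_port(), not additional logic):
 *
 *	rte_eth_hairpin_queue_peer_update(rx_port, rx_queue, NULL, &peer, 1);
 *	mlx5_hairpin_queue_peer_bind(dev, tx_queue, &peer, 1);
 *	rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue, &cur, 0);
 *
 * i.e. fetch the peer Rx queue information, program the local SQ, then ask
 * the peer port (possibly another mlx5 device) to program its RQ.
 */
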
/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx side to one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding TxQ to peer RxQ, a first loop checks the queues'
	 * configuration consistency. This is a little time consuming, but
	 * better than having to roll back.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port connected to the
		 * same peer Rx port should have the same "auto binding" and
		 * "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per-queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* If no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin Tx queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = txq_ctrl->obj->sq->id;
		cur.vhca_id = priv->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * To access another device properly, an RTE-level private
		 * function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Do the roll-back process for the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}

/*
 * Unbind the hairpin port pair; the HW configuration of both devices will be
 * cleared and the status will be reset for all the queues used between them.
 * This function only supports unbinding the Tx side from one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Indeed, only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			mlx5_txq_release(dev, i);
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				return -rte_errno;
			} else {
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}

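/*
 * Application-level usage sketch for the two port-level callbacks below
 * (hypothetical example, not driver code): with manual binding configured
 * (manual_bind = 1 in the hairpin queue setup), the application pairs the
 * ports explicitly after starting them, e.g.:
 *
 *	// Bind all hairpin Tx queues of tx_port to their peer Rx ports.
 *	rte_eth_hairpin_bind(tx_port, RTE_MAX_ETHPORTS);
 *	...
 *	// Tear the pairing down before stopping the ports.
 *	rte_eth_hairpin_unbind(tx_port, RTE_MAX_ETHPORTS);
 *
 * The ethdev layer dispatches these calls to mlx5_hairpin_bind() and
 * mlx5_hairpin_unbind() below.
 */
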
/*
 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the called function of single port.
	 * Device started status will be checked only before the queue
	 * information updating.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, dev->device)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}

/*
 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}

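/*
 * Usage sketch for the peer-port query implemented below (hypothetical
 * application code using the generic ethdev wrapper): an application that
 * configured hairpin queues can recover the set of peer ports with, e.g.:
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int n = rte_eth_hairpin_get_peer_ports(tx_port, peers,
 *					       RTE_DIM(peers), 1);
 *	// n > 0: peers[0..n-1] hold the Rx peer port IDs, sorted.
 *
 * The implementation below collects the peer IDs into a bitmap so that each
 * port is reported once even when several queues share the same peer.
 */
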
/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, may be different from that when setting up the
 * hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or positive value on success, actual number of peer ports.
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
			struct mlx5_rxq_ctrl *rxq_ctrl;

			if (rxq == NULL)
				continue;
			rxq_ctrl = rxq->ctrl;
			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
				continue;
			pp = rxq->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}

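/*
 * Overview of the start sequence implemented in mlx5_dev_start() below
 * (descriptive only): RSS RETA configuration, Tx packet pacing, Tx queues,
 * Rx queues, hairpin auto-binding, Rx interrupts, indirect actions and
 * control flows, and finally the burst function selection. The error path
 * rolls these back in reverse order, stopping packet pacing last.
 */
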
/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if ((priv->sh->devx && priv->config.dv_flow_en &&
	    priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
		ret = priv->obj_ops.lb_dummy_queue_create(dev);
		if (ret)
			goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
		if (!priv->config.vf && !priv->config.sf &&
		    !priv->representor) {
			ret = mlx5_get_flag_dropless_rq(dev);
			if (ret < 0)
				DRV_LOG(WARNING,
					"port %u cannot query dropless flag",
					dev->data->port_id);
			else if (!ret)
				DRV_LOG(WARNING,
					"port %u dropless_rq OFF, no rearming",
					dev->data->port_id);
		} else {
			DRV_LOG(DEBUG,
				"port %u doesn't support dropless_rq flag",
				dev->data->port_id);
		}
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step is skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue on the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	/*
	 * Attach indirection table objects detached on port stop.
	 * They may be needed to create RSS in non-isolated mode.
	 */
	ret = mlx5_action_handle_attach(dev);
	if (ret) {
		DRV_LOG(ERR,
			"port %u failed to attach indirect actions: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, only the default mreg copy action needs to be
	 * started, since no flow created by an application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	rte_delay_us_sleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All RX queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_flow_meter_rxq_flush(dev);
	mlx5_action_handle_detach(dev);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev);

	return 0;
}

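/*
 * Note on the stop sequence above (descriptive only): the burst function
 * pointers are first replaced with the "removed" stubs, a write barrier is
 * issued, secondary processes are notified, and a short sleep (1 ms per Rx
 * queue) lets in-flight datapath calls on other lcores drain before the
 * queues and flows are actually torn down.
 */
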
/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow should be created regardless of
	 * isolated mode. Otherwise, packets to be sent would go out directly
	 * without the Tx flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if ((priv->representor || priv->master) &&
		    priv->config.dv_esw_en) {
			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
				DRV_LOG(ERR,
					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
					dev->data->port_id, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret) {
				/* Do not fail on IPv6 broadcast creation failure. */
				DRV_LOG(WARNING,
					"IPv6 broadcast is not supported");
				ret = 0;
			}
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}