/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		txq_data = &txq_ctrl->txq;
		if (!txq_ctrl->is_hairpin)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (!txq_ctrl->is_hairpin) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Register Rx queue mempools and fill the Rx queue cache.
 * This function tolerates repeated mempool registration.
 *
 * @param[in] rxq_ctrl
 *   Rx queue control data.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct rte_mempool *mp;
	uint32_t s;
	int ret = 0;

	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
	/* MPRQ mempool is registered on creation, just fill the cache. */
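	/*
	 * Informational note: two registration paths follow. For MPRQ, the
	 * single PMD-created mprq_mp was already registered at creation, so
	 * only the per-queue MR cache needs to be populated. Otherwise every
	 * segment mempool is registered (a pool that is already registered
	 * reports EEXIST and is tolerated) and then added to the per-queue
	 * MR cache.
	 */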
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
		return mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
						      rxq_ctrl->rxq.mprq_mp);
	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
		bool is_extmem;

		mp = rxq_ctrl->rxq.rxseg[s].mp;
		is_extmem = (rte_pktmbuf_priv_flags(mp) &
			     RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
		ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
					       is_extmem);
		if (ret < 0 && rte_errno != EEXIST)
			return ret;
		ret = mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
						     mp);
		if (ret < 0)
			return ret;
	}
	return 0;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

static int
mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
		      unsigned int idx)
{
	int ret = 0;

	if (!rxq_ctrl->is_hairpin) {
		/*
		 * Pre-register the mempools. Regardless of whether
		 * the implicit registration is enabled or not,
		 * Rx mempool destruction is tracked to free MRs.
		 */
		if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
			return -rte_errno;
		ret = rxq_alloc_elts(rxq_ctrl);
		if (ret)
			return ret;
	}
	MLX5_ASSERT(!rxq_ctrl->obj);
	rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				    sizeof(*rxq_ctrl->obj), 0,
				    rxq_ctrl->socket);
	if (!rxq_ctrl->obj) {
		DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
		idx, (void *)&rxq_ctrl->obj);
	return 0;
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u dev_cap.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->dev_cap.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u dev_cap.max_sge is %d.",
		dev->data->port_id, priv->sh->dev_cap.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (rxq == NULL)
			continue;
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->started)
			if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
				goto error;
		ret = priv->obj_ops.rxq_obj_new(rxq);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
			goto error;
		}
		if (!rxq_ctrl->started)
			LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
		rxq_ctrl->started = true;
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
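
/*
 * Note on the start/rollback pattern used by mlx5_txq_start() and
 * mlx5_rxq_start() above: on failure while preparing queue i, the error
 * path releases queue i itself first (it still holds the reference taken
 * by mlx5_txq_get()/mlx5_rxq_ref()) and then walks backwards releasing
 * every queue started so far, preserving rte_errno around the cleanup.
 */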

/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds Tx queues to the target Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_priv *rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (!txq_ctrl->is_hairpin ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Skip hairpin queues with other peer ports. */
		if (!txq_ctrl->is_hairpin ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
		if (rxq == NULL) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->is_hairpin ||
		    rxq->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca =
			priv->sh->cdev->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_RQC_STATE_RDY;
		rq_attr.rq_state = MLX5_RQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca =
			priv->sh->cdev->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Queues with auto-bind will be destroyed directly. */
		rxq->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	return -rte_errno;
}
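
/*
 * Informational note on the auto-bind handshake above: each same-port
 * hairpin pair is wired by two DevX commands, first moving the SQ from
 * RST to RDY with the peer RQ and VHCA identifiers filled in, then moving
 * the RQ from RST to RDY pointing back at the SQ (sketch of the fields
 * set above, not additional code):
 *
 *	sq_attr.hairpin_peer_rq = rq->id;
 *	sq_attr.hairpin_peer_vhca = vhca_id;
 *	mlx5_devx_cmd_modify_sq(sq, &sq_attr);
 *	rq_attr.hairpin_peer_sq = sq->id;
 *	rq_attr.hairpin_peer_vhca = vhca_id;
 *	mlx5_devx_cmd_modify_rq(rq, &rq_attr);
 *
 * Manually bound or cross-port pairs skip this path and are handled by
 * mlx5_hairpin_queue_peer_update()/_bind() below instead.
 */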

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, the hairpin Tx
	 * queue will be bound to the peer Rx queue, so in practice only the
	 * peer Rx queue information needs to be fetched.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (!txq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = mlx5_txq_get_sqn(txq_ctrl);
		peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
	}
	return 0;
}
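
/*
 * Convention shared by the peer helpers around here: the "direction"
 * argument selects the side of the hairpin pair addressed on this device.
 * For mlx5_hairpin_queue_peer_bind()/_unbind() below, nonzero means the
 * Tx/SQ side and zero the Rx/RQ side; for mlx5_hairpin_queue_peer_update()
 * above, nonzero reports the RxQ and zero the TxQ of the queried peer
 * port. The rte_hairpin_peer_info exchanged carries the SQ number or RQ
 * id (qp_id), the owning VHCA id, the expected peer queue index and the
 * tx_explicit/manual_bind mode flags.
 */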

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency checking of the peer queue: opposite direction is used
	 * to get the peer queue info with ethdev port ID, no need to check.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (!txq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and it is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_RQC_STATE_RDY;
		rq_attr.rq_state = MLX5_RQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq->hairpin_status = 1;
	}
	return ret;
}

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (!txq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RDY;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->is_hairpin) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_RQC_STATE_RST;
		rq_attr.rq_state = MLX5_RQC_STATE_RDY;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq->hairpin_status = 0;
	}
	return ret;
}
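
/*
 * Informational note: mlx5_hairpin_bind_single_port() below drives the
 * helpers above in the following order for every hairpin Tx queue whose
 * peer is rx_port (sketch of the calls made below, not additional code):
 *
 *	rte_eth_hairpin_queue_peer_update(rx_port, rx_queue, NULL, &peer, 1);
 *	mlx5_hairpin_queue_peer_bind(dev, tx_queue, &peer, 1);
 *	rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue, &cur, 0);
 *
 * i.e. fetch the peer RxQ information, bind the local SQ, then ask the
 * peer port through the RTE-level private API to bind its RQ. On failure
 * the queues already bound are rolled back in reverse order.
 */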

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx to one Rx.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding a TxQ to the peer RxQ, a first pass over the queues
	 * checks their configuration consistency. This costs a little time
	 * but is better than having to roll back later.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (!txq_ctrl->is_hairpin) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port connected to the
		 * same peer Rx port should have the same "auto binding" and
		 * "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per-queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* If no queue is configured, return success directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin Tx queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (!txq_ctrl->is_hairpin) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = mlx5_txq_get_sqn(txq_ctrl);
		cur.vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * In order to access another device in a proper way, the
		 * RTE-level private function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Roll back the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (!txq_ctrl->is_hairpin ||
		    txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}

/*
 * Unbind the hairpin port pair, the HW configuration of both devices will be
 * cleared and the status will be reset for all the queues used between them.
 * This function only supports unbinding the Tx from one Rx.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (!txq_ctrl->is_hairpin) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			mlx5_txq_release(dev, i);
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				return -rte_errno;
			} else {
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}
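
/*
 * Informational note: the two single-port helpers above are normally
 * reached through the ethdev hairpin API. An illustrative, hypothetical
 * application-level sequence, assuming tx_port and rx_port were set up
 * with hairpin queues peering each other:
 *
 *	rte_eth_hairpin_bind(tx_port, rx_port);
 *	...
 *	rte_eth_hairpin_unbind(tx_port, rx_port);
 *
 * Passing RTE_MAX_ETHPORTS as the Rx port to mlx5_hairpin_bind()/
 * mlx5_hairpin_unbind() below means "all mlx5 ports on the same device".
 */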

/*
 * Bind hairpin ports; Rx may be all ports when RTE_MAX_ETHPORTS is used.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the called function of single port.
	 * Device started status will be checked only before the queue
	 * information updating.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, dev->device)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}

/*
 * Unbind hairpin ports; Rx may be all ports when RTE_MAX_ETHPORTS is used.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}
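
/*
 * Informational note on mlx5_hairpin_get_peer_ports() below: peer port
 * identifiers are first collected into a fixed-size bitmap so that each
 * port is reported once even when several queues share the same peer,
 * then the set bits are copied out in ascending port order (sketch of
 * the logic below, not additional code):
 *
 *	bits[pp / 32] |= 1 << (pp % 32);
 *	...
 *	if (bits[i / 32] & (1 << (i % 32)))
 *		peer_ports[ret++] = i;
 */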

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted; the order may differ from the one used when setting
 * up the hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or a positive value on success (the actual number of peer ports),
 *   a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (!txq_ctrl->is_hairpin) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
			struct mlx5_rxq_ctrl *rxq_ctrl;

			if (rxq == NULL)
				continue;
			rxq_ctrl = rxq->ctrl;
			if (!rxq_ctrl->is_hairpin)
				continue;
			pp = rxq->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}

#ifdef HAVE_MLX5_HWS_SUPPORT

/**
 * Check if starting representor port is allowed.
 *
 * If transfer proxy port is configured for HWS, then starting representor port
 * is allowed if and only if transfer proxy port is started as well.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   If starting representor port is allowed, then 0 is returned.
 *   Otherwise rte_errno is set, and negative errno value is returned.
 */
static int
mlx5_hw_representor_port_allowed_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_eth_dev *proxy_dev;
	struct mlx5_priv *proxy_priv;
	uint16_t proxy_port_id = UINT16_MAX;
	int ret;

	MLX5_ASSERT(priv->sh->config.dv_flow_en == 2);
	MLX5_ASSERT(priv->sh->config.dv_esw_en);
	MLX5_ASSERT(priv->representor);
	ret = rte_flow_pick_transfer_proxy(dev->data->port_id, &proxy_port_id, NULL);
	if (ret) {
		if (ret == -ENODEV)
			DRV_LOG(ERR, "Starting representor port %u is not allowed. Transfer "
				     "proxy port is not available.", dev->data->port_id);
		else
			DRV_LOG(ERR, "Failed to pick transfer proxy for port %u (ret = %d)",
				dev->data->port_id, ret);
		return ret;
	}
	proxy_dev = &rte_eth_devices[proxy_port_id];
	proxy_priv = proxy_dev->data->dev_private;
	if (proxy_priv->dr_ctx == NULL) {
		DRV_LOG(DEBUG, "Starting representor port %u is allowed, but default traffic flows"
			       " will not be created. Transfer proxy port must be configured"
			       " for HWS and started.",
			dev->data->port_id);
		return 0;
	}
	if (!proxy_dev->data->dev_started) {
		DRV_LOG(ERR, "Failed to start port %u: transfer proxy (port %u) must be started",
			dev->data->port_id, proxy_port_id);
		rte_errno = EAGAIN;
		return -rte_errno;
	}
	if (priv->sh->config.repr_matching && !priv->dr_ctx) {
		DRV_LOG(ERR, "Failed to start port %u: with representor matching enabled, port "
			     "must be configured for HWS", dev->data->port_id);
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return 0;
}

#endif

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 *   The following error values are defined:
 *
 *   - -EAGAIN: If port representor cannot be started,
 *     because transfer proxy port is not started.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2) {
		/* If no previous configuration exists. */
		if (!(priv->dr_ctx)) {
			ret = flow_hw_init(dev, NULL);
			if (ret)
				return ret;
		}
		/* If there is no E-Switch, then there are no start/stop order limitations. */
		if (!priv->sh->config.dv_esw_en)
			goto continue_dev_start;
		/* If master is being started, then it is always allowed. */
		if (priv->master)
			goto continue_dev_start;
		if (mlx5_hw_representor_port_allowed_start(dev))
			return -rte_errno;
	}
continue_dev_start:
#endif
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		uint32_t max_lro_msg_size = priv->max_lro_msg_size;

		if (max_lro_msg_size < MLX5_LRO_SEG_CHUNK_SIZE) {
			uint32_t i;
			struct mlx5_rxq_priv *rxq;

			for (i = 0; i != priv->rxqs_n; ++i) {
				rxq = mlx5_rxq_get(dev, i);
				if (rxq && rxq->ctrl && rxq->ctrl->rxq.lro) {
					DRV_LOG(ERR, "port %u invalid max LRO size",
						dev->data->port_id);
					rte_errno = EINVAL;
					return -rte_errno;
				}
			}
		}
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_devx_obj_ops_en(priv->sh) &&
	    priv->obj_ops.lb_dummy_queue_create) {
		ret = priv->obj_ops.lb_dummy_queue_create(dev);
		if (ret)
			goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
		if (!priv->sh->dev_cap.vf && !priv->sh->dev_cap.sf &&
		    !priv->representor) {
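			/*
			 * Informational note: the dropless_rq query below is
			 * best effort - the start sequence continues in any
			 * case, the warnings only report that delay drop
			 * cannot be verified or rearmed on this port.
			 */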
			ret = mlx5_get_flag_dropless_rq(dev);
			if (ret < 0)
				DRV_LOG(WARNING,
					"port %u cannot query dropless flag",
					dev->data->port_id);
			else if (!ret)
				DRV_LOG(WARNING,
					"port %u dropless_rq OFF, no rearming",
					dev->data->port_id);
		} else {
			DRV_LOG(DEBUG,
				"port %u doesn't support dropless_rq flag",
				dev->data->port_id);
		}
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step is skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue from the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	/*
	 * Attach indirection table objects detached on port stop.
	 * They may be needed to create RSS in non-isolated mode.
	 */
	ret = mlx5_action_handle_attach(dev);
	if (ret) {
		DRV_LOG(ERR,
			"port %u failed to attach indirect actions: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2) {
		ret = flow_hw_table_update(dev, NULL);
		if (ret) {
			DRV_LOG(ERR, "port %u failed to update HWS tables",
				dev->data->port_id);
			goto error;
		}
	}
#endif
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set dynamic fields and flags into Rx queues. */
	mlx5_flow_rxq_dynf_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, only the default mreg copy action needs to be
	 * started, since no flow created by an application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
			(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (rte_intr_fd_get(priv->sh->intr_handle_nl) >= 0) {
		priv->sh->port[priv->dev_port - 1].nl_ih_port_id =
			(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
	}
	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
			(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

#ifdef HAVE_MLX5_HWS_SUPPORT
/**
 * Check if stopping transfer proxy port is allowed.
 *
 * If transfer proxy port is configured for HWS, then it is allowed to stop it
 * if and only if all other representor ports are stopped.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   If stopping transfer proxy port is allowed, then 0 is returned.
 *   Otherwise rte_errno is set, and negative errno value is returned.
 */
static int
mlx5_hw_proxy_port_allowed_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	bool representor_started = false;
	uint16_t port_id;

	MLX5_ASSERT(priv->sh->config.dv_flow_en == 2);
	MLX5_ASSERT(priv->sh->config.dv_esw_en);
	MLX5_ASSERT(priv->master);
	/* If transfer proxy port was not configured for HWS, then stopping it is allowed. */
	if (!priv->dr_ctx)
		return 0;
	MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
		const struct rte_eth_dev *port_dev = &rte_eth_devices[port_id];
		const struct mlx5_priv *port_priv = port_dev->data->dev_private;

		if (port_id != dev->data->port_id &&
		    port_priv->domain_id == priv->domain_id &&
		    port_dev->data->dev_started)
			representor_started = true;
	}
	if (representor_started) {
		DRV_LOG(ERR, "Failed to stop port %u: attached representor ports"
			     " must be stopped before stopping transfer proxy port",
			dev->data->port_id);
		rte_errno = EBUSY;
		return -rte_errno;
	}
	return 0;
}
#endif
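
/*
 * Informational note on the HWS E-Switch start/stop ordering enforced by
 * mlx5_hw_representor_port_allowed_start() and
 * mlx5_hw_proxy_port_allowed_stop(): a representor port may only start
 * after the transfer proxy port is started (otherwise -EAGAIN), and the
 * transfer proxy port may only stop after all its representor ports are
 * stopped (otherwise -EBUSY). Applications are therefore expected to
 * start the proxy first and stop it last.
 */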

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 *   The following error values are defined:
 *
 *   - -EBUSY: If transfer proxy port cannot be stopped,
 *     because other port representors are still running.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2) {
		/* If there is no E-Switch, then there are no start/stop order limitations. */
		if (!priv->sh->config.dv_esw_en)
			goto continue_dev_stop;
		/* If representor is being stopped, then it is always allowed. */
		if (priv->representor)
			goto continue_dev_stop;
		if (mlx5_hw_proxy_port_allowed_stop(dev)) {
			dev->data->dev_started = 1;
			return -rte_errno;
		}
	}
continue_dev_stop:
#endif
	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
	dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	rte_delay_us_sleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	if (priv->sh->config.dv_flow_en == 2) {
		if (!rte_atomic_load_explicit(&priv->hws_mark_refcnt, rte_memory_order_relaxed))
			flow_hw_rxq_flag_set(dev, false);
	} else {
		mlx5_flow_stop_default(dev);
	}
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All Rx queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_flow_meter_rxq_flush(dev);
	mlx5_action_handle_detach(dev);
#ifdef HAVE_MLX5_HWS_SUPPORT
	mlx5_flow_hw_cleanup_ctrl_rx_templates(dev);
#endif
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].nl_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev);

	return 0;
}

#ifdef HAVE_MLX5_HWS_SUPPORT

static int
mlx5_traffic_enable_hws(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_sh_config *config = &priv->sh->config;
	uint64_t flags = 0;
	unsigned int i;
	int ret;

	/*
	 * With extended metadata enabled, the Tx metadata copy is handled by
	 * default Tx tagging flow rules, so the default Tx flow rule is not
	 * needed. It is only required when representor matching is disabled.
	 */
	if (config->dv_esw_en &&
	    !config->repr_matching &&
	    config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS &&
	    priv->master) {
		if (mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev))
			goto error;
	}
	for (i = 0; i < priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, i);
		uint32_t queue;

		if (!txq)
			continue;
		queue = mlx5_txq_get_sqn(txq);
		if ((priv->representor || priv->master) &&
		    config->dv_esw_en &&
		    config->fdb_def_rule) {
			if (mlx5_flow_hw_esw_create_sq_miss_flow(dev, queue, false)) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if (config->dv_esw_en && config->repr_matching) {
			if (mlx5_flow_hw_tx_repr_matching_flow(dev, queue, false)) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (config->fdb_def_rule) {
		if ((priv->master || priv->representor) && config->dv_esw_en) {
			if (!mlx5_flow_hw_esw_create_default_jump_flow(dev))
				priv->fdb_def_rule = 1;
			else
				goto error;
		}
	} else {
		DRV_LOG(INFO, "port %u FDB default rule is disabled", dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (!priv->sh->config.lacp_by_user && priv->pf_bond >= 0 && priv->master)
		if (mlx5_flow_hw_lacp_rx_flow(dev))
			goto error;
	if (dev->data->promiscuous)
		flags |= MLX5_CTRL_PROMISCUOUS;
	if (dev->data->all_multicast)
		flags |= MLX5_CTRL_ALL_MULTICAST;
	else
		flags |= MLX5_CTRL_BROADCAST | MLX5_CTRL_IPV4_MULTICAST | MLX5_CTRL_IPV6_MULTICAST;
	flags |= MLX5_CTRL_DMAC;
	if (priv->vlan_filter_n)
		flags |= MLX5_CTRL_VLAN_FILTER;
	return mlx5_flow_hw_ctrl_flows(dev, flags);
error:
	ret = rte_errno;
	mlx5_flow_hw_flush_ctrl_flows(dev);
	rte_errno = ret;
	return -rte_errno;
}

#endif

/**
 * Enable traffic flows configured by control plane
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.hdr.dst_addr.addr_bytes = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 },
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
	};
	struct rte_flow_item_eth unicast = {
		.hdr.src_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
	};
	struct rte_flow_item_eth unicast_mask = {
		.hdr.dst_addr.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
	};
	unsigned int i;
	unsigned int j;
	int ret;

#ifdef HAVE_MLX5_HWS_SUPPORT
	if (priv->sh->config.dv_flow_en == 2)
		return mlx5_traffic_enable_hws(dev);
#endif
	/*
	 * The hairpin Tx queue default flow should be created regardless of
	 * the isolation mode; otherwise all packets to be sent are sent out
	 * directly without the Tx flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->is_hairpin &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev,
					mlx5_txq_get_sqn(txq_ctrl));
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if (priv->sh->config.dv_esw_en) {
			uint32_t q = mlx5_txq_get_sqn(txq_ctrl);

			if (mlx5_flow_create_devx_sq_miss_flow(dev, q) == 0) {
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR,
					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
					dev->data->port_id, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (priv->sh->config.fdb_def_rule) {
		if (priv->sh->config.dv_esw_en) {
			if (mlx5_flow_create_esw_table_zero_flow(dev))
				priv->fdb_def_rule = 1;
			else
				DRV_LOG(INFO, "port %u FDB default rule cannot be configured - only Eswitch group 0 flows are supported.",
					dev->data->port_id);
		}
	} else {
		DRV_LOG(INFO, "port %u FDB default rule is disabled",
			dev->data->port_id);
	}
	if (!priv->sh->config.lacp_by_user && priv->pf_bond >= 0 && priv->master) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.hdr.dst_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
			.hdr.src_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
			.hdr.ether_type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.hdr.dst_addr.addr_bytes = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 },
			.hdr.src_addr.addr_bytes = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
			.hdr.ether_type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.hdr.vlan_tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret) {
				/* Do not fail on IPv6 broadcast creation failure. */
				DRV_LOG(WARNING,
					"IPv6 broadcast is not supported");
				ret = 0;
			}
		}
	}
	/* Add MAC address flows. */
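	/*
	 * Informational note: the loop below installs one unicast DMAC rule
	 * per configured MAC address; when VLAN filters are present, it is
	 * expanded to one rule per (MAC address, VLAN) pair instead, matching
	 * the destination MAC together with the VLAN TCI.
	 */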
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.hdr.dst_addr.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.hdr.vlan_tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
#ifdef HAVE_MLX5_HWS_SUPPORT
	struct mlx5_priv *priv = dev->data->dev_private;

	if (priv->sh->config.dv_flow_en == 2)
		mlx5_flow_hw_flush_ctrl_flows(dev);
	else
#endif
		mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}

/**
 * Restart traffic flows configured by control plane
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
#ifdef HAVE_MLX5_HWS_SUPPORT
		mlx5_flow_hw_cleanup_ctrl_rx_templates(dev);
#endif
		return mlx5_traffic_enable(dev);
	}
	return 0;
}