/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		MLX5_ASSERT(!txq_ctrl->obj);
		txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
					    0, txq_ctrl->socket);
		if (!txq_ctrl->obj) {
			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
				"memory resources.", dev->data->port_id,
				txq_data->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.txq_obj_new(dev, i);
		if (ret < 0) {
			mlx5_free(txq_ctrl->obj);
			txq_ctrl->obj = NULL;
			goto error;
		}
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD) {
			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);

			txq_data->fcqs = mlx5_malloc(flags, size,
						     RTE_CACHE_LINE_SIZE,
						     txq_ctrl->socket);
			if (!txq_data->fcqs) {
				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
					"allocate memory (FCQ).",
					dev->data->port_id, i);
				rte_errno = ENOMEM;
				goto error;
			}
		}
		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
			dev->data->port_id, i, (void *)&txq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Translate the chunk address to MR key in order to put it into the cache.
 */
static void
mlx5_rxq_mempool_register_cb(struct rte_mempool *mp, void *opaque,
			     struct rte_mempool_memhdr *memhdr,
			     unsigned int idx)
{
	struct mlx5_rxq_data *rxq = opaque;

	RTE_SET_USED(mp);
	RTE_SET_USED(idx);
	mlx5_rx_addr2mr(rxq, (uintptr_t)memhdr->addr);
}

/**
 * Register Rx queue mempools and fill the Rx queue cache.
 * This function tolerates repeated mempool registration.
 *
 * @param[in] rxq_ctrl
 *   Rx queue control data.
 *
 * @return
 *   0 on success, (-1) on failure and rte_errno is set.
 */
static int
mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	struct rte_mempool *mp;
	uint32_t s;
	int ret = 0;

	mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
	/* MPRQ mempool is registered on creation, just fill the cache. */
	if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq)) {
		rte_mempool_mem_iter(rxq_ctrl->rxq.mprq_mp,
				     mlx5_rxq_mempool_register_cb,
				     &rxq_ctrl->rxq);
		return 0;
	}
	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
		uint32_t flags;

		mp = rxq_ctrl->rxq.rxseg[s].mp;
		flags = rte_pktmbuf_priv_flags(mp);
		ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp);
		if (ret < 0 && rte_errno != EEXIST)
			return ret;
		if ((flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) == 0)
			rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
					     &rxq_ctrl->rxq);
	}
	return 0;
}
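
/*
 * Implementation note: rte_mempool_mem_iter() above walks every memory chunk
 * of the pool, and the callback warms the Rx queue MR cache through
 * mlx5_rx_addr2mr(), so the first received packets do not pay for an MR
 * lookup miss. An EEXIST result from mlx5_mr_mempool_register() is tolerated
 * because the mempool may already be registered, e.g. when it is shared by
 * several Rx queues or the port is restarted.
 */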

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

static int
mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
		      unsigned int idx)
{
	int ret = 0;

	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
		/*
		 * Pre-register the mempools. Regardless of whether
		 * the implicit registration is enabled or not,
		 * Rx mempool destruction is tracked to free MRs.
		 */
		if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
			return -rte_errno;
		ret = rxq_alloc_elts(rxq_ctrl);
		if (ret)
			return ret;
	}
	MLX5_ASSERT(!rxq_ctrl->obj);
	rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
				    sizeof(*rxq_ctrl->obj), 0,
				    rxq_ctrl->socket);
	if (!rxq_ctrl->obj) {
		DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
			dev->data->port_id, idx);
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
		idx, (void *)&rxq_ctrl->obj);
	return 0;
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (rxq == NULL)
			continue;
		rxq_ctrl = rxq->ctrl;
		if (!rxq_ctrl->started) {
			if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
				goto error;
			LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
		}
		ret = priv->obj_ops.rxq_obj_new(rxq);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			rxq_ctrl->obj = NULL;
			goto error;
		}
		rxq_ctrl->started = true;
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
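
/*
 * Both mlx5_txq_start() and mlx5_rxq_start() above share the same rollback
 * convention on failure: rte_errno is saved, queues 0..i (including the one
 * that failed) are released in reverse order by the do { } while (i-- != 0)
 * loop, and rte_errno is then restored so the caller sees the original error.
 */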

/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds Tx queues to the target Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_priv *rxq;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;
	bool need_auto = false;
	uint16_t self_port = dev->data->port_id;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.manual_bind) {
			mlx5_txq_release(dev, i);
			return 0;
		}
		need_auto = true;
		mlx5_txq_release(dev, i);
	}
	if (!need_auto)
		return 0;
	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Skip hairpin queues with other peer ports. */
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN ||
		    txq_ctrl->hairpin_conf.peers[0].port != self_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
		if (rxq == NULL) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d cannot be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		/* Qs with auto-bind will be destroyed directly. */
		rxq->hairpin_status = 1;
		txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	return -rte_errno;
}
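
/*
 * The auto-bind loop above programs both ends of each hairpin pair through
 * DevX: the SQ is moved from RST to RDY with the peer RQ id and VHCA id set,
 * then the RQ is moved from RST to RDY pointing back at the SQ. Traffic can
 * only flow through the pair once both modifications have succeeded.
 */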

/*
 * Fetch the peer queue's SW & HW information.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_queue
 *   Index of the queue to fetch the information.
 * @param current_info
 *   Pointer to the input peer information, not used currently.
 * @param peer_info
 *   Pointer to the structure to store the information, output.
 * @param direction
 *   Positive to get the RxQ information, zero to get the TxQ information.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
			       struct rte_hairpin_peer_info *current_info,
			       struct rte_hairpin_peer_info *peer_info,
			       uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	RTE_SET_USED(current_info);

	if (dev->data->dev_started == 0) {
		rte_errno = EBUSY;
		DRV_LOG(ERR, "peer port %u is not started",
			dev->data->port_id);
		return -rte_errno;
	}
	/*
	 * Peer port used as egress. In the current design, the hairpin Tx
	 * queue will be bound to the peer Rx queue, so only the peer Rx queue
	 * information needs to be fetched.
	 */
	if (direction == 0) {
		struct mlx5_txq_ctrl *txq_ctrl;

		txq_ctrl = mlx5_txq_get(dev, peer_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, peer_queue);
			mlx5_txq_release(dev, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = txq_ctrl->obj->sq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		/* 1-to-1 mapping, only the first one is used. */
		peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		mlx5_txq_release(dev, peer_queue);
	} else { /* Peer port used as ingress. */
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, peer_queue);
			return -rte_errno;
		}
		peer_info->qp_id = rxq_ctrl->obj->rq->id;
		peer_info->vhca_id = priv->config.hca_attr.vhca_id;
		peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
		peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
		peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
	}
	return 0;
}

/*
 * Bind the hairpin queue with the peer HW information.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already bound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to bind.
 * @param peer_info
 *   Pointer to information of the peer queue.
 * @param direction
 *   Positive to configure the TxQ, zero to configure the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
			     struct rte_hairpin_peer_info *peer_info,
			     uint32_t direction)
{
	int ret = 0;

	/*
	 * Consistency check of the peer queue: the opposite direction is used
	 * to get the peer queue info with the ethdev port ID, so no extra
	 * check is needed here.
	 */
	if (peer_info->peer_q != cur_queue) {
		rte_errno = EINVAL;
		DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
			dev->data->port_id, cur_queue, peer_info->peer_q);
		return -rte_errno;
	}
	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		/*
		 * Consistency checking of all queues of one port is done in
		 * the bind() function, and it is optional.
		 */
		if (peer_info->tx_explicit !=
		    txq_ctrl->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    txq_ctrl->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = peer_info->qp_id;
		sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 1;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq->hairpin_status != 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
				dev->data->port_id, cur_queue);
			return 0;
		}
		if (peer_info->tx_explicit !=
		    rxq->hairpin_conf.tx_explicit) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
				" mismatch", dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (peer_info->manual_bind !=
		    rxq->hairpin_conf.manual_bind) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
				" mismatch", dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = peer_info->qp_id;
		rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq->hairpin_status = 1;
	}
	return ret;
}

/*
 * Unbind the hairpin queue and reset its HW configuration.
 * This needs to be called twice, for both the Tx and Rx queues of a pair.
 * If the queue is already unbound, it is considered successful.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param cur_queue
 *   Index of the queue to change the HW configuration to unbind.
 * @param direction
 *   Positive to reset the TxQ, zero to reset the RxQ.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
			       uint32_t direction)
{
	int ret = 0;

	if (direction != 0) {
		struct mlx5_txq_ctrl *txq_ctrl;
		struct mlx5_devx_modify_sq_attr sq_attr = { 0 };

		txq_ctrl = mlx5_txq_get(dev, cur_queue);
		if (txq_ctrl == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		/* Already unbound, return success before obj checking. */
		if (txq_ctrl->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return 0;
		}
		if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Txq object found: %d",
				dev->data->port_id, cur_queue);
			mlx5_txq_release(dev, cur_queue);
			return -rte_errno;
		}
		sq_attr.state = MLX5_SQC_STATE_RST;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
		if (ret == 0)
			txq_ctrl->hairpin_status = 0;
		mlx5_txq_release(dev, cur_queue);
	} else {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
		struct mlx5_rxq_ctrl *rxq_ctrl;
		struct mlx5_devx_modify_rq_attr rq_attr = { 0 };

		if (rxq == NULL) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rxq_ctrl = rxq->ctrl;
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN) {
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		if (rxq->hairpin_status == 0) {
			DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
				dev->data->port_id, cur_queue);
			return 0;
		}
		if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no Rxq object found: %d",
				dev->data->port_id, cur_queue);
			return -rte_errno;
		}
		rq_attr.state = MLX5_SQC_STATE_RST;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
		if (ret == 0)
			rxq->hairpin_status = 0;
	}
	return ret;
}
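
/*
 * Illustrative application-side usage (hypothetical port variables tx_port
 * and rx_port): binding a hairpin pair in both directions, when both are
 * configured, is normally driven through the ethdev API, which in turn
 * reaches the per-queue helpers above:
 *
 *	ret = rte_eth_hairpin_bind(tx_port, rx_port);
 *	if (ret == 0)
 *		ret = rte_eth_hairpin_bind(rx_port, tx_port);
 */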

/*
 * Bind the hairpin port pairs, from the Tx to the peer Rx.
 * This function only supports binding the Tx to one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	struct rte_hairpin_peer_info peer = {0xffffff};
	struct rte_hairpin_peer_info cur;
	const struct rte_eth_hairpin_conf *conf;
	uint16_t num_q = 0;
	uint16_t local_port = priv->dev_data->port_id;
	uint32_t manual;
	uint32_t explicit;
	uint16_t rx_queue;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	/*
	 * Before binding TxQ to peer RxQ, the first-round loop checks the
	 * queues' configuration consistency. This takes a little extra time
	 * but is better than having to roll back afterwards.
	 */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/*
		 * All hairpin Tx queues of a single port that are connected to
		 * the same peer Rx port should have the same "auto binding"
		 * and "implicit Tx flow" modes.
		 * Peer consistency checking will be done in per-queue binding.
		 */
		conf = &txq_ctrl->hairpin_conf;
		if (conf->peers[0].port == rx_port) {
			if (num_q == 0) {
				manual = conf->manual_bind;
				explicit = conf->tx_explicit;
			} else {
				if (manual != conf->manual_bind ||
				    explicit != conf->tx_explicit) {
					rte_errno = EINVAL;
					DRV_LOG(ERR, "port %u queue %d mode"
						" mismatch: %u %u, %u %u",
						local_port, i, manual,
						conf->manual_bind, explicit,
						conf->tx_explicit);
					mlx5_txq_release(dev, i);
					return -rte_errno;
				}
			}
			num_q++;
		}
		mlx5_txq_release(dev, i);
	}
	/* If no queue is configured, success is returned directly. */
	if (num_q == 0)
		return ret;
	/* All the hairpin TX queues need to be traversed again. */
	for (i = 0; i != priv->txqs_n; i++) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		/*
		 * Fetch peer RxQ's information.
		 * No need to pass the information of the current queue.
		 */
		ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
							NULL, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Accessing its own device, inside mlx5 PMD. */
		ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		/* Pass TxQ's information to peer RxQ and try binding. */
		cur.peer_q = rx_queue;
		cur.qp_id = txq_ctrl->obj->sq->id;
		cur.vhca_id = priv->config.hca_attr.vhca_id;
		cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
		cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
		/*
		 * In order to access another device in a proper way, an
		 * RTE-level private function is needed.
		 */
		ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
						      &cur, 0);
		if (ret != 0) {
			mlx5_txq_release(dev, i);
			goto error;
		}
		mlx5_txq_release(dev, i);
	}
	return 0;
error:
	/*
	 * Do the roll-back process for the queues already bound.
	 * No need to check the return value of the queue unbind function.
	 */
	do {
		/* No validation is needed here. */
		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		mlx5_txq_release(dev, i);
	} while (i--);
	return ret;
}

/*
 * Unbind the hairpin port pair; the HW configuration of both devices will be
 * cleared and the status will be reset for all the queues used between them.
 * This function only supports unbinding the Tx from one Rx port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param rx_port
 *   Port identifier of the Rx port.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	int ret;
	uint16_t cur_port = priv->dev_data->port_id;

	if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
		rte_errno = ENODEV;
		DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
		return -rte_errno;
	}
	for (i = 0; i != priv->txqs_n; i++) {
		uint16_t rx_queue;

		txq_ctrl = mlx5_txq_get(dev, i);
		if (txq_ctrl == NULL)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
			mlx5_txq_release(dev, i);
			continue;
		}
		/* Indeed, only the first used queue needs to be checked. */
		if (txq_ctrl->hairpin_conf.manual_bind == 0) {
			if (cur_port != rx_port) {
				rte_errno = EINVAL;
				DRV_LOG(ERR, "port %u and port %u are in"
					" auto-bind mode", cur_port, rx_port);
				mlx5_txq_release(dev, i);
				return -rte_errno;
			} else {
				return 0;
			}
		}
		rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
		mlx5_txq_release(dev, i);
		ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
		if (ret) {
			DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
				rx_port, rx_queue);
			return ret;
		}
		ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
		if (ret) {
			DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
				cur_port, i);
			return ret;
		}
	}
	return 0;
}
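
/*
 * Note: queues set up in auto-bind mode (manual_bind == 0) are hairpinned
 * only within the same port, so an explicit cross-port unbind request for
 * them is rejected above with EINVAL, while a same-port request returns
 * success directly because such queues are destroyed directly on port stop.
 */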

/*
 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_bind_single_port()
 */
int
mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p, pp;

	/*
	 * If the Rx port has no hairpin configuration with the current port,
	 * the binding will be skipped in the single-port function called
	 * below. The device started status will be checked only before the
	 * queue information is updated.
	 */
	if (rx_port == RTE_MAX_ETHPORTS) {
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_bind_single_port(dev, p);
			if (ret != 0)
				goto unbind;
		}
		return ret;
	} else {
		return mlx5_hairpin_bind_single_port(dev, rx_port);
	}
unbind:
	MLX5_ETH_FOREACH_DEV(pp, dev->device)
		if (pp < p)
			mlx5_hairpin_unbind_single_port(dev, pp);
	return ret;
}

/*
 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
 * @see mlx5_hairpin_unbind_single_port()
 */
int
mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
{
	int ret = 0;
	uint16_t p;

	if (rx_port == RTE_MAX_ETHPORTS)
		MLX5_ETH_FOREACH_DEV(p, dev->device) {
			ret = mlx5_hairpin_unbind_single_port(dev, p);
			if (ret != 0)
				return ret;
		}
	else
		ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
	return ret;
}

/*
 * DPDK callback to get the hairpin peer ports list.
 * This will return the actual number of peer ports and save the identifiers
 * into the array (sorted, and possibly different from the order used when
 * setting up the hairpin peer queues).
 * The peer port ID could be the same as the port ID of the current device.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param peer_ports
 *   Pointer to array to save the port identifiers.
 * @param len
 *   The length of the array.
 * @param direction
 *   Current port to peer port direction.
 *   positive - current used as Tx to get all peer Rx ports.
 *   zero - current used as Rx to get all peer Tx ports.
 *
 * @return
 *   0 or positive value on success, actual number of peer ports.
 *   A negative errno value otherwise and rte_errno is set.
 */
int
mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
			    size_t len, uint32_t direction)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_ctrl *txq_ctrl;
	uint32_t i;
	uint16_t pp;
	uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
	int ret = 0;

	if (direction) {
		for (i = 0; i < priv->txqs_n; i++) {
			txq_ctrl = mlx5_txq_get(dev, i);
			if (!txq_ctrl)
				continue;
			if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
				mlx5_txq_release(dev, i);
				continue;
			}
			pp = txq_ctrl->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				mlx5_txq_release(dev, i);
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
			mlx5_txq_release(dev, i);
		}
	} else {
		for (i = 0; i < priv->rxqs_n; i++) {
			struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
			struct mlx5_rxq_ctrl *rxq_ctrl;

			if (rxq == NULL)
				continue;
			rxq_ctrl = rxq->ctrl;
			if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN)
				continue;
			pp = rxq->hairpin_conf.peers[0].port;
			if (pp >= RTE_MAX_ETHPORTS) {
				rte_errno = ERANGE;
				DRV_LOG(ERR, "port %hu queue %u peer port "
					"out of range %hu",
					priv->dev_data->port_id, i, pp);
				return -rte_errno;
			}
			bits[pp / 32] |= 1 << (pp % 32);
		}
	}
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (bits[i / 32] & (1 << (i % 32))) {
			if ((size_t)ret >= len) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			peer_ports[ret++] = i;
		}
	}
	return ret;
}
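
/*
 * Illustrative application-side usage (hypothetical port_id variable): the
 * callback above backs rte_eth_hairpin_get_peer_ports(), e.g. to unbind all
 * Rx peers of a Tx port before stopping it:
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int n = rte_eth_hairpin_get_peer_ports(port_id, peers,
 *					       RTE_DIM(peers), 1);
 *	for (int k = 0; k < n; k++)
 *		rte_eth_hairpin_unbind(port_id, peers[k]);
 */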

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline >= 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if ((priv->sh->devx && priv->config.dv_flow_en &&
	    priv->config.dest_tir) && priv->obj_ops.lb_dummy_queue_create) {
		ret = priv->obj_ops.lb_dummy_queue_create(dev);
		if (ret)
			goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
		if (!priv->config.vf && !priv->config.sf &&
		    !priv->representor) {
			ret = mlx5_get_flag_dropless_rq(dev);
			if (ret < 0)
				DRV_LOG(WARNING,
					"port %u cannot query dropless flag",
					dev->data->port_id);
			else if (!ret)
				DRV_LOG(WARNING,
					"port %u dropless_rq OFF, no rearming",
					dev->data->port_id);
		} else {
			DRV_LOG(DEBUG,
				"port %u doesn't support dropless_rq flag",
				dev->data->port_id);
		}
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/*
	 * This step will be skipped if there is no hairpin Tx queue configured
	 * with an Rx peer queue from the same device.
	 */
	ret = mlx5_hairpin_auto_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	/*
	 * Attach indirection table objects detached on port stop.
	 * They may be needed to create RSS in non-isolated mode.
	 */
	ret = mlx5_action_handle_attach(dev);
	if (ret) {
		DRV_LOG(ERR,
			"port %u failed to attach indirect actions: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, it only needs to start the default mreg copy
	 * action and no flow created by an application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
			dev->data->port_id, rte_strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
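
/*
 * The error label above undoes the start sequence in reverse order: default
 * flows, control traffic, Tx queues, Rx queues, the loopback dummy queue and
 * finally Tx packet pacing, mirroring the teardown performed by
 * mlx5_dev_stop() below.
 */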

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
int
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	rte_delay_us_sleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed firstly. */
	mlx5_traffic_disable(dev);
	/* All RX queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
	mlx5_flow_meter_rxq_flush(dev);
	mlx5_action_handle_detach(dev);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	if (priv->obj_ops.lb_dummy_queue_release)
		priv->obj_ops.lb_dummy_queue_release(dev);
	mlx5_txpp_stop(dev);

	return 0;
}

/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow should be created no matter
	 * whether isolation mode is enabled or not. Otherwise, all the packets
	 * to be sent will be sent out directly without the Tx flow actions,
	 * e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		/* Only Tx implicit mode requires the default Tx flow. */
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN &&
		    txq_ctrl->hairpin_conf.tx_explicit == 0 &&
		    txq_ctrl->hairpin_conf.peers[0].port ==
		    priv->dev_data->port_id) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		if ((priv->representor || priv->master) &&
		    priv->config.dv_esw_en) {
			if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
				DRV_LOG(ERR,
					"Port %u Tx queue %u SQ create representor devx default miss rule failed.",
					dev->data->port_id, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if ((priv->master || priv->representor) && priv->config.dv_esw_en) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret) {
				/* Do not fail on IPv6 broadcast creation failure. */
				DRV_LOG(WARNING,
					"IPv6 broadcast is not supported");
				ret = 0;
			}
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device private data.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}