/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>

#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
			txq_ctrl->obj = mlx5_txq_obj_new
				(dev, i, MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN);
		} else {
			txq_alloc_elts(txq_ctrl);
			txq_ctrl->obj = mlx5_txq_obj_new
				(dev, i, priv->txpp_en ?
				 MLX5_TXQ_OBJ_TYPE_DEVX_SQ :
				 MLX5_TXQ_OBJ_TYPE_IBV);
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}
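/*
 * Rx queue startup notes (mlx5_rxq_start() below):
 * - If any configured Rx queue has LRO enabled, all Rx queue objects are
 *   created through DevX (MLX5_RXQ_OBJ_TYPE_DEVX_RQ); otherwise Verbs
 *   objects (MLX5_RXQ_OBJ_TYPE_IBV) are used.
 * - Each queue's mempool (the MPRQ mempool when Multi-Packet RQ is in use)
 *   is registered in the MR cache before the queue object is created, so
 *   the datapath should not need to register it on first use.
 */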
/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;
	enum mlx5_rxq_obj_type obj_type = MLX5_RXQ_OBJ_TYPE_IBV;
	struct mlx5_rxq_data *rxq = NULL;

	for (i = 0; i < priv->rxqs_n; ++i) {
		rxq = (*priv->rxqs)[i];
		if (rxq && rxq->lro) {
			obj_type = MLX5_RXQ_OBJ_TYPE_DEVX_RQ;
			break;
		}
	}
	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
		struct rte_mempool *mp;

		if (!rxq_ctrl)
			continue;
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_HAIRPIN) {
			rxq_ctrl->obj = mlx5_rxq_obj_new
				(dev, i, MLX5_RXQ_OBJ_TYPE_DEVX_HAIRPIN);
			if (!rxq_ctrl->obj)
				goto error;
			continue;
		}
		/* Pre-register Rx mempool. */
		mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
		     rxq_ctrl->rxq.mprq_mp : rxq_ctrl->rxq.mp;
		DRV_LOG(DEBUG,
			"port %u Rx queue %u registering"
			" mp %s having %u chunks",
			dev->data->port_id, rxq_ctrl->rxq.idx,
			mp->name, mp->nb_mem_chunks);
		mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
		ret = rxq_alloc_elts(rxq_ctrl);
		if (ret)
			goto error;
		rxq_ctrl->obj = mlx5_rxq_obj_new(dev, i, obj_type);
		if (!rxq_ctrl->obj)
			goto error;
		if (obj_type == MLX5_RXQ_OBJ_TYPE_IBV)
			rxq_ctrl->wqn = rxq_ctrl->obj->wq->wq_num;
		else if (obj_type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ)
			rxq_ctrl->wqn = rxq_ctrl->obj->rq->id;
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
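/*
 * Hairpin binding sequence, per hairpin Tx queue (mlx5_hairpin_bind()
 * below): look up the peer Rx queue from the Tx queue hairpin
 * configuration, verify that it is a hairpin queue pointing back at this
 * Tx queue, then move the SQ from RST to RDY with the peer RQ id/VHCA id
 * and the RQ from RST to RDY with the peer SQ id/VHCA id.
 *
 * A rough application-side sketch of the setup this expects (queue and
 * descriptor numbers are hypothetical, error handling omitted):
 *
 *	struct rte_eth_hairpin_conf conf = { .peer_count = 1 };
 *
 *	conf.peers[0].port = port_id;
 *	conf.peers[0].queue = txq_id;	// Rx queue peers the Tx queue.
 *	rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, nb_desc, &conf);
 *	conf.peers[0].queue = rxq_id;	// Tx queue peers the Rx queue.
 *	rte_eth_tx_hairpin_queue_setup(port_id, txq_id, nb_desc, &conf);
 */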
/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds Tx queues to the target Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}
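/*
 * The device start sequence in mlx5_dev_start() below is ordered: Tx packet
 * pacing, Tx queues, Rx queues, hairpin binding, Rx interrupts, control
 * flows and default flow actions, before the real burst functions are
 * installed. The error path rolls these back in reverse, stopping packet
 * pacing last.
 *
 * A minimal application-side sketch that reaches mlx5_dev_start() through
 * the .dev_start ethdev op (assuming the port and its queues are already
 * configured; the error handling style is illustrative):
 *
 *	int ret = rte_eth_dev_start(port_id);
 *
 *	if (ret < 0)
 *		rte_exit(EXIT_FAILURE, "port %u cannot start: %s\n",
 *			 port_id, strerror(-ret));
 */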
/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline > 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_hairpin_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flow. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, it only needs to start the default mreg copy
	 * action and no flow created by application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (priv->sh->intr_handle.fd >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (priv->sh->intr_handle_devx.fd >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	usleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All Rx queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, &priv->flows, true);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev);
}
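/*
 * Control flow rules installed by mlx5_traffic_enable() below, depending
 * on the port configuration:
 * - a source-queue rule per hairpin Tx queue (even in isolated mode);
 * - the E-Switch FDB default (group 0) rule and the LACP miss rule when
 *   applicable;
 * - a promiscuous catch-all rule when promiscuous mode is on;
 * - an all-multicast catch-all rule when all-multicast is on, otherwise
 *   broadcast and IPv6 multicast rules;
 * - a unicast rule per configured MAC address.
 * The broadcast, IPv6 multicast and unicast rules are replicated for every
 * configured VLAN filter.
 */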
/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow must be created whether or not
	 * isolated mode is enabled. Otherwise, packets to be sent are passed
	 * through directly and miss the Tx flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (priv->config.dv_esw_en && !priv->config.vf) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret)
				goto error;
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
}
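/*
 * mlx5_traffic_restart() below is the helper used by runtime configuration
 * paths such as MAC address, VLAN filter and promiscuous/all-multicast
 * updates to re-install the control flows on a running port; it is a no-op
 * while the port is stopped.
 */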
/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}