/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_STANDARD)
			txq_alloc_elts(txq_ctrl);
		txq_ctrl->obj = priv->obj_ops.txq_obj_new(dev, i);
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
		struct rte_mempool *mp;

		if (!rxq_ctrl)
			continue;
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
			/* Pre-register Rx mempool. */
			mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
			     rxq_ctrl->rxq.mprq_mp : rxq_ctrl->rxq.mp;
			DRV_LOG(DEBUG, "Port %u Rx queue %u registering mp %s"
				" having %u chunks.", dev->data->port_id,
				rxq_ctrl->rxq.idx, mp->name, mp->nb_mem_chunks);
			mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
			ret = rxq_alloc_elts(rxq_ctrl);
			if (ret)
				goto error;
		}
		MLX5_ASSERT(!rxq_ctrl->obj);
		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
					    sizeof(*rxq_ctrl->obj), 0,
					    rxq_ctrl->socket);
		if (!rxq_ctrl->obj) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u can't allocate resources.",
				dev->data->port_id, (*priv->rxqs)[i]->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.rxq_obj_new(dev, i);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
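
/*
 * Note: both start helpers above create the hardware queue objects through
 * priv->obj_ops (->txq_obj_new() / ->rxq_obj_new()), so the same start path
 * works whichever backend (typically Verbs or DevX) was selected at probe
 * time.  A simplified, hypothetical view of such an ops table (field subset
 * only, function names invented for illustration):
 *
 *	static const struct mlx5_obj_ops example_obj_ops = {
 *		.rxq_obj_new = example_rxq_obj_new,
 *		.txq_obj_new = example_txq_obj_new,
 *	};
 */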

/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds Tx queues to the target Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}
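
/*
 * The peer queue indexes consumed by mlx5_hairpin_bind() come from the
 * hairpin queue setup done by the application before starting the port.
 * A rough, illustrative sketch of a single-port "loopback" hairpin setup
 * (txq_id/rxq_id are placeholder names, error handling omitted):
 *
 *	struct rte_eth_hairpin_conf conf = { .peer_count = 1 };
 *
 *	conf.peers[0].port = port_id;
 *	conf.peers[0].queue = rxq_id;
 *	rte_eth_tx_hairpin_queue_setup(port_id, txq_id, nb_desc, &conf);
 *	conf.peers[0].queue = txq_id;
 *	rte_eth_rx_hairpin_queue_setup(port_id, rxq_id, nb_desc, &conf);
 *
 * The binding itself is symmetric: the SQ is moved RST->RDY pointing at the
 * peer RQ id, then the RQ is moved RST->RDY pointing back at the SQ id, both
 * within the same VHCA.
 */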

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline > 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_hairpin_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps, like control flows. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, only the default mreg copy action needs to be
	 * started; no flow created by the application exists anymore.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
	if (priv->sh->intr_handle.fd >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (priv->sh->intr_handle_devx.fd >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
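
/*
 * mlx5_dev_start() is not called directly by applications; it is wired up as
 * the ethdev dev_start callback.  The usual application-side bring-up looks
 * roughly like this (illustrative only, error handling omitted, "mp" being a
 * previously created mbuf mempool):
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 *	for (q = 0; q != nb_rxq; ++q)
 *		rte_eth_rx_queue_setup(port_id, q, nb_desc, socket, NULL, mp);
 *	for (q = 0; q != nb_txq; ++q)
 *		rte_eth_tx_queue_setup(port_id, q, nb_desc, socket, NULL);
 *	rte_eth_dev_start(port_id);	// Dispatches to mlx5_dev_start().
 */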

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	usleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic can be removed first. */
	mlx5_traffic_disable(dev);
	/* All RX queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, &priv->flows, true);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev);
}
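
/*
 * The stop sequence above quiesces the datapath before anything is released:
 * the burst callbacks are replaced with the removed_*_burst() stubs, the
 * change is made visible with rte_wmb(), secondary processes are told to
 * stop, and the usleep() gives in-flight burst calls a chance to drain.
 * Only then are flows, interrupts and queues torn down.  From the
 * application side the mirror image is simply (illustrative):
 *
 *	rte_eth_dev_stop(port_id);	// Dispatches to mlx5_dev_stop().
 *	rte_eth_dev_close(port_id);
 */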

/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow should be created regardless of
	 * isolation mode; otherwise, packets would be sent out directly
	 * without the Tx flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (priv->config.dv_esw_en && !priv->config.vf) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret)
				goto error;
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}
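
/*
 * Rough count of the control flows installed by mlx5_traffic_enable() on a
 * non-isolated port with promiscuous and all-multicast off:
 *
 *	broadcast + IPv6 multicast:	2 * max(vlan_filter_n, 1)
 *	unicast (per configured MAC):	n_mac * max(vlan_filter_n, 1)
 *
 * For example, 2 MAC addresses and 3 VLAN filters give 2 * 3 = 6
 * broadcast/multicast flows plus 2 * 3 = 6 unicast flows.  Promiscuous mode
 * adds one catch-all flow on top; all-multicast mode replaces the
 * broadcast/multicast set with a single multicast catch-all flow.
 */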

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}
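
/*
 * mlx5_traffic_restart() is the hook the rest of the PMD is expected to use
 * whenever the control-flow set must be recomputed while the port is running,
 * e.g. after MAC address, VLAN filter or promiscuous/all-multicast changes.
 * A typical caller (illustrative) follows this pattern:
 *
 *	dev->data->promiscuous = 1;
 *	ret = mlx5_traffic_restart(dev);	// Rebuild control flows.
 */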