/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>

#include <mlx5_malloc.h>

#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
#include "rte_pmd_mlx5.h"

/**
 * Stop traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_txq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->txqs_n; ++i)
		mlx5_txq_release(dev, i);
}

/**
 * Start traffic on Tx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_txq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret;

	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
			txq_ctrl->obj = mlx5_txq_obj_new
				(dev, i, MLX5_TXQ_OBJ_TYPE_DEVX_HAIRPIN);
		} else {
			txq_alloc_elts(txq_ctrl);
			txq_ctrl->obj = mlx5_txq_obj_new
				(dev, i, priv->txpp_en ?
				 MLX5_TXQ_OBJ_TYPE_DEVX_SQ :
				 MLX5_TXQ_OBJ_TYPE_IBV);
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_txq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Stop traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_rxq_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i)
		mlx5_rxq_release(dev, i);
}

/**
 * Start traffic on Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_rxq_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;
	int ret = 0;

	/* Allocate/reuse/resize mempool for Multi-Packet RQ. */
	if (mlx5_mprq_alloc_mp(dev)) {
		/* Should not release Rx queues but return immediately. */
		return -rte_errno;
	}
	DRV_LOG(DEBUG, "Port %u device_attr.max_qp_wr is %d.",
		dev->data->port_id, priv->sh->device_attr.max_qp_wr);
	DRV_LOG(DEBUG, "Port %u device_attr.max_sge is %d.",
		dev->data->port_id, priv->sh->device_attr.max_sge);
	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i);
		struct rte_mempool *mp;

		if (!rxq_ctrl)
			continue;
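		/*
		 * Standard (non-hairpin) queues need their mempool
		 * registered for MR lookups and their SW ring elements
		 * allocated before the HW queue object is created.
		 */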
		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
			/* Pre-register Rx mempool. */
			mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
			     rxq_ctrl->rxq.mprq_mp : rxq_ctrl->rxq.mp;
			DRV_LOG(DEBUG, "Port %u Rx queue %u registering mp %s"
				" having %u chunks.", dev->data->port_id,
				rxq_ctrl->rxq.idx, mp->name, mp->nb_mem_chunks);
			mlx5_mr_update_mp(dev, &rxq_ctrl->rxq.mr_ctrl, mp);
			ret = rxq_alloc_elts(rxq_ctrl);
			if (ret)
				goto error;
		}
		MLX5_ASSERT(!rxq_ctrl->obj);
		rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
					    sizeof(*rxq_ctrl->obj), 0,
					    rxq_ctrl->socket);
		if (!rxq_ctrl->obj) {
			DRV_LOG(ERR,
				"Port %u Rx queue %u can't allocate resources.",
				dev->data->port_id, (*priv->rxqs)[i]->idx);
			rte_errno = ENOMEM;
			goto error;
		}
		ret = priv->obj_ops.rxq_obj_new(dev, i);
		if (ret) {
			mlx5_free(rxq_ctrl->obj);
			goto error;
		}
		DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.",
			dev->data->port_id, i, (void *)&rxq_ctrl->obj);
		LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	do {
		mlx5_rxq_release(dev, i);
	} while (i-- != 0);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Binds Tx queues to Rx queues for hairpin.
 *
 * Binds Tx queues to the target Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_hairpin_bind(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
	struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
	struct mlx5_txq_ctrl *txq_ctrl;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	struct mlx5_devx_obj *sq;
	struct mlx5_devx_obj *rq;
	unsigned int i;
	int ret = 0;

	for (i = 0; i != priv->txqs_n; ++i) {
		txq_ctrl = mlx5_txq_get(dev, i);
		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type != MLX5_TXQ_TYPE_HAIRPIN) {
			mlx5_txq_release(dev, i);
			continue;
		}
		if (!txq_ctrl->obj) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u no txq object found: %d",
				dev->data->port_id, i);
			mlx5_txq_release(dev, i);
			return -rte_errno;
		}
		sq = txq_ctrl->obj->sq;
		rxq_ctrl = mlx5_rxq_get(dev,
					txq_ctrl->hairpin_conf.peers[0].queue);
		if (!rxq_ctrl) {
			mlx5_txq_release(dev, i);
			rte_errno = EINVAL;
			DRV_LOG(ERR, "port %u no rxq object found: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			return -rte_errno;
		}
		if (rxq_ctrl->type != MLX5_RXQ_TYPE_HAIRPIN ||
		    rxq_ctrl->hairpin_conf.peers[0].queue != i) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
				"Rx queue %d", dev->data->port_id,
				i, txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
		rq = rxq_ctrl->obj->rq;
		if (!rq) {
			rte_errno = ENOMEM;
			DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
				dev->data->port_id,
				txq_ctrl->hairpin_conf.peers[0].queue);
			goto error;
		}
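		/*
		 * Cross-link the hairpin pair: move both the SQ and the RQ
		 * from RST to RDY state and point each one at its peer.
		 */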
		sq_attr.state = MLX5_SQC_STATE_RDY;
		sq_attr.sq_state = MLX5_SQC_STATE_RST;
		sq_attr.hairpin_peer_rq = rq->id;
		sq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
		if (ret)
			goto error;
		rq_attr.state = MLX5_SQC_STATE_RDY;
		rq_attr.rq_state = MLX5_SQC_STATE_RST;
		rq_attr.hairpin_peer_sq = sq->id;
		rq_attr.hairpin_peer_vhca = priv->config.hca_attr.vhca_id;
		ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
		if (ret)
			goto error;
		mlx5_txq_release(dev, i);
		mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	}
	return 0;
error:
	mlx5_txq_release(dev, i);
	mlx5_rxq_release(dev, txq_ctrl->hairpin_conf.peers[0].queue);
	return -rte_errno;
}

/**
 * DPDK callback to start the device.
 *
 * Simulate device start by attaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_start(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret;
	int fine_inline;

	DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
	fine_inline = rte_mbuf_dynflag_lookup
		(RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
	if (fine_inline > 0)
		rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
	else
		rte_net_mlx5_dynf_inline_mask = 0;
	if (dev->data->nb_rx_queues > 0) {
		ret = mlx5_dev_configure_rss_reta(dev);
		if (ret) {
			DRV_LOG(ERR, "port %u reta config failed: %s",
				dev->data->port_id, strerror(rte_errno));
			return -rte_errno;
		}
	}
	ret = mlx5_txpp_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_txq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_rxq_start(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	ret = mlx5_hairpin_bind(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u hairpin binding failed: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	/* Set started flag here for the following steps like control flows. */
	dev->data->dev_started = 1;
	ret = mlx5_rx_intr_vec_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
			dev->data->port_id);
		goto error;
	}
	mlx5_os_stats_init(dev);
	ret = mlx5_traffic_enable(dev);
	if (ret) {
		DRV_LOG(ERR, "port %u failed to set default flows",
			dev->data->port_id);
		goto error;
	}
	/* Set a mask and offset of dynamic metadata flows into Rx queues. */
	mlx5_flow_rxq_dynf_metadata_set(dev);
	/* Set flags and context to convert Rx timestamps. */
	mlx5_rxq_timestamp_set(dev);
	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
	mlx5_txq_dynf_timestamp_set(dev);
	/*
	 * In non-cached mode, only the default mreg copy action needs to be
	 * started, as no application-created flow exists at this point.
	 * But it is worth wrapping the interface for further usage.
	 */
	ret = mlx5_flow_start_default(dev);
	if (ret) {
		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
			dev->data->port_id, strerror(rte_errno));
		goto error;
	}
	rte_wmb();
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
	dev->rx_pkt_burst = mlx5_select_rx_function(dev);
	/* Enable datapath on secondary process. */
	mlx5_mp_os_req_start_rxtx(dev);
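	/*
	 * Register this port in the shared context so that events from
	 * the shared LSC/RMV and DevX interrupt handlers are dispatched
	 * to it.
	 */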
	if (priv->sh->intr_handle.fd >= 0) {
		priv->sh->port[priv->dev_port - 1].ih_port_id =
					(uint32_t)dev->data->port_id;
	} else {
		DRV_LOG(INFO, "port %u starts without LSC and RMV interrupts.",
			dev->data->port_id);
		dev->data->dev_conf.intr_conf.lsc = 0;
		dev->data->dev_conf.intr_conf.rmv = 0;
	}
	if (priv->sh->intr_handle_devx.fd >= 0)
		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
					(uint32_t)dev->data->port_id;
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	/* Rollback. */
	dev->data->dev_started = 0;
	mlx5_flow_stop_default(dev);
	mlx5_traffic_disable(dev);
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev); /* Stop last. */
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * DPDK callback to stop the device.
 *
 * Simulate device stop by detaching all configured flows.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_dev_stop(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	dev->data->dev_started = 0;
	/* Prevent crashes when queues are still in use. */
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	rte_wmb();
	/* Disable datapath on secondary process. */
	mlx5_mp_os_req_stop_rxtx(dev);
	usleep(1000 * priv->rxqs_n);
	DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
	mlx5_flow_stop_default(dev);
	/* Control flows for default traffic are removed first. */
	mlx5_traffic_disable(dev);
	/* All RX queue flags will be cleared in the flush interface. */
	mlx5_flow_list_flush(dev, &priv->flows, true);
	mlx5_rx_intr_vec_disable(dev);
	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
	mlx5_txq_stop(dev);
	mlx5_rxq_stop(dev);
	mlx5_txpp_stop(dev);
}

/**
 * Enable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_enable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast = {
		.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth unicast_mask = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	const unsigned int vlan_filter_n = priv->vlan_filter_n;
	const struct rte_ether_addr cmp = {
		.addr_bytes = "\x00\x00\x00\x00\x00\x00",
	};
	unsigned int i;
	unsigned int j;
	int ret;

	/*
	 * The hairpin Tx queue default flow should be created regardless of
	 * isolation mode. Otherwise, all packets to be sent would go out
	 * directly, bypassing the Tx flow actions, e.g. encapsulation.
	 */
	for (i = 0; i != priv->txqs_n; ++i) {
		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);

		if (!txq_ctrl)
			continue;
		if (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {
			ret = mlx5_ctrl_flow_source_queue(dev, i);
			if (ret) {
				mlx5_txq_release(dev, i);
				goto error;
			}
		}
		mlx5_txq_release(dev, i);
	}
	if (priv->config.dv_esw_en && !priv->config.vf) {
		if (mlx5_flow_create_esw_table_zero_flow(dev))
			priv->fdb_def_rule = 1;
		else
			DRV_LOG(INFO, "port %u FDB default rule cannot be"
				" configured - only Eswitch group 0 flows are"
				" supported.", dev->data->port_id);
	}
	if (!priv->config.lacp_by_user && priv->pf_bond >= 0) {
		ret = mlx5_flow_lacp_miss(dev);
		if (ret)
			DRV_LOG(INFO, "port %u LACP rule cannot be created - "
				"forward LACP to kernel.", dev->data->port_id);
		else
			DRV_LOG(INFO, "LACP traffic will be missed in port %u.",
				dev->data->port_id);
	}
	if (priv->isolated)
		return 0;
	if (dev->data->promiscuous) {
		struct rte_flow_item_eth promisc = {
			.dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
		if (ret)
			goto error;
	}
	if (dev->data->all_multicast) {
		struct rte_flow_item_eth multicast = {
			.dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
			.src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
			.type = 0,
		};

		ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
		if (ret)
			goto error;
	} else {
		/* Add broadcast/multicast flows. */
		for (i = 0; i != vlan_filter_n; ++i) {
			uint16_t vlan = priv->vlan_filter[i];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
						  &ipv6_multi_mask,
						  &vlan_spec, &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
			if (ret)
				goto error;
			ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
					     &ipv6_multi_mask);
			if (ret)
				goto error;
		}
	}
	/* Add MAC address flows. */
	for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
		struct rte_ether_addr *mac = &dev->data->mac_addrs[i];

		if (!memcmp(mac, &cmp, sizeof(*mac)))
			continue;
		memcpy(&unicast.dst.addr_bytes,
		       mac->addr_bytes,
		       RTE_ETHER_ADDR_LEN);
		for (j = 0; j != vlan_filter_n; ++j) {
			uint16_t vlan = priv->vlan_filter[j];

			struct rte_flow_item_vlan vlan_spec = {
				.tci = rte_cpu_to_be_16(vlan),
			};
			struct rte_flow_item_vlan vlan_mask =
				rte_flow_item_vlan_mask;

			ret = mlx5_ctrl_flow_vlan(dev, &unicast,
						  &unicast_mask,
						  &vlan_spec,
						  &vlan_mask);
			if (ret)
				goto error;
		}
		if (!vlan_filter_n) {
			ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
			if (ret)
				goto error;
		}
	}
	return 0;
error:
	ret = rte_errno; /* Save rte_errno before cleanup. */
	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
	rte_errno = ret; /* Restore rte_errno. */
	return -rte_errno;
}

/**
 * Disable traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	mlx5_flow_list_flush(dev, &priv->ctrl_flows, false);
}

/**
 * Restart traffic flows configured by control plane.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
	if (dev->data->dev_started) {
		mlx5_traffic_disable(dev);
		return mlx5_traffic_enable(dev);
	}
	return 0;
}