1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <unistd.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <errno.h> 12 13 #include <rte_ethdev_driver.h> 14 #include <rte_bus_pci.h> 15 #include <rte_mbuf.h> 16 #include <rte_common.h> 17 #include <rte_interrupts.h> 18 #include <rte_malloc.h> 19 #include <rte_string_fns.h> 20 #include <rte_rwlock.h> 21 #include <rte_cycles.h> 22 23 #include <mlx5_malloc.h> 24 25 #include "mlx5_rxtx.h" 26 #include "mlx5_autoconf.h" 27 28 /** 29 * Get the interface index from device name. 30 * 31 * @param[in] dev 32 * Pointer to Ethernet device. 33 * 34 * @return 35 * Nonzero interface index on success, zero otherwise and rte_errno is set. 36 */ 37 unsigned int 38 mlx5_ifindex(const struct rte_eth_dev *dev) 39 { 40 struct mlx5_priv *priv = dev->data->dev_private; 41 unsigned int ifindex; 42 43 MLX5_ASSERT(priv); 44 MLX5_ASSERT(priv->if_index); 45 ifindex = priv->bond_ifindex > 0 ? priv->bond_ifindex : priv->if_index; 46 if (!ifindex) 47 rte_errno = ENXIO; 48 return ifindex; 49 } 50 51 /** 52 * DPDK callback for Ethernet device configuration. 53 * 54 * @param dev 55 * Pointer to Ethernet device structure. 56 * 57 * @return 58 * 0 on success, a negative errno value otherwise and rte_errno is set. 59 */ 60 int 61 mlx5_dev_configure(struct rte_eth_dev *dev) 62 { 63 struct mlx5_priv *priv = dev->data->dev_private; 64 unsigned int rxqs_n = dev->data->nb_rx_queues; 65 unsigned int txqs_n = dev->data->nb_tx_queues; 66 const uint8_t use_app_rss_key = 67 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 68 int ret = 0; 69 70 if (use_app_rss_key && 71 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 72 MLX5_RSS_HASH_KEY_LEN)) { 73 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 74 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 75 rte_errno = EINVAL; 76 return -rte_errno; 77 } 78 priv->rss_conf.rss_key = 79 mlx5_realloc(priv->rss_conf.rss_key, MLX5_MEM_RTE, 80 MLX5_RSS_HASH_KEY_LEN, 0, SOCKET_ID_ANY); 81 if (!priv->rss_conf.rss_key) { 82 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 83 dev->data->port_id, rxqs_n); 84 rte_errno = ENOMEM; 85 return -rte_errno; 86 } 87 88 if ((dev->data->dev_conf.txmode.offloads & 89 DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP) && 90 rte_mbuf_dyn_tx_timestamp_register(NULL, NULL) != 0) { 91 DRV_LOG(ERR, "port %u cannot register Tx timestamp field/flag", 92 dev->data->port_id); 93 return -rte_errno; 94 } 95 memcpy(priv->rss_conf.rss_key, 96 use_app_rss_key ? 97 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 98 rss_hash_default_key, 99 MLX5_RSS_HASH_KEY_LEN); 100 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 101 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 102 priv->rxqs = (void *)dev->data->rx_queues; 103 priv->txqs = (void *)dev->data->tx_queues; 104 if (txqs_n != priv->txqs_n) { 105 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 106 dev->data->port_id, priv->txqs_n, txqs_n); 107 priv->txqs_n = txqs_n; 108 } 109 if (rxqs_n > priv->config.ind_table_max_size) { 110 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 111 dev->data->port_id, rxqs_n); 112 rte_errno = EINVAL; 113 return -rte_errno; 114 } 115 if (rxqs_n != priv->rxqs_n) { 116 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 117 dev->data->port_id, priv->rxqs_n, rxqs_n); 118 priv->rxqs_n = rxqs_n; 119 } 120 priv->skip_default_rss_reta = 0; 121 ret = mlx5_proc_priv_init(dev); 122 if (ret) 123 return ret; 124 return 0; 125 } 126 127 /** 128 * Configure default RSS reta. 129 * 130 * @param dev 131 * Pointer to Ethernet device structure. 132 * 133 * @return 134 * 0 on success, a negative errno value otherwise and rte_errno is set. 135 */ 136 int 137 mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev) 138 { 139 struct mlx5_priv *priv = dev->data->dev_private; 140 unsigned int rxqs_n = dev->data->nb_rx_queues; 141 unsigned int i; 142 unsigned int j; 143 unsigned int reta_idx_n; 144 int ret = 0; 145 unsigned int *rss_queue_arr = NULL; 146 unsigned int rss_queue_n = 0; 147 148 if (priv->skip_default_rss_reta) 149 return ret; 150 rss_queue_arr = mlx5_malloc(0, rxqs_n * sizeof(unsigned int), 0, 151 SOCKET_ID_ANY); 152 if (!rss_queue_arr) { 153 DRV_LOG(ERR, "port %u cannot allocate RSS queue list (%u)", 154 dev->data->port_id, rxqs_n); 155 rte_errno = ENOMEM; 156 return -rte_errno; 157 } 158 for (i = 0, j = 0; i < rxqs_n; i++) { 159 struct mlx5_rxq_data *rxq_data; 160 struct mlx5_rxq_ctrl *rxq_ctrl; 161 162 rxq_data = (*priv->rxqs)[i]; 163 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 164 if (rxq_ctrl && rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) 165 rss_queue_arr[j++] = i; 166 } 167 rss_queue_n = j; 168 if (rss_queue_n > priv->config.ind_table_max_size) { 169 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 170 dev->data->port_id, rss_queue_n); 171 rte_errno = EINVAL; 172 mlx5_free(rss_queue_arr); 173 return -rte_errno; 174 } 175 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 176 dev->data->port_id, priv->rxqs_n, rxqs_n); 177 priv->rxqs_n = rxqs_n; 178 /* 179 * If the requested number of RX queues is not a power of two, 180 * use the maximum indirection table size for better balancing. 181 * The result is always rounded to the next power of two. 182 */ 183 reta_idx_n = (1 << log2above((rss_queue_n & (rss_queue_n - 1)) ? 184 priv->config.ind_table_max_size : 185 rss_queue_n)); 186 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 187 if (ret) { 188 mlx5_free(rss_queue_arr); 189 return ret; 190 } 191 /* 192 * When the number of RX queues is not a power of two, 193 * the remaining table entries are padded with reused WQs 194 * and hashes are not spread uniformly. 195 */ 196 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 197 (*priv->reta_idx)[i] = rss_queue_arr[j]; 198 if (++j == rss_queue_n) 199 j = 0; 200 } 201 mlx5_free(rss_queue_arr); 202 return ret; 203 } 204 205 /** 206 * Sets default tuning parameters. 207 * 208 * @param dev 209 * Pointer to Ethernet device. 210 * @param[out] info 211 * Info structure output buffer. 212 */ 213 static void 214 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 215 { 216 struct mlx5_priv *priv = dev->data->dev_private; 217 218 /* Minimum CPU utilization. */ 219 info->default_rxportconf.ring_size = 256; 220 info->default_txportconf.ring_size = 256; 221 info->default_rxportconf.burst_size = MLX5_RX_DEFAULT_BURST; 222 info->default_txportconf.burst_size = MLX5_TX_DEFAULT_BURST; 223 if ((priv->link_speed_capa & ETH_LINK_SPEED_200G) | 224 (priv->link_speed_capa & ETH_LINK_SPEED_100G)) { 225 info->default_rxportconf.nb_queues = 16; 226 info->default_txportconf.nb_queues = 16; 227 if (dev->data->nb_rx_queues > 2 || 228 dev->data->nb_tx_queues > 2) { 229 /* Max Throughput. */ 230 info->default_rxportconf.ring_size = 2048; 231 info->default_txportconf.ring_size = 2048; 232 } 233 } else { 234 info->default_rxportconf.nb_queues = 8; 235 info->default_txportconf.nb_queues = 8; 236 if (dev->data->nb_rx_queues > 2 || 237 dev->data->nb_tx_queues > 2) { 238 /* Max Throughput. */ 239 info->default_rxportconf.ring_size = 4096; 240 info->default_txportconf.ring_size = 4096; 241 } 242 } 243 } 244 245 /** 246 * Sets tx mbuf limiting parameters. 247 * 248 * @param dev 249 * Pointer to Ethernet device. 250 * @param[out] info 251 * Info structure output buffer. 252 */ 253 static void 254 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 255 { 256 struct mlx5_priv *priv = dev->data->dev_private; 257 struct mlx5_dev_config *config = &priv->config; 258 unsigned int inlen; 259 uint16_t nb_max; 260 261 inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ? 262 MLX5_SEND_DEF_INLINE_LEN : 263 (unsigned int)config->txq_inline_max; 264 MLX5_ASSERT(config->txq_inline_min >= 0); 265 inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min); 266 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX + 267 MLX5_ESEG_MIN_INLINE_SIZE - 268 MLX5_WQE_CSEG_SIZE - 269 MLX5_WQE_ESEG_SIZE - 270 MLX5_WQE_DSEG_SIZE * 2); 271 nb_max = (MLX5_WQE_SIZE_MAX + 272 MLX5_ESEG_MIN_INLINE_SIZE - 273 MLX5_WQE_CSEG_SIZE - 274 MLX5_WQE_ESEG_SIZE - 275 MLX5_WQE_DSEG_SIZE - 276 inlen) / MLX5_WSEG_SIZE; 277 info->tx_desc_lim.nb_seg_max = nb_max; 278 info->tx_desc_lim.nb_mtu_seg_max = nb_max; 279 } 280 281 /** 282 * DPDK callback to get information about the device. 283 * 284 * @param dev 285 * Pointer to Ethernet device structure. 286 * @param[out] info 287 * Info structure output buffer. 288 */ 289 int 290 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 291 { 292 struct mlx5_priv *priv = dev->data->dev_private; 293 struct mlx5_dev_config *config = &priv->config; 294 unsigned int max; 295 296 /* FIXME: we should ask the device for these values. */ 297 info->min_rx_bufsize = 32; 298 info->max_rx_pktlen = 65536; 299 info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE; 300 /* 301 * Since we need one CQ per QP, the limit is the minimum number 302 * between the two values. 303 */ 304 max = RTE_MIN(priv->sh->device_attr.max_cq, 305 priv->sh->device_attr.max_qp); 306 /* max_rx_queues is uint16_t. */ 307 max = RTE_MIN(max, (unsigned int)UINT16_MAX); 308 info->max_rx_queues = max; 309 info->max_tx_queues = max; 310 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 311 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 312 info->rx_seg_capa.max_nseg = MLX5_MAX_RXQ_NSEG; 313 info->rx_seg_capa.multi_pools = 1; 314 info->rx_seg_capa.offset_allowed = 1; 315 info->rx_seg_capa.offset_align_log2 = 0; 316 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 317 info->rx_queue_offload_capa); 318 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 319 info->if_index = mlx5_ifindex(dev); 320 info->reta_size = priv->reta_idx_n ? 321 priv->reta_idx_n : config->ind_table_max_size; 322 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 323 info->speed_capa = priv->link_speed_capa; 324 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 325 mlx5_set_default_params(dev, info); 326 mlx5_set_txlimit_params(dev, info); 327 info->switch_info.name = dev->data->name; 328 info->switch_info.domain_id = priv->domain_id; 329 info->switch_info.port_id = priv->representor_id; 330 if (priv->representor) { 331 uint16_t port_id; 332 333 if (priv->pf_bond >= 0) { 334 /* 335 * Switch port ID is opaque value with driver defined 336 * format. Push the PF index in bonding configurations 337 * in upper four bits of port ID. If we get too many 338 * representors (more than 4K) or PFs (more than 15) 339 * this approach must be reconsidered. 340 */ 341 /* Switch port ID for VF representors: 0 - 0xFFE */ 342 if ((info->switch_info.port_id != 0xffff && 343 info->switch_info.port_id >= 344 ((1 << MLX5_PORT_ID_BONDING_PF_SHIFT) - 1)) || 345 priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) { 346 DRV_LOG(ERR, "can't update switch port ID" 347 " for bonding device"); 348 MLX5_ASSERT(false); 349 return -ENODEV; 350 } 351 /* 352 * Switch port ID for Host PF representor 353 * (representor_id is -1) , set to 0xFFF 354 */ 355 if (info->switch_info.port_id == 0xffff) 356 info->switch_info.port_id = 0xfff; 357 info->switch_info.port_id |= 358 priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT; 359 } 360 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 361 struct mlx5_priv *opriv = 362 rte_eth_devices[port_id].data->dev_private; 363 364 if (!opriv || 365 opriv->representor || 366 opriv->sh != priv->sh || 367 opriv->domain_id != priv->domain_id) 368 continue; 369 /* 370 * Override switch name with that of the master 371 * device. 372 */ 373 info->switch_info.name = opriv->dev_data->name; 374 break; 375 } 376 } 377 return 0; 378 } 379 380 /** 381 * Get firmware version of a device. 382 * 383 * @param dev 384 * Ethernet device port. 385 * @param fw_ver 386 * String output allocated by caller. 387 * @param fw_size 388 * Size of the output string, including terminating null byte. 389 * 390 * @return 391 * 0 on success, or the size of the non truncated string if too big. 392 */ 393 int 394 mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) 395 { 396 struct mlx5_priv *priv = dev->data->dev_private; 397 struct mlx5_dev_attr *attr = &priv->sh->device_attr; 398 size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1; 399 400 if (fw_size < size) 401 return size; 402 if (fw_ver != NULL) 403 strlcpy(fw_ver, attr->fw_ver, fw_size); 404 return 0; 405 } 406 407 /** 408 * Get supported packet types. 409 * 410 * @param dev 411 * Pointer to Ethernet device structure. 412 * 413 * @return 414 * A pointer to the supported Packet types array. 415 */ 416 const uint32_t * 417 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 418 { 419 static const uint32_t ptypes[] = { 420 /* refers to rxq_cq_to_pkt_type() */ 421 RTE_PTYPE_L2_ETHER, 422 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 423 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 424 RTE_PTYPE_L4_NONFRAG, 425 RTE_PTYPE_L4_FRAG, 426 RTE_PTYPE_L4_TCP, 427 RTE_PTYPE_L4_UDP, 428 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 429 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 430 RTE_PTYPE_INNER_L4_NONFRAG, 431 RTE_PTYPE_INNER_L4_FRAG, 432 RTE_PTYPE_INNER_L4_TCP, 433 RTE_PTYPE_INNER_L4_UDP, 434 RTE_PTYPE_UNKNOWN 435 }; 436 437 if (dev->rx_pkt_burst == mlx5_rx_burst || 438 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 439 dev->rx_pkt_burst == mlx5_rx_burst_vec || 440 dev->rx_pkt_burst == mlx5_rx_burst_mprq_vec) 441 return ptypes; 442 return NULL; 443 } 444 445 /** 446 * DPDK callback to change the MTU. 447 * 448 * @param dev 449 * Pointer to Ethernet device structure. 450 * @param in_mtu 451 * New MTU. 452 * 453 * @return 454 * 0 on success, a negative errno value otherwise and rte_errno is set. 455 */ 456 int 457 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 458 { 459 struct mlx5_priv *priv = dev->data->dev_private; 460 uint16_t kern_mtu = 0; 461 int ret; 462 463 ret = mlx5_get_mtu(dev, &kern_mtu); 464 if (ret) 465 return ret; 466 /* Set kernel interface MTU first. */ 467 ret = mlx5_set_mtu(dev, mtu); 468 if (ret) 469 return ret; 470 ret = mlx5_get_mtu(dev, &kern_mtu); 471 if (ret) 472 return ret; 473 if (kern_mtu == mtu) { 474 priv->mtu = mtu; 475 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 476 dev->data->port_id, mtu); 477 return 0; 478 } 479 rte_errno = EAGAIN; 480 return -rte_errno; 481 } 482 483 /** 484 * Configure the RX function to use. 485 * 486 * @param dev 487 * Pointer to private data structure. 488 * 489 * @return 490 * Pointer to selected Rx burst function. 491 */ 492 eth_rx_burst_t 493 mlx5_select_rx_function(struct rte_eth_dev *dev) 494 { 495 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 496 497 MLX5_ASSERT(dev != NULL); 498 if (mlx5_check_vec_rx_support(dev) > 0) { 499 if (mlx5_mprq_enabled(dev)) { 500 rx_pkt_burst = mlx5_rx_burst_mprq_vec; 501 DRV_LOG(DEBUG, "port %u selected vectorized" 502 " MPRQ Rx function", dev->data->port_id); 503 } else { 504 rx_pkt_burst = mlx5_rx_burst_vec; 505 DRV_LOG(DEBUG, "port %u selected vectorized" 506 " SPRQ Rx function", dev->data->port_id); 507 } 508 } else if (mlx5_mprq_enabled(dev)) { 509 rx_pkt_burst = mlx5_rx_burst_mprq; 510 DRV_LOG(DEBUG, "port %u selected MPRQ Rx function", 511 dev->data->port_id); 512 } else { 513 DRV_LOG(DEBUG, "port %u selected SPRQ Rx function", 514 dev->data->port_id); 515 } 516 return rx_pkt_burst; 517 } 518 519 /** 520 * Get the E-Switch parameters by port id. 521 * 522 * @param[in] port 523 * Device port id. 524 * @param[in] valid 525 * Device port id is valid, skip check. This flag is useful 526 * when trials are performed from probing and device is not 527 * flagged as valid yet (in attaching process). 528 * @param[out] es_domain_id 529 * E-Switch domain id. 530 * @param[out] es_port_id 531 * The port id of the port in the E-Switch. 532 * 533 * @return 534 * pointer to device private data structure containing data needed 535 * on success, NULL otherwise and rte_errno is set. 536 */ 537 struct mlx5_priv * 538 mlx5_port_to_eswitch_info(uint16_t port, bool valid) 539 { 540 struct rte_eth_dev *dev; 541 struct mlx5_priv *priv; 542 543 if (port >= RTE_MAX_ETHPORTS) { 544 rte_errno = EINVAL; 545 return NULL; 546 } 547 if (!valid && !rte_eth_dev_is_valid_port(port)) { 548 rte_errno = ENODEV; 549 return NULL; 550 } 551 dev = &rte_eth_devices[port]; 552 priv = dev->data->dev_private; 553 if (!(priv->representor || priv->master)) { 554 rte_errno = EINVAL; 555 return NULL; 556 } 557 return priv; 558 } 559 560 /** 561 * Get the E-Switch parameters by device instance. 562 * 563 * @param[in] port 564 * Device port id. 565 * @param[out] es_domain_id 566 * E-Switch domain id. 567 * @param[out] es_port_id 568 * The port id of the port in the E-Switch. 569 * 570 * @return 571 * pointer to device private data structure containing data needed 572 * on success, NULL otherwise and rte_errno is set. 573 */ 574 struct mlx5_priv * 575 mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev) 576 { 577 struct mlx5_priv *priv; 578 579 priv = dev->data->dev_private; 580 if (!(priv->representor || priv->master)) { 581 rte_errno = EINVAL; 582 return NULL; 583 } 584 return priv; 585 } 586 587 /** 588 * DPDK callback to retrieve hairpin capabilities. 589 * 590 * @param dev 591 * Pointer to Ethernet device structure. 592 * @param[out] cap 593 * Storage for hairpin capability data. 594 * 595 * @return 596 * 0 on success, a negative errno value otherwise and rte_errno is set. 597 */ 598 int 599 mlx5_hairpin_cap_get(struct rte_eth_dev *dev, struct rte_eth_hairpin_cap *cap) 600 { 601 struct mlx5_priv *priv = dev->data->dev_private; 602 struct mlx5_dev_config *config = &priv->config; 603 604 if (!priv->sh->devx || !config->dest_tir || !config->dv_flow_en) { 605 rte_errno = ENOTSUP; 606 return -rte_errno; 607 } 608 cap->max_nb_queues = UINT16_MAX; 609 cap->max_rx_2_tx = 1; 610 cap->max_tx_2_rx = 1; 611 cap->max_nb_desc = 8192; 612 return 0; 613 } 614