1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <unistd.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <errno.h> 12 13 #include <rte_atomic.h> 14 #include <rte_ethdev_driver.h> 15 #include <rte_bus_pci.h> 16 #include <rte_mbuf.h> 17 #include <rte_common.h> 18 #include <rte_interrupts.h> 19 #include <rte_malloc.h> 20 #include <rte_string_fns.h> 21 #include <rte_rwlock.h> 22 #include <rte_cycles.h> 23 24 #include "mlx5_rxtx.h" 25 #include "mlx5_autoconf.h" 26 27 /** 28 * Get the interface index from device name. 29 * 30 * @param[in] dev 31 * Pointer to Ethernet device. 32 * 33 * @return 34 * Nonzero interface index on success, zero otherwise and rte_errno is set. 35 */ 36 unsigned int 37 mlx5_ifindex(const struct rte_eth_dev *dev) 38 { 39 struct mlx5_priv *priv = dev->data->dev_private; 40 unsigned int ifindex; 41 42 MLX5_ASSERT(priv); 43 MLX5_ASSERT(priv->if_index); 44 ifindex = priv->if_index; 45 if (!ifindex) 46 rte_errno = ENXIO; 47 return ifindex; 48 } 49 50 /** 51 * DPDK callback for Ethernet device configuration. 52 * 53 * @param dev 54 * Pointer to Ethernet device structure. 55 * 56 * @return 57 * 0 on success, a negative errno value otherwise and rte_errno is set. 58 */ 59 int 60 mlx5_dev_configure(struct rte_eth_dev *dev) 61 { 62 struct mlx5_priv *priv = dev->data->dev_private; 63 unsigned int rxqs_n = dev->data->nb_rx_queues; 64 unsigned int txqs_n = dev->data->nb_tx_queues; 65 const uint8_t use_app_rss_key = 66 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 67 int ret = 0; 68 69 if (use_app_rss_key && 70 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 71 MLX5_RSS_HASH_KEY_LEN)) { 72 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 73 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 74 rte_errno = EINVAL; 75 return -rte_errno; 76 } 77 priv->rss_conf.rss_key = 78 rte_realloc(priv->rss_conf.rss_key, 79 MLX5_RSS_HASH_KEY_LEN, 0); 80 if (!priv->rss_conf.rss_key) { 81 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 82 dev->data->port_id, rxqs_n); 83 rte_errno = ENOMEM; 84 return -rte_errno; 85 } 86 87 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) 88 dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; 89 90 memcpy(priv->rss_conf.rss_key, 91 use_app_rss_key ? 92 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 93 rss_hash_default_key, 94 MLX5_RSS_HASH_KEY_LEN); 95 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 96 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 97 priv->rxqs = (void *)dev->data->rx_queues; 98 priv->txqs = (void *)dev->data->tx_queues; 99 if (txqs_n != priv->txqs_n) { 100 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 101 dev->data->port_id, priv->txqs_n, txqs_n); 102 priv->txqs_n = txqs_n; 103 } 104 if (rxqs_n > priv->config.ind_table_max_size) { 105 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 106 dev->data->port_id, rxqs_n); 107 rte_errno = EINVAL; 108 return -rte_errno; 109 } 110 if (rxqs_n != priv->rxqs_n) { 111 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 112 dev->data->port_id, priv->rxqs_n, rxqs_n); 113 priv->rxqs_n = rxqs_n; 114 } 115 priv->skip_default_rss_reta = 0; 116 ret = mlx5_proc_priv_init(dev); 117 if (ret) 118 return ret; 119 return 0; 120 } 121 122 /** 123 * Configure default RSS reta. 124 * 125 * @param dev 126 * Pointer to Ethernet device structure. 127 * 128 * @return 129 * 0 on success, a negative errno value otherwise and rte_errno is set. 130 */ 131 int 132 mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev) 133 { 134 struct mlx5_priv *priv = dev->data->dev_private; 135 unsigned int rxqs_n = dev->data->nb_rx_queues; 136 unsigned int i; 137 unsigned int j; 138 unsigned int reta_idx_n; 139 int ret = 0; 140 unsigned int *rss_queue_arr = NULL; 141 unsigned int rss_queue_n = 0; 142 143 if (priv->skip_default_rss_reta) 144 return ret; 145 rss_queue_arr = rte_malloc("", rxqs_n * sizeof(unsigned int), 0); 146 if (!rss_queue_arr) { 147 DRV_LOG(ERR, "port %u cannot allocate RSS queue list (%u)", 148 dev->data->port_id, rxqs_n); 149 rte_errno = ENOMEM; 150 return -rte_errno; 151 } 152 for (i = 0, j = 0; i < rxqs_n; i++) { 153 struct mlx5_rxq_data *rxq_data; 154 struct mlx5_rxq_ctrl *rxq_ctrl; 155 156 rxq_data = (*priv->rxqs)[i]; 157 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 158 if (rxq_ctrl && rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) 159 rss_queue_arr[j++] = i; 160 } 161 rss_queue_n = j; 162 if (rss_queue_n > priv->config.ind_table_max_size) { 163 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 164 dev->data->port_id, rss_queue_n); 165 rte_errno = EINVAL; 166 rte_free(rss_queue_arr); 167 return -rte_errno; 168 } 169 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 170 dev->data->port_id, priv->rxqs_n, rxqs_n); 171 priv->rxqs_n = rxqs_n; 172 /* 173 * If the requested number of RX queues is not a power of two, 174 * use the maximum indirection table size for better balancing. 175 * The result is always rounded to the next power of two. 176 */ 177 reta_idx_n = (1 << log2above((rss_queue_n & (rss_queue_n - 1)) ? 178 priv->config.ind_table_max_size : 179 rss_queue_n)); 180 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 181 if (ret) { 182 rte_free(rss_queue_arr); 183 return ret; 184 } 185 /* 186 * When the number of RX queues is not a power of two, 187 * the remaining table entries are padded with reused WQs 188 * and hashes are not spread uniformly. 189 */ 190 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 191 (*priv->reta_idx)[i] = rss_queue_arr[j]; 192 if (++j == rss_queue_n) 193 j = 0; 194 } 195 rte_free(rss_queue_arr); 196 return ret; 197 } 198 199 /** 200 * Sets default tuning parameters. 201 * 202 * @param dev 203 * Pointer to Ethernet device. 204 * @param[out] info 205 * Info structure output buffer. 206 */ 207 static void 208 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 209 { 210 struct mlx5_priv *priv = dev->data->dev_private; 211 212 /* Minimum CPU utilization. */ 213 info->default_rxportconf.ring_size = 256; 214 info->default_txportconf.ring_size = 256; 215 info->default_rxportconf.burst_size = MLX5_RX_DEFAULT_BURST; 216 info->default_txportconf.burst_size = MLX5_TX_DEFAULT_BURST; 217 if ((priv->link_speed_capa & ETH_LINK_SPEED_200G) | 218 (priv->link_speed_capa & ETH_LINK_SPEED_100G)) { 219 info->default_rxportconf.nb_queues = 16; 220 info->default_txportconf.nb_queues = 16; 221 if (dev->data->nb_rx_queues > 2 || 222 dev->data->nb_tx_queues > 2) { 223 /* Max Throughput. */ 224 info->default_rxportconf.ring_size = 2048; 225 info->default_txportconf.ring_size = 2048; 226 } 227 } else { 228 info->default_rxportconf.nb_queues = 8; 229 info->default_txportconf.nb_queues = 8; 230 if (dev->data->nb_rx_queues > 2 || 231 dev->data->nb_tx_queues > 2) { 232 /* Max Throughput. */ 233 info->default_rxportconf.ring_size = 4096; 234 info->default_txportconf.ring_size = 4096; 235 } 236 } 237 } 238 239 /** 240 * Sets tx mbuf limiting parameters. 241 * 242 * @param dev 243 * Pointer to Ethernet device. 244 * @param[out] info 245 * Info structure output buffer. 246 */ 247 static void 248 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 249 { 250 struct mlx5_priv *priv = dev->data->dev_private; 251 struct mlx5_dev_config *config = &priv->config; 252 unsigned int inlen; 253 uint16_t nb_max; 254 255 inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ? 256 MLX5_SEND_DEF_INLINE_LEN : 257 (unsigned int)config->txq_inline_max; 258 MLX5_ASSERT(config->txq_inline_min >= 0); 259 inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min); 260 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX + 261 MLX5_ESEG_MIN_INLINE_SIZE - 262 MLX5_WQE_CSEG_SIZE - 263 MLX5_WQE_ESEG_SIZE - 264 MLX5_WQE_DSEG_SIZE * 2); 265 nb_max = (MLX5_WQE_SIZE_MAX + 266 MLX5_ESEG_MIN_INLINE_SIZE - 267 MLX5_WQE_CSEG_SIZE - 268 MLX5_WQE_ESEG_SIZE - 269 MLX5_WQE_DSEG_SIZE - 270 inlen) / MLX5_WSEG_SIZE; 271 info->tx_desc_lim.nb_seg_max = nb_max; 272 info->tx_desc_lim.nb_mtu_seg_max = nb_max; 273 } 274 275 /** 276 * DPDK callback to get information about the device. 277 * 278 * @param dev 279 * Pointer to Ethernet device structure. 280 * @param[out] info 281 * Info structure output buffer. 282 */ 283 int 284 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 285 { 286 struct mlx5_priv *priv = dev->data->dev_private; 287 struct mlx5_dev_config *config = &priv->config; 288 unsigned int max; 289 290 /* FIXME: we should ask the device for these values. */ 291 info->min_rx_bufsize = 32; 292 info->max_rx_pktlen = 65536; 293 info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE; 294 /* 295 * Since we need one CQ per QP, the limit is the minimum number 296 * between the two values. 297 */ 298 max = RTE_MIN(priv->sh->device_attr.max_cq, 299 priv->sh->device_attr.max_qp); 300 /* max_rx_queues is uint16_t. */ 301 max = RTE_MIN(max, (unsigned int)UINT16_MAX); 302 info->max_rx_queues = max; 303 info->max_tx_queues = max; 304 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 305 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 306 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 307 info->rx_queue_offload_capa); 308 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 309 info->if_index = mlx5_ifindex(dev); 310 info->reta_size = priv->reta_idx_n ? 311 priv->reta_idx_n : config->ind_table_max_size; 312 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 313 info->speed_capa = priv->link_speed_capa; 314 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 315 mlx5_set_default_params(dev, info); 316 mlx5_set_txlimit_params(dev, info); 317 info->switch_info.name = dev->data->name; 318 info->switch_info.domain_id = priv->domain_id; 319 info->switch_info.port_id = priv->representor_id; 320 if (priv->representor) { 321 uint16_t port_id; 322 323 if (priv->pf_bond >= 0) { 324 /* 325 * Switch port ID is opaque value with driver defined 326 * format. Push the PF index in bonding configurations 327 * in upper four bits of port ID. If we get too many 328 * representors (more than 4K) or PFs (more than 15) 329 * this approach must be reconsidered. 330 */ 331 if ((info->switch_info.port_id >> 332 MLX5_PORT_ID_BONDING_PF_SHIFT) || 333 priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) { 334 DRV_LOG(ERR, "can't update switch port ID" 335 " for bonding device"); 336 MLX5_ASSERT(false); 337 return -ENODEV; 338 } 339 info->switch_info.port_id |= 340 priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT; 341 } 342 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 343 struct mlx5_priv *opriv = 344 rte_eth_devices[port_id].data->dev_private; 345 346 if (!opriv || 347 opriv->representor || 348 opriv->sh != priv->sh || 349 opriv->domain_id != priv->domain_id) 350 continue; 351 /* 352 * Override switch name with that of the master 353 * device. 354 */ 355 info->switch_info.name = opriv->dev_data->name; 356 break; 357 } 358 } 359 return 0; 360 } 361 362 /** 363 * Get firmware version of a device. 364 * 365 * @param dev 366 * Ethernet device port. 367 * @param fw_ver 368 * String output allocated by caller. 369 * @param fw_size 370 * Size of the output string, including terminating null byte. 371 * 372 * @return 373 * 0 on success, or the size of the non truncated string if too big. 374 */ 375 int 376 mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) 377 { 378 struct mlx5_priv *priv = dev->data->dev_private; 379 struct mlx5_dev_attr *attr = &priv->sh->device_attr; 380 size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1; 381 382 if (fw_size < size) 383 return size; 384 if (fw_ver != NULL) 385 strlcpy(fw_ver, attr->fw_ver, fw_size); 386 return 0; 387 } 388 389 /** 390 * Get supported packet types. 391 * 392 * @param dev 393 * Pointer to Ethernet device structure. 394 * 395 * @return 396 * A pointer to the supported Packet types array. 397 */ 398 const uint32_t * 399 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 400 { 401 static const uint32_t ptypes[] = { 402 /* refers to rxq_cq_to_pkt_type() */ 403 RTE_PTYPE_L2_ETHER, 404 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 405 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 406 RTE_PTYPE_L4_NONFRAG, 407 RTE_PTYPE_L4_FRAG, 408 RTE_PTYPE_L4_TCP, 409 RTE_PTYPE_L4_UDP, 410 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 411 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 412 RTE_PTYPE_INNER_L4_NONFRAG, 413 RTE_PTYPE_INNER_L4_FRAG, 414 RTE_PTYPE_INNER_L4_TCP, 415 RTE_PTYPE_INNER_L4_UDP, 416 RTE_PTYPE_UNKNOWN 417 }; 418 419 if (dev->rx_pkt_burst == mlx5_rx_burst || 420 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 421 dev->rx_pkt_burst == mlx5_rx_burst_vec) 422 return ptypes; 423 return NULL; 424 } 425 426 /** 427 * DPDK callback to change the MTU. 428 * 429 * @param dev 430 * Pointer to Ethernet device structure. 431 * @param in_mtu 432 * New MTU. 433 * 434 * @return 435 * 0 on success, a negative errno value otherwise and rte_errno is set. 436 */ 437 int 438 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 439 { 440 struct mlx5_priv *priv = dev->data->dev_private; 441 uint16_t kern_mtu = 0; 442 int ret; 443 444 ret = mlx5_get_mtu(dev, &kern_mtu); 445 if (ret) 446 return ret; 447 /* Set kernel interface MTU first. */ 448 ret = mlx5_set_mtu(dev, mtu); 449 if (ret) 450 return ret; 451 ret = mlx5_get_mtu(dev, &kern_mtu); 452 if (ret) 453 return ret; 454 if (kern_mtu == mtu) { 455 priv->mtu = mtu; 456 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 457 dev->data->port_id, mtu); 458 return 0; 459 } 460 rte_errno = EAGAIN; 461 return -rte_errno; 462 } 463 464 /** 465 * Configure the RX function to use. 466 * 467 * @param dev 468 * Pointer to private data structure. 469 * 470 * @return 471 * Pointer to selected Rx burst function. 472 */ 473 eth_rx_burst_t 474 mlx5_select_rx_function(struct rte_eth_dev *dev) 475 { 476 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 477 478 MLX5_ASSERT(dev != NULL); 479 if (mlx5_check_vec_rx_support(dev) > 0) { 480 rx_pkt_burst = mlx5_rx_burst_vec; 481 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 482 dev->data->port_id); 483 } else if (mlx5_mprq_enabled(dev)) { 484 rx_pkt_burst = mlx5_rx_burst_mprq; 485 } 486 return rx_pkt_burst; 487 } 488 489 /** 490 * Get the E-Switch parameters by port id. 491 * 492 * @param[in] port 493 * Device port id. 494 * @param[in] valid 495 * Device port id is valid, skip check. This flag is useful 496 * when trials are performed from probing and device is not 497 * flagged as valid yet (in attaching process). 498 * @param[out] es_domain_id 499 * E-Switch domain id. 500 * @param[out] es_port_id 501 * The port id of the port in the E-Switch. 502 * 503 * @return 504 * pointer to device private data structure containing data needed 505 * on success, NULL otherwise and rte_errno is set. 506 */ 507 struct mlx5_priv * 508 mlx5_port_to_eswitch_info(uint16_t port, bool valid) 509 { 510 struct rte_eth_dev *dev; 511 struct mlx5_priv *priv; 512 513 if (port >= RTE_MAX_ETHPORTS) { 514 rte_errno = EINVAL; 515 return NULL; 516 } 517 if (!valid && !rte_eth_dev_is_valid_port(port)) { 518 rte_errno = ENODEV; 519 return NULL; 520 } 521 dev = &rte_eth_devices[port]; 522 priv = dev->data->dev_private; 523 if (!(priv->representor || priv->master)) { 524 rte_errno = EINVAL; 525 return NULL; 526 } 527 return priv; 528 } 529 530 /** 531 * Get the E-Switch parameters by device instance. 532 * 533 * @param[in] port 534 * Device port id. 535 * @param[out] es_domain_id 536 * E-Switch domain id. 537 * @param[out] es_port_id 538 * The port id of the port in the E-Switch. 539 * 540 * @return 541 * pointer to device private data structure containing data needed 542 * on success, NULL otherwise and rte_errno is set. 543 */ 544 struct mlx5_priv * 545 mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev) 546 { 547 struct mlx5_priv *priv; 548 549 priv = dev->data->dev_private; 550 if (!(priv->representor || priv->master)) { 551 rte_errno = EINVAL; 552 return NULL; 553 } 554 return priv; 555 } 556 557 /** 558 * DPDK callback to retrieve hairpin capabilities. 559 * 560 * @param dev 561 * Pointer to Ethernet device structure. 562 * @param[out] cap 563 * Storage for hairpin capability data. 564 * 565 * @return 566 * 0 on success, a negative errno value otherwise and rte_errno is set. 567 */ 568 int 569 mlx5_hairpin_cap_get(struct rte_eth_dev *dev, 570 struct rte_eth_hairpin_cap *cap) 571 { 572 struct mlx5_priv *priv = dev->data->dev_private; 573 574 if (priv->sh->devx == 0) { 575 rte_errno = ENOTSUP; 576 return -rte_errno; 577 } 578 cap->max_nb_queues = UINT16_MAX; 579 cap->max_rx_2_tx = 1; 580 cap->max_tx_2_rx = 1; 581 cap->max_nb_desc = 8192; 582 return 0; 583 } 584