1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <unistd.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <errno.h> 12 13 #include <rte_atomic.h> 14 #include <rte_ethdev_driver.h> 15 #include <rte_bus_pci.h> 16 #include <rte_mbuf.h> 17 #include <rte_common.h> 18 #include <rte_interrupts.h> 19 #include <rte_malloc.h> 20 #include <rte_string_fns.h> 21 #include <rte_rwlock.h> 22 #include <rte_cycles.h> 23 24 #include <mlx5_malloc.h> 25 26 #include "mlx5_rxtx.h" 27 #include "mlx5_autoconf.h" 28 29 /** 30 * Get the interface index from device name. 31 * 32 * @param[in] dev 33 * Pointer to Ethernet device. 34 * 35 * @return 36 * Nonzero interface index on success, zero otherwise and rte_errno is set. 37 */ 38 unsigned int 39 mlx5_ifindex(const struct rte_eth_dev *dev) 40 { 41 struct mlx5_priv *priv = dev->data->dev_private; 42 unsigned int ifindex; 43 44 MLX5_ASSERT(priv); 45 MLX5_ASSERT(priv->if_index); 46 ifindex = priv->if_index; 47 if (!ifindex) 48 rte_errno = ENXIO; 49 return ifindex; 50 } 51 52 /** 53 * DPDK callback for Ethernet device configuration. 54 * 55 * @param dev 56 * Pointer to Ethernet device structure. 57 * 58 * @return 59 * 0 on success, a negative errno value otherwise and rte_errno is set. 60 */ 61 int 62 mlx5_dev_configure(struct rte_eth_dev *dev) 63 { 64 struct mlx5_priv *priv = dev->data->dev_private; 65 unsigned int rxqs_n = dev->data->nb_rx_queues; 66 unsigned int txqs_n = dev->data->nb_tx_queues; 67 const uint8_t use_app_rss_key = 68 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 69 int ret = 0; 70 71 if (use_app_rss_key && 72 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 73 MLX5_RSS_HASH_KEY_LEN)) { 74 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 75 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 76 rte_errno = EINVAL; 77 return -rte_errno; 78 } 79 priv->rss_conf.rss_key = 80 mlx5_realloc(priv->rss_conf.rss_key, MLX5_MEM_RTE, 81 MLX5_RSS_HASH_KEY_LEN, 0, SOCKET_ID_ANY); 82 if (!priv->rss_conf.rss_key) { 83 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 84 dev->data->port_id, rxqs_n); 85 rte_errno = ENOMEM; 86 return -rte_errno; 87 } 88 89 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) 90 dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; 91 92 memcpy(priv->rss_conf.rss_key, 93 use_app_rss_key ? 94 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 95 rss_hash_default_key, 96 MLX5_RSS_HASH_KEY_LEN); 97 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 98 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 99 priv->rxqs = (void *)dev->data->rx_queues; 100 priv->txqs = (void *)dev->data->tx_queues; 101 if (txqs_n != priv->txqs_n) { 102 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 103 dev->data->port_id, priv->txqs_n, txqs_n); 104 priv->txqs_n = txqs_n; 105 } 106 if (rxqs_n > priv->config.ind_table_max_size) { 107 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 108 dev->data->port_id, rxqs_n); 109 rte_errno = EINVAL; 110 return -rte_errno; 111 } 112 if (rxqs_n != priv->rxqs_n) { 113 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 114 dev->data->port_id, priv->rxqs_n, rxqs_n); 115 priv->rxqs_n = rxqs_n; 116 } 117 priv->skip_default_rss_reta = 0; 118 ret = mlx5_proc_priv_init(dev); 119 if (ret) 120 return ret; 121 return 0; 122 } 123 124 /** 125 * Configure default RSS reta. 126 * 127 * @param dev 128 * Pointer to Ethernet device structure. 129 * 130 * @return 131 * 0 on success, a negative errno value otherwise and rte_errno is set. 132 */ 133 int 134 mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev) 135 { 136 struct mlx5_priv *priv = dev->data->dev_private; 137 unsigned int rxqs_n = dev->data->nb_rx_queues; 138 unsigned int i; 139 unsigned int j; 140 unsigned int reta_idx_n; 141 int ret = 0; 142 unsigned int *rss_queue_arr = NULL; 143 unsigned int rss_queue_n = 0; 144 145 if (priv->skip_default_rss_reta) 146 return ret; 147 rss_queue_arr = mlx5_malloc(0, rxqs_n * sizeof(unsigned int), 0, 148 SOCKET_ID_ANY); 149 if (!rss_queue_arr) { 150 DRV_LOG(ERR, "port %u cannot allocate RSS queue list (%u)", 151 dev->data->port_id, rxqs_n); 152 rte_errno = ENOMEM; 153 return -rte_errno; 154 } 155 for (i = 0, j = 0; i < rxqs_n; i++) { 156 struct mlx5_rxq_data *rxq_data; 157 struct mlx5_rxq_ctrl *rxq_ctrl; 158 159 rxq_data = (*priv->rxqs)[i]; 160 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 161 if (rxq_ctrl && rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) 162 rss_queue_arr[j++] = i; 163 } 164 rss_queue_n = j; 165 if (rss_queue_n > priv->config.ind_table_max_size) { 166 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 167 dev->data->port_id, rss_queue_n); 168 rte_errno = EINVAL; 169 mlx5_free(rss_queue_arr); 170 return -rte_errno; 171 } 172 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 173 dev->data->port_id, priv->rxqs_n, rxqs_n); 174 priv->rxqs_n = rxqs_n; 175 /* 176 * If the requested number of RX queues is not a power of two, 177 * use the maximum indirection table size for better balancing. 178 * The result is always rounded to the next power of two. 179 */ 180 reta_idx_n = (1 << log2above((rss_queue_n & (rss_queue_n - 1)) ? 181 priv->config.ind_table_max_size : 182 rss_queue_n)); 183 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 184 if (ret) { 185 mlx5_free(rss_queue_arr); 186 return ret; 187 } 188 /* 189 * When the number of RX queues is not a power of two, 190 * the remaining table entries are padded with reused WQs 191 * and hashes are not spread uniformly. 192 */ 193 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 194 (*priv->reta_idx)[i] = rss_queue_arr[j]; 195 if (++j == rss_queue_n) 196 j = 0; 197 } 198 mlx5_free(rss_queue_arr); 199 return ret; 200 } 201 202 /** 203 * Sets default tuning parameters. 204 * 205 * @param dev 206 * Pointer to Ethernet device. 207 * @param[out] info 208 * Info structure output buffer. 209 */ 210 static void 211 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 212 { 213 struct mlx5_priv *priv = dev->data->dev_private; 214 215 /* Minimum CPU utilization. */ 216 info->default_rxportconf.ring_size = 256; 217 info->default_txportconf.ring_size = 256; 218 info->default_rxportconf.burst_size = MLX5_RX_DEFAULT_BURST; 219 info->default_txportconf.burst_size = MLX5_TX_DEFAULT_BURST; 220 if ((priv->link_speed_capa & ETH_LINK_SPEED_200G) | 221 (priv->link_speed_capa & ETH_LINK_SPEED_100G)) { 222 info->default_rxportconf.nb_queues = 16; 223 info->default_txportconf.nb_queues = 16; 224 if (dev->data->nb_rx_queues > 2 || 225 dev->data->nb_tx_queues > 2) { 226 /* Max Throughput. */ 227 info->default_rxportconf.ring_size = 2048; 228 info->default_txportconf.ring_size = 2048; 229 } 230 } else { 231 info->default_rxportconf.nb_queues = 8; 232 info->default_txportconf.nb_queues = 8; 233 if (dev->data->nb_rx_queues > 2 || 234 dev->data->nb_tx_queues > 2) { 235 /* Max Throughput. */ 236 info->default_rxportconf.ring_size = 4096; 237 info->default_txportconf.ring_size = 4096; 238 } 239 } 240 } 241 242 /** 243 * Sets tx mbuf limiting parameters. 244 * 245 * @param dev 246 * Pointer to Ethernet device. 247 * @param[out] info 248 * Info structure output buffer. 249 */ 250 static void 251 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 252 { 253 struct mlx5_priv *priv = dev->data->dev_private; 254 struct mlx5_dev_config *config = &priv->config; 255 unsigned int inlen; 256 uint16_t nb_max; 257 258 inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ? 259 MLX5_SEND_DEF_INLINE_LEN : 260 (unsigned int)config->txq_inline_max; 261 MLX5_ASSERT(config->txq_inline_min >= 0); 262 inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min); 263 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX + 264 MLX5_ESEG_MIN_INLINE_SIZE - 265 MLX5_WQE_CSEG_SIZE - 266 MLX5_WQE_ESEG_SIZE - 267 MLX5_WQE_DSEG_SIZE * 2); 268 nb_max = (MLX5_WQE_SIZE_MAX + 269 MLX5_ESEG_MIN_INLINE_SIZE - 270 MLX5_WQE_CSEG_SIZE - 271 MLX5_WQE_ESEG_SIZE - 272 MLX5_WQE_DSEG_SIZE - 273 inlen) / MLX5_WSEG_SIZE; 274 info->tx_desc_lim.nb_seg_max = nb_max; 275 info->tx_desc_lim.nb_mtu_seg_max = nb_max; 276 } 277 278 /** 279 * DPDK callback to get information about the device. 280 * 281 * @param dev 282 * Pointer to Ethernet device structure. 283 * @param[out] info 284 * Info structure output buffer. 285 */ 286 int 287 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 288 { 289 struct mlx5_priv *priv = dev->data->dev_private; 290 struct mlx5_dev_config *config = &priv->config; 291 unsigned int max; 292 293 /* FIXME: we should ask the device for these values. */ 294 info->min_rx_bufsize = 32; 295 info->max_rx_pktlen = 65536; 296 info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE; 297 /* 298 * Since we need one CQ per QP, the limit is the minimum number 299 * between the two values. 300 */ 301 max = RTE_MIN(priv->sh->device_attr.max_cq, 302 priv->sh->device_attr.max_qp); 303 /* max_rx_queues is uint16_t. */ 304 max = RTE_MIN(max, (unsigned int)UINT16_MAX); 305 info->max_rx_queues = max; 306 info->max_tx_queues = max; 307 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 308 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 309 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 310 info->rx_queue_offload_capa); 311 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 312 info->if_index = mlx5_ifindex(dev); 313 info->reta_size = priv->reta_idx_n ? 314 priv->reta_idx_n : config->ind_table_max_size; 315 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 316 info->speed_capa = priv->link_speed_capa; 317 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 318 mlx5_set_default_params(dev, info); 319 mlx5_set_txlimit_params(dev, info); 320 info->switch_info.name = dev->data->name; 321 info->switch_info.domain_id = priv->domain_id; 322 info->switch_info.port_id = priv->representor_id; 323 if (priv->representor) { 324 uint16_t port_id; 325 326 if (priv->pf_bond >= 0) { 327 /* 328 * Switch port ID is opaque value with driver defined 329 * format. Push the PF index in bonding configurations 330 * in upper four bits of port ID. If we get too many 331 * representors (more than 4K) or PFs (more than 15) 332 * this approach must be reconsidered. 333 */ 334 if ((info->switch_info.port_id >> 335 MLX5_PORT_ID_BONDING_PF_SHIFT) || 336 priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) { 337 DRV_LOG(ERR, "can't update switch port ID" 338 " for bonding device"); 339 MLX5_ASSERT(false); 340 return -ENODEV; 341 } 342 info->switch_info.port_id |= 343 priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT; 344 } 345 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 346 struct mlx5_priv *opriv = 347 rte_eth_devices[port_id].data->dev_private; 348 349 if (!opriv || 350 opriv->representor || 351 opriv->sh != priv->sh || 352 opriv->domain_id != priv->domain_id) 353 continue; 354 /* 355 * Override switch name with that of the master 356 * device. 357 */ 358 info->switch_info.name = opriv->dev_data->name; 359 break; 360 } 361 } 362 return 0; 363 } 364 365 /** 366 * Get firmware version of a device. 367 * 368 * @param dev 369 * Ethernet device port. 370 * @param fw_ver 371 * String output allocated by caller. 372 * @param fw_size 373 * Size of the output string, including terminating null byte. 374 * 375 * @return 376 * 0 on success, or the size of the non truncated string if too big. 377 */ 378 int 379 mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) 380 { 381 struct mlx5_priv *priv = dev->data->dev_private; 382 struct mlx5_dev_attr *attr = &priv->sh->device_attr; 383 size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1; 384 385 if (fw_size < size) 386 return size; 387 if (fw_ver != NULL) 388 strlcpy(fw_ver, attr->fw_ver, fw_size); 389 return 0; 390 } 391 392 /** 393 * Get supported packet types. 394 * 395 * @param dev 396 * Pointer to Ethernet device structure. 397 * 398 * @return 399 * A pointer to the supported Packet types array. 400 */ 401 const uint32_t * 402 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 403 { 404 static const uint32_t ptypes[] = { 405 /* refers to rxq_cq_to_pkt_type() */ 406 RTE_PTYPE_L2_ETHER, 407 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 408 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 409 RTE_PTYPE_L4_NONFRAG, 410 RTE_PTYPE_L4_FRAG, 411 RTE_PTYPE_L4_TCP, 412 RTE_PTYPE_L4_UDP, 413 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 414 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 415 RTE_PTYPE_INNER_L4_NONFRAG, 416 RTE_PTYPE_INNER_L4_FRAG, 417 RTE_PTYPE_INNER_L4_TCP, 418 RTE_PTYPE_INNER_L4_UDP, 419 RTE_PTYPE_UNKNOWN 420 }; 421 422 if (dev->rx_pkt_burst == mlx5_rx_burst || 423 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 424 dev->rx_pkt_burst == mlx5_rx_burst_vec) 425 return ptypes; 426 return NULL; 427 } 428 429 /** 430 * DPDK callback to change the MTU. 431 * 432 * @param dev 433 * Pointer to Ethernet device structure. 434 * @param in_mtu 435 * New MTU. 436 * 437 * @return 438 * 0 on success, a negative errno value otherwise and rte_errno is set. 439 */ 440 int 441 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 442 { 443 struct mlx5_priv *priv = dev->data->dev_private; 444 uint16_t kern_mtu = 0; 445 int ret; 446 447 ret = mlx5_get_mtu(dev, &kern_mtu); 448 if (ret) 449 return ret; 450 /* Set kernel interface MTU first. */ 451 ret = mlx5_set_mtu(dev, mtu); 452 if (ret) 453 return ret; 454 ret = mlx5_get_mtu(dev, &kern_mtu); 455 if (ret) 456 return ret; 457 if (kern_mtu == mtu) { 458 priv->mtu = mtu; 459 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 460 dev->data->port_id, mtu); 461 return 0; 462 } 463 rte_errno = EAGAIN; 464 return -rte_errno; 465 } 466 467 /** 468 * Configure the RX function to use. 469 * 470 * @param dev 471 * Pointer to private data structure. 472 * 473 * @return 474 * Pointer to selected Rx burst function. 475 */ 476 eth_rx_burst_t 477 mlx5_select_rx_function(struct rte_eth_dev *dev) 478 { 479 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 480 481 MLX5_ASSERT(dev != NULL); 482 if (mlx5_check_vec_rx_support(dev) > 0) { 483 rx_pkt_burst = mlx5_rx_burst_vec; 484 DRV_LOG(DEBUG, "port %u selected Rx vectorized function", 485 dev->data->port_id); 486 } else if (mlx5_mprq_enabled(dev)) { 487 rx_pkt_burst = mlx5_rx_burst_mprq; 488 } 489 return rx_pkt_burst; 490 } 491 492 /** 493 * Get the E-Switch parameters by port id. 494 * 495 * @param[in] port 496 * Device port id. 497 * @param[in] valid 498 * Device port id is valid, skip check. This flag is useful 499 * when trials are performed from probing and device is not 500 * flagged as valid yet (in attaching process). 501 * @param[out] es_domain_id 502 * E-Switch domain id. 503 * @param[out] es_port_id 504 * The port id of the port in the E-Switch. 505 * 506 * @return 507 * pointer to device private data structure containing data needed 508 * on success, NULL otherwise and rte_errno is set. 509 */ 510 struct mlx5_priv * 511 mlx5_port_to_eswitch_info(uint16_t port, bool valid) 512 { 513 struct rte_eth_dev *dev; 514 struct mlx5_priv *priv; 515 516 if (port >= RTE_MAX_ETHPORTS) { 517 rte_errno = EINVAL; 518 return NULL; 519 } 520 if (!valid && !rte_eth_dev_is_valid_port(port)) { 521 rte_errno = ENODEV; 522 return NULL; 523 } 524 dev = &rte_eth_devices[port]; 525 priv = dev->data->dev_private; 526 if (!(priv->representor || priv->master)) { 527 rte_errno = EINVAL; 528 return NULL; 529 } 530 return priv; 531 } 532 533 /** 534 * Get the E-Switch parameters by device instance. 535 * 536 * @param[in] port 537 * Device port id. 538 * @param[out] es_domain_id 539 * E-Switch domain id. 540 * @param[out] es_port_id 541 * The port id of the port in the E-Switch. 542 * 543 * @return 544 * pointer to device private data structure containing data needed 545 * on success, NULL otherwise and rte_errno is set. 546 */ 547 struct mlx5_priv * 548 mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev) 549 { 550 struct mlx5_priv *priv; 551 552 priv = dev->data->dev_private; 553 if (!(priv->representor || priv->master)) { 554 rte_errno = EINVAL; 555 return NULL; 556 } 557 return priv; 558 } 559 560 /** 561 * DPDK callback to retrieve hairpin capabilities. 562 * 563 * @param dev 564 * Pointer to Ethernet device structure. 565 * @param[out] cap 566 * Storage for hairpin capability data. 567 * 568 * @return 569 * 0 on success, a negative errno value otherwise and rte_errno is set. 570 */ 571 int 572 mlx5_hairpin_cap_get(struct rte_eth_dev *dev, 573 struct rte_eth_hairpin_cap *cap) 574 { 575 struct mlx5_priv *priv = dev->data->dev_private; 576 577 if (priv->sh->devx == 0) { 578 rte_errno = ENOTSUP; 579 return -rte_errno; 580 } 581 cap->max_nb_queues = UINT16_MAX; 582 cap->max_rx_2_tx = 1; 583 cap->max_tx_2_rx = 1; 584 cap->max_nb_desc = 8192; 585 return 0; 586 } 587