1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <unistd.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <errno.h> 12 13 #include <rte_ethdev_driver.h> 14 #include <rte_bus_pci.h> 15 #include <rte_mbuf.h> 16 #include <rte_common.h> 17 #include <rte_interrupts.h> 18 #include <rte_malloc.h> 19 #include <rte_string_fns.h> 20 #include <rte_rwlock.h> 21 #include <rte_cycles.h> 22 23 #include <mlx5_malloc.h> 24 25 #include "mlx5_rxtx.h" 26 #include "mlx5_autoconf.h" 27 28 /** 29 * Get the interface index from device name. 30 * 31 * @param[in] dev 32 * Pointer to Ethernet device. 33 * 34 * @return 35 * Nonzero interface index on success, zero otherwise and rte_errno is set. 36 */ 37 unsigned int 38 mlx5_ifindex(const struct rte_eth_dev *dev) 39 { 40 struct mlx5_priv *priv = dev->data->dev_private; 41 unsigned int ifindex; 42 43 MLX5_ASSERT(priv); 44 MLX5_ASSERT(priv->if_index); 45 ifindex = priv->bond_ifindex > 0 ? priv->bond_ifindex : priv->if_index; 46 if (!ifindex) 47 rte_errno = ENXIO; 48 return ifindex; 49 } 50 51 /** 52 * DPDK callback for Ethernet device configuration. 53 * 54 * @param dev 55 * Pointer to Ethernet device structure. 56 * 57 * @return 58 * 0 on success, a negative errno value otherwise and rte_errno is set. 59 */ 60 int 61 mlx5_dev_configure(struct rte_eth_dev *dev) 62 { 63 struct mlx5_priv *priv = dev->data->dev_private; 64 unsigned int rxqs_n = dev->data->nb_rx_queues; 65 unsigned int txqs_n = dev->data->nb_tx_queues; 66 const uint8_t use_app_rss_key = 67 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 68 int ret = 0; 69 70 if (use_app_rss_key && 71 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 72 MLX5_RSS_HASH_KEY_LEN)) { 73 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 74 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 75 rte_errno = EINVAL; 76 return -rte_errno; 77 } 78 priv->rss_conf.rss_key = 79 mlx5_realloc(priv->rss_conf.rss_key, MLX5_MEM_RTE, 80 MLX5_RSS_HASH_KEY_LEN, 0, SOCKET_ID_ANY); 81 if (!priv->rss_conf.rss_key) { 82 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 83 dev->data->port_id, rxqs_n); 84 rte_errno = ENOMEM; 85 return -rte_errno; 86 } 87 88 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) 89 dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; 90 if ((dev->data->dev_conf.txmode.offloads & 91 DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP) && 92 rte_mbuf_dyn_tx_timestamp_register(NULL, NULL) != 0) { 93 DRV_LOG(ERR, "port %u cannot register Tx timestamp field/flag", 94 dev->data->port_id); 95 return -rte_errno; 96 } 97 memcpy(priv->rss_conf.rss_key, 98 use_app_rss_key ? 99 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 100 rss_hash_default_key, 101 MLX5_RSS_HASH_KEY_LEN); 102 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 103 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 104 priv->rxqs = (void *)dev->data->rx_queues; 105 priv->txqs = (void *)dev->data->tx_queues; 106 if (txqs_n != priv->txqs_n) { 107 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 108 dev->data->port_id, priv->txqs_n, txqs_n); 109 priv->txqs_n = txqs_n; 110 } 111 if (rxqs_n > priv->config.ind_table_max_size) { 112 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 113 dev->data->port_id, rxqs_n); 114 rte_errno = EINVAL; 115 return -rte_errno; 116 } 117 if (rxqs_n != priv->rxqs_n) { 118 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 119 dev->data->port_id, priv->rxqs_n, rxqs_n); 120 priv->rxqs_n = rxqs_n; 121 } 122 priv->skip_default_rss_reta = 0; 123 ret = mlx5_proc_priv_init(dev); 124 if (ret) 125 return ret; 126 return 0; 127 } 128 129 /** 130 * Configure default RSS reta. 131 * 132 * @param dev 133 * Pointer to Ethernet device structure. 134 * 135 * @return 136 * 0 on success, a negative errno value otherwise and rte_errno is set. 137 */ 138 int 139 mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev) 140 { 141 struct mlx5_priv *priv = dev->data->dev_private; 142 unsigned int rxqs_n = dev->data->nb_rx_queues; 143 unsigned int i; 144 unsigned int j; 145 unsigned int reta_idx_n; 146 int ret = 0; 147 unsigned int *rss_queue_arr = NULL; 148 unsigned int rss_queue_n = 0; 149 150 if (priv->skip_default_rss_reta) 151 return ret; 152 rss_queue_arr = mlx5_malloc(0, rxqs_n * sizeof(unsigned int), 0, 153 SOCKET_ID_ANY); 154 if (!rss_queue_arr) { 155 DRV_LOG(ERR, "port %u cannot allocate RSS queue list (%u)", 156 dev->data->port_id, rxqs_n); 157 rte_errno = ENOMEM; 158 return -rte_errno; 159 } 160 for (i = 0, j = 0; i < rxqs_n; i++) { 161 struct mlx5_rxq_data *rxq_data; 162 struct mlx5_rxq_ctrl *rxq_ctrl; 163 164 rxq_data = (*priv->rxqs)[i]; 165 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 166 if (rxq_ctrl && rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) 167 rss_queue_arr[j++] = i; 168 } 169 rss_queue_n = j; 170 if (rss_queue_n > priv->config.ind_table_max_size) { 171 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 172 dev->data->port_id, rss_queue_n); 173 rte_errno = EINVAL; 174 mlx5_free(rss_queue_arr); 175 return -rte_errno; 176 } 177 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 178 dev->data->port_id, priv->rxqs_n, rxqs_n); 179 priv->rxqs_n = rxqs_n; 180 /* 181 * If the requested number of RX queues is not a power of two, 182 * use the maximum indirection table size for better balancing. 183 * The result is always rounded to the next power of two. 184 */ 185 reta_idx_n = (1 << log2above((rss_queue_n & (rss_queue_n - 1)) ? 186 priv->config.ind_table_max_size : 187 rss_queue_n)); 188 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 189 if (ret) { 190 mlx5_free(rss_queue_arr); 191 return ret; 192 } 193 /* 194 * When the number of RX queues is not a power of two, 195 * the remaining table entries are padded with reused WQs 196 * and hashes are not spread uniformly. 197 */ 198 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 199 (*priv->reta_idx)[i] = rss_queue_arr[j]; 200 if (++j == rss_queue_n) 201 j = 0; 202 } 203 mlx5_free(rss_queue_arr); 204 return ret; 205 } 206 207 /** 208 * Sets default tuning parameters. 209 * 210 * @param dev 211 * Pointer to Ethernet device. 212 * @param[out] info 213 * Info structure output buffer. 214 */ 215 static void 216 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 217 { 218 struct mlx5_priv *priv = dev->data->dev_private; 219 220 /* Minimum CPU utilization. */ 221 info->default_rxportconf.ring_size = 256; 222 info->default_txportconf.ring_size = 256; 223 info->default_rxportconf.burst_size = MLX5_RX_DEFAULT_BURST; 224 info->default_txportconf.burst_size = MLX5_TX_DEFAULT_BURST; 225 if ((priv->link_speed_capa & ETH_LINK_SPEED_200G) | 226 (priv->link_speed_capa & ETH_LINK_SPEED_100G)) { 227 info->default_rxportconf.nb_queues = 16; 228 info->default_txportconf.nb_queues = 16; 229 if (dev->data->nb_rx_queues > 2 || 230 dev->data->nb_tx_queues > 2) { 231 /* Max Throughput. */ 232 info->default_rxportconf.ring_size = 2048; 233 info->default_txportconf.ring_size = 2048; 234 } 235 } else { 236 info->default_rxportconf.nb_queues = 8; 237 info->default_txportconf.nb_queues = 8; 238 if (dev->data->nb_rx_queues > 2 || 239 dev->data->nb_tx_queues > 2) { 240 /* Max Throughput. */ 241 info->default_rxportconf.ring_size = 4096; 242 info->default_txportconf.ring_size = 4096; 243 } 244 } 245 } 246 247 /** 248 * Sets tx mbuf limiting parameters. 249 * 250 * @param dev 251 * Pointer to Ethernet device. 252 * @param[out] info 253 * Info structure output buffer. 254 */ 255 static void 256 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 257 { 258 struct mlx5_priv *priv = dev->data->dev_private; 259 struct mlx5_dev_config *config = &priv->config; 260 unsigned int inlen; 261 uint16_t nb_max; 262 263 inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ? 264 MLX5_SEND_DEF_INLINE_LEN : 265 (unsigned int)config->txq_inline_max; 266 MLX5_ASSERT(config->txq_inline_min >= 0); 267 inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min); 268 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX + 269 MLX5_ESEG_MIN_INLINE_SIZE - 270 MLX5_WQE_CSEG_SIZE - 271 MLX5_WQE_ESEG_SIZE - 272 MLX5_WQE_DSEG_SIZE * 2); 273 nb_max = (MLX5_WQE_SIZE_MAX + 274 MLX5_ESEG_MIN_INLINE_SIZE - 275 MLX5_WQE_CSEG_SIZE - 276 MLX5_WQE_ESEG_SIZE - 277 MLX5_WQE_DSEG_SIZE - 278 inlen) / MLX5_WSEG_SIZE; 279 info->tx_desc_lim.nb_seg_max = nb_max; 280 info->tx_desc_lim.nb_mtu_seg_max = nb_max; 281 } 282 283 /** 284 * DPDK callback to get information about the device. 285 * 286 * @param dev 287 * Pointer to Ethernet device structure. 288 * @param[out] info 289 * Info structure output buffer. 290 */ 291 int 292 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 293 { 294 struct mlx5_priv *priv = dev->data->dev_private; 295 struct mlx5_dev_config *config = &priv->config; 296 unsigned int max; 297 298 /* FIXME: we should ask the device for these values. */ 299 info->min_rx_bufsize = 32; 300 info->max_rx_pktlen = 65536; 301 info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE; 302 /* 303 * Since we need one CQ per QP, the limit is the minimum number 304 * between the two values. 305 */ 306 max = RTE_MIN(priv->sh->device_attr.max_cq, 307 priv->sh->device_attr.max_qp); 308 /* max_rx_queues is uint16_t. */ 309 max = RTE_MIN(max, (unsigned int)UINT16_MAX); 310 info->max_rx_queues = max; 311 info->max_tx_queues = max; 312 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 313 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 314 info->rx_seg_capa.max_nseg = MLX5_MAX_RXQ_NSEG; 315 info->rx_seg_capa.multi_pools = 1; 316 info->rx_seg_capa.offset_allowed = 1; 317 info->rx_seg_capa.offset_align_log2 = 0; 318 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 319 info->rx_queue_offload_capa); 320 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 321 info->if_index = mlx5_ifindex(dev); 322 info->reta_size = priv->reta_idx_n ? 323 priv->reta_idx_n : config->ind_table_max_size; 324 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 325 info->speed_capa = priv->link_speed_capa; 326 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 327 mlx5_set_default_params(dev, info); 328 mlx5_set_txlimit_params(dev, info); 329 info->switch_info.name = dev->data->name; 330 info->switch_info.domain_id = priv->domain_id; 331 info->switch_info.port_id = priv->representor_id; 332 if (priv->representor) { 333 uint16_t port_id; 334 335 if (priv->pf_bond >= 0) { 336 /* 337 * Switch port ID is opaque value with driver defined 338 * format. Push the PF index in bonding configurations 339 * in upper four bits of port ID. If we get too many 340 * representors (more than 4K) or PFs (more than 15) 341 * this approach must be reconsidered. 342 */ 343 if ((info->switch_info.port_id >> 344 MLX5_PORT_ID_BONDING_PF_SHIFT) || 345 priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) { 346 DRV_LOG(ERR, "can't update switch port ID" 347 " for bonding device"); 348 MLX5_ASSERT(false); 349 return -ENODEV; 350 } 351 info->switch_info.port_id |= 352 priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT; 353 } 354 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 355 struct mlx5_priv *opriv = 356 rte_eth_devices[port_id].data->dev_private; 357 358 if (!opriv || 359 opriv->representor || 360 opriv->sh != priv->sh || 361 opriv->domain_id != priv->domain_id) 362 continue; 363 /* 364 * Override switch name with that of the master 365 * device. 366 */ 367 info->switch_info.name = opriv->dev_data->name; 368 break; 369 } 370 } 371 return 0; 372 } 373 374 /** 375 * Get firmware version of a device. 376 * 377 * @param dev 378 * Ethernet device port. 379 * @param fw_ver 380 * String output allocated by caller. 381 * @param fw_size 382 * Size of the output string, including terminating null byte. 383 * 384 * @return 385 * 0 on success, or the size of the non truncated string if too big. 386 */ 387 int 388 mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) 389 { 390 struct mlx5_priv *priv = dev->data->dev_private; 391 struct mlx5_dev_attr *attr = &priv->sh->device_attr; 392 size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1; 393 394 if (fw_size < size) 395 return size; 396 if (fw_ver != NULL) 397 strlcpy(fw_ver, attr->fw_ver, fw_size); 398 return 0; 399 } 400 401 /** 402 * Get supported packet types. 403 * 404 * @param dev 405 * Pointer to Ethernet device structure. 406 * 407 * @return 408 * A pointer to the supported Packet types array. 409 */ 410 const uint32_t * 411 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 412 { 413 static const uint32_t ptypes[] = { 414 /* refers to rxq_cq_to_pkt_type() */ 415 RTE_PTYPE_L2_ETHER, 416 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 417 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 418 RTE_PTYPE_L4_NONFRAG, 419 RTE_PTYPE_L4_FRAG, 420 RTE_PTYPE_L4_TCP, 421 RTE_PTYPE_L4_UDP, 422 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 423 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 424 RTE_PTYPE_INNER_L4_NONFRAG, 425 RTE_PTYPE_INNER_L4_FRAG, 426 RTE_PTYPE_INNER_L4_TCP, 427 RTE_PTYPE_INNER_L4_UDP, 428 RTE_PTYPE_UNKNOWN 429 }; 430 431 if (dev->rx_pkt_burst == mlx5_rx_burst || 432 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 433 dev->rx_pkt_burst == mlx5_rx_burst_vec || 434 dev->rx_pkt_burst == mlx5_rx_burst_mprq_vec) 435 return ptypes; 436 return NULL; 437 } 438 439 /** 440 * DPDK callback to change the MTU. 441 * 442 * @param dev 443 * Pointer to Ethernet device structure. 444 * @param in_mtu 445 * New MTU. 446 * 447 * @return 448 * 0 on success, a negative errno value otherwise and rte_errno is set. 449 */ 450 int 451 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 452 { 453 struct mlx5_priv *priv = dev->data->dev_private; 454 uint16_t kern_mtu = 0; 455 int ret; 456 457 ret = mlx5_get_mtu(dev, &kern_mtu); 458 if (ret) 459 return ret; 460 /* Set kernel interface MTU first. */ 461 ret = mlx5_set_mtu(dev, mtu); 462 if (ret) 463 return ret; 464 ret = mlx5_get_mtu(dev, &kern_mtu); 465 if (ret) 466 return ret; 467 if (kern_mtu == mtu) { 468 priv->mtu = mtu; 469 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 470 dev->data->port_id, mtu); 471 return 0; 472 } 473 rte_errno = EAGAIN; 474 return -rte_errno; 475 } 476 477 /** 478 * Configure the RX function to use. 479 * 480 * @param dev 481 * Pointer to private data structure. 482 * 483 * @return 484 * Pointer to selected Rx burst function. 485 */ 486 eth_rx_burst_t 487 mlx5_select_rx_function(struct rte_eth_dev *dev) 488 { 489 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 490 491 MLX5_ASSERT(dev != NULL); 492 if (mlx5_check_vec_rx_support(dev) > 0) { 493 if (mlx5_mprq_enabled(dev)) { 494 rx_pkt_burst = mlx5_rx_burst_mprq_vec; 495 DRV_LOG(DEBUG, "port %u selected vectorized" 496 " MPRQ Rx function", dev->data->port_id); 497 } else { 498 rx_pkt_burst = mlx5_rx_burst_vec; 499 DRV_LOG(DEBUG, "port %u selected vectorized" 500 " SPRQ Rx function", dev->data->port_id); 501 } 502 } else if (mlx5_mprq_enabled(dev)) { 503 rx_pkt_burst = mlx5_rx_burst_mprq; 504 DRV_LOG(DEBUG, "port %u selected MPRQ Rx function", 505 dev->data->port_id); 506 } else { 507 DRV_LOG(DEBUG, "port %u selected SPRQ Rx function", 508 dev->data->port_id); 509 } 510 return rx_pkt_burst; 511 } 512 513 /** 514 * Get the E-Switch parameters by port id. 515 * 516 * @param[in] port 517 * Device port id. 518 * @param[in] valid 519 * Device port id is valid, skip check. This flag is useful 520 * when trials are performed from probing and device is not 521 * flagged as valid yet (in attaching process). 522 * @param[out] es_domain_id 523 * E-Switch domain id. 524 * @param[out] es_port_id 525 * The port id of the port in the E-Switch. 526 * 527 * @return 528 * pointer to device private data structure containing data needed 529 * on success, NULL otherwise and rte_errno is set. 530 */ 531 struct mlx5_priv * 532 mlx5_port_to_eswitch_info(uint16_t port, bool valid) 533 { 534 struct rte_eth_dev *dev; 535 struct mlx5_priv *priv; 536 537 if (port >= RTE_MAX_ETHPORTS) { 538 rte_errno = EINVAL; 539 return NULL; 540 } 541 if (!valid && !rte_eth_dev_is_valid_port(port)) { 542 rte_errno = ENODEV; 543 return NULL; 544 } 545 dev = &rte_eth_devices[port]; 546 priv = dev->data->dev_private; 547 if (!(priv->representor || priv->master)) { 548 rte_errno = EINVAL; 549 return NULL; 550 } 551 return priv; 552 } 553 554 /** 555 * Get the E-Switch parameters by device instance. 556 * 557 * @param[in] port 558 * Device port id. 559 * @param[out] es_domain_id 560 * E-Switch domain id. 561 * @param[out] es_port_id 562 * The port id of the port in the E-Switch. 563 * 564 * @return 565 * pointer to device private data structure containing data needed 566 * on success, NULL otherwise and rte_errno is set. 567 */ 568 struct mlx5_priv * 569 mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev) 570 { 571 struct mlx5_priv *priv; 572 573 priv = dev->data->dev_private; 574 if (!(priv->representor || priv->master)) { 575 rte_errno = EINVAL; 576 return NULL; 577 } 578 return priv; 579 } 580 581 /** 582 * DPDK callback to retrieve hairpin capabilities. 583 * 584 * @param dev 585 * Pointer to Ethernet device structure. 586 * @param[out] cap 587 * Storage for hairpin capability data. 588 * 589 * @return 590 * 0 on success, a negative errno value otherwise and rte_errno is set. 591 */ 592 int 593 mlx5_hairpin_cap_get(struct rte_eth_dev *dev, struct rte_eth_hairpin_cap *cap) 594 { 595 struct mlx5_priv *priv = dev->data->dev_private; 596 struct mlx5_dev_config *config = &priv->config; 597 598 if (!priv->sh->devx || !config->dest_tir || !config->dv_flow_en) { 599 rte_errno = ENOTSUP; 600 return -rte_errno; 601 } 602 cap->max_nb_queues = UINT16_MAX; 603 cap->max_rx_2_tx = 1; 604 cap->max_tx_2_rx = 1; 605 cap->max_nb_desc = 8192; 606 return 0; 607 } 608