1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <unistd.h> 8 #include <string.h> 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <errno.h> 12 13 #include <rte_atomic.h> 14 #include <rte_ethdev_driver.h> 15 #include <rte_bus_pci.h> 16 #include <rte_mbuf.h> 17 #include <rte_common.h> 18 #include <rte_interrupts.h> 19 #include <rte_malloc.h> 20 #include <rte_string_fns.h> 21 #include <rte_rwlock.h> 22 #include <rte_cycles.h> 23 24 #include <mlx5_malloc.h> 25 26 #include "mlx5_rxtx.h" 27 #include "mlx5_autoconf.h" 28 29 /** 30 * Get the interface index from device name. 31 * 32 * @param[in] dev 33 * Pointer to Ethernet device. 34 * 35 * @return 36 * Nonzero interface index on success, zero otherwise and rte_errno is set. 37 */ 38 unsigned int 39 mlx5_ifindex(const struct rte_eth_dev *dev) 40 { 41 struct mlx5_priv *priv = dev->data->dev_private; 42 unsigned int ifindex; 43 44 MLX5_ASSERT(priv); 45 MLX5_ASSERT(priv->if_index); 46 ifindex = priv->bond_ifindex > 0 ? priv->bond_ifindex : priv->if_index; 47 if (!ifindex) 48 rte_errno = ENXIO; 49 return ifindex; 50 } 51 52 /** 53 * DPDK callback for Ethernet device configuration. 54 * 55 * @param dev 56 * Pointer to Ethernet device structure. 57 * 58 * @return 59 * 0 on success, a negative errno value otherwise and rte_errno is set. 60 */ 61 int 62 mlx5_dev_configure(struct rte_eth_dev *dev) 63 { 64 struct mlx5_priv *priv = dev->data->dev_private; 65 unsigned int rxqs_n = dev->data->nb_rx_queues; 66 unsigned int txqs_n = dev->data->nb_tx_queues; 67 const uint8_t use_app_rss_key = 68 !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; 69 int ret = 0; 70 71 if (use_app_rss_key && 72 (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != 73 MLX5_RSS_HASH_KEY_LEN)) { 74 DRV_LOG(ERR, "port %u RSS key len must be %s Bytes long", 75 dev->data->port_id, RTE_STR(MLX5_RSS_HASH_KEY_LEN)); 76 rte_errno = EINVAL; 77 return -rte_errno; 78 } 79 priv->rss_conf.rss_key = 80 mlx5_realloc(priv->rss_conf.rss_key, MLX5_MEM_RTE, 81 MLX5_RSS_HASH_KEY_LEN, 0, SOCKET_ID_ANY); 82 if (!priv->rss_conf.rss_key) { 83 DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", 84 dev->data->port_id, rxqs_n); 85 rte_errno = ENOMEM; 86 return -rte_errno; 87 } 88 89 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) 90 dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH; 91 if ((dev->data->dev_conf.txmode.offloads & 92 DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP) && 93 rte_mbuf_dyn_tx_timestamp_register(NULL, NULL) != 0) { 94 DRV_LOG(ERR, "port %u cannot register Tx timestamp field/flag", 95 dev->data->port_id); 96 return -rte_errno; 97 } 98 memcpy(priv->rss_conf.rss_key, 99 use_app_rss_key ? 100 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key : 101 rss_hash_default_key, 102 MLX5_RSS_HASH_KEY_LEN); 103 priv->rss_conf.rss_key_len = MLX5_RSS_HASH_KEY_LEN; 104 priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; 105 priv->rxqs = (void *)dev->data->rx_queues; 106 priv->txqs = (void *)dev->data->tx_queues; 107 if (txqs_n != priv->txqs_n) { 108 DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", 109 dev->data->port_id, priv->txqs_n, txqs_n); 110 priv->txqs_n = txqs_n; 111 } 112 if (rxqs_n > priv->config.ind_table_max_size) { 113 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 114 dev->data->port_id, rxqs_n); 115 rte_errno = EINVAL; 116 return -rte_errno; 117 } 118 if (rxqs_n != priv->rxqs_n) { 119 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 120 dev->data->port_id, priv->rxqs_n, rxqs_n); 121 priv->rxqs_n = rxqs_n; 122 } 123 priv->skip_default_rss_reta = 0; 124 ret = mlx5_proc_priv_init(dev); 125 if (ret) 126 return ret; 127 return 0; 128 } 129 130 /** 131 * Configure default RSS reta. 132 * 133 * @param dev 134 * Pointer to Ethernet device structure. 135 * 136 * @return 137 * 0 on success, a negative errno value otherwise and rte_errno is set. 138 */ 139 int 140 mlx5_dev_configure_rss_reta(struct rte_eth_dev *dev) 141 { 142 struct mlx5_priv *priv = dev->data->dev_private; 143 unsigned int rxqs_n = dev->data->nb_rx_queues; 144 unsigned int i; 145 unsigned int j; 146 unsigned int reta_idx_n; 147 int ret = 0; 148 unsigned int *rss_queue_arr = NULL; 149 unsigned int rss_queue_n = 0; 150 151 if (priv->skip_default_rss_reta) 152 return ret; 153 rss_queue_arr = mlx5_malloc(0, rxqs_n * sizeof(unsigned int), 0, 154 SOCKET_ID_ANY); 155 if (!rss_queue_arr) { 156 DRV_LOG(ERR, "port %u cannot allocate RSS queue list (%u)", 157 dev->data->port_id, rxqs_n); 158 rte_errno = ENOMEM; 159 return -rte_errno; 160 } 161 for (i = 0, j = 0; i < rxqs_n; i++) { 162 struct mlx5_rxq_data *rxq_data; 163 struct mlx5_rxq_ctrl *rxq_ctrl; 164 165 rxq_data = (*priv->rxqs)[i]; 166 rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); 167 if (rxq_ctrl && rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) 168 rss_queue_arr[j++] = i; 169 } 170 rss_queue_n = j; 171 if (rss_queue_n > priv->config.ind_table_max_size) { 172 DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", 173 dev->data->port_id, rss_queue_n); 174 rte_errno = EINVAL; 175 mlx5_free(rss_queue_arr); 176 return -rte_errno; 177 } 178 DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", 179 dev->data->port_id, priv->rxqs_n, rxqs_n); 180 priv->rxqs_n = rxqs_n; 181 /* 182 * If the requested number of RX queues is not a power of two, 183 * use the maximum indirection table size for better balancing. 184 * The result is always rounded to the next power of two. 185 */ 186 reta_idx_n = (1 << log2above((rss_queue_n & (rss_queue_n - 1)) ? 187 priv->config.ind_table_max_size : 188 rss_queue_n)); 189 ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); 190 if (ret) { 191 mlx5_free(rss_queue_arr); 192 return ret; 193 } 194 /* 195 * When the number of RX queues is not a power of two, 196 * the remaining table entries are padded with reused WQs 197 * and hashes are not spread uniformly. 198 */ 199 for (i = 0, j = 0; (i != reta_idx_n); ++i) { 200 (*priv->reta_idx)[i] = rss_queue_arr[j]; 201 if (++j == rss_queue_n) 202 j = 0; 203 } 204 mlx5_free(rss_queue_arr); 205 return ret; 206 } 207 208 /** 209 * Sets default tuning parameters. 210 * 211 * @param dev 212 * Pointer to Ethernet device. 213 * @param[out] info 214 * Info structure output buffer. 215 */ 216 static void 217 mlx5_set_default_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 218 { 219 struct mlx5_priv *priv = dev->data->dev_private; 220 221 /* Minimum CPU utilization. */ 222 info->default_rxportconf.ring_size = 256; 223 info->default_txportconf.ring_size = 256; 224 info->default_rxportconf.burst_size = MLX5_RX_DEFAULT_BURST; 225 info->default_txportconf.burst_size = MLX5_TX_DEFAULT_BURST; 226 if ((priv->link_speed_capa & ETH_LINK_SPEED_200G) | 227 (priv->link_speed_capa & ETH_LINK_SPEED_100G)) { 228 info->default_rxportconf.nb_queues = 16; 229 info->default_txportconf.nb_queues = 16; 230 if (dev->data->nb_rx_queues > 2 || 231 dev->data->nb_tx_queues > 2) { 232 /* Max Throughput. */ 233 info->default_rxportconf.ring_size = 2048; 234 info->default_txportconf.ring_size = 2048; 235 } 236 } else { 237 info->default_rxportconf.nb_queues = 8; 238 info->default_txportconf.nb_queues = 8; 239 if (dev->data->nb_rx_queues > 2 || 240 dev->data->nb_tx_queues > 2) { 241 /* Max Throughput. */ 242 info->default_rxportconf.ring_size = 4096; 243 info->default_txportconf.ring_size = 4096; 244 } 245 } 246 } 247 248 /** 249 * Sets tx mbuf limiting parameters. 250 * 251 * @param dev 252 * Pointer to Ethernet device. 253 * @param[out] info 254 * Info structure output buffer. 255 */ 256 static void 257 mlx5_set_txlimit_params(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 258 { 259 struct mlx5_priv *priv = dev->data->dev_private; 260 struct mlx5_dev_config *config = &priv->config; 261 unsigned int inlen; 262 uint16_t nb_max; 263 264 inlen = (config->txq_inline_max == MLX5_ARG_UNSET) ? 265 MLX5_SEND_DEF_INLINE_LEN : 266 (unsigned int)config->txq_inline_max; 267 MLX5_ASSERT(config->txq_inline_min >= 0); 268 inlen = RTE_MAX(inlen, (unsigned int)config->txq_inline_min); 269 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX + 270 MLX5_ESEG_MIN_INLINE_SIZE - 271 MLX5_WQE_CSEG_SIZE - 272 MLX5_WQE_ESEG_SIZE - 273 MLX5_WQE_DSEG_SIZE * 2); 274 nb_max = (MLX5_WQE_SIZE_MAX + 275 MLX5_ESEG_MIN_INLINE_SIZE - 276 MLX5_WQE_CSEG_SIZE - 277 MLX5_WQE_ESEG_SIZE - 278 MLX5_WQE_DSEG_SIZE - 279 inlen) / MLX5_WSEG_SIZE; 280 info->tx_desc_lim.nb_seg_max = nb_max; 281 info->tx_desc_lim.nb_mtu_seg_max = nb_max; 282 } 283 284 /** 285 * DPDK callback to get information about the device. 286 * 287 * @param dev 288 * Pointer to Ethernet device structure. 289 * @param[out] info 290 * Info structure output buffer. 291 */ 292 int 293 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 294 { 295 struct mlx5_priv *priv = dev->data->dev_private; 296 struct mlx5_dev_config *config = &priv->config; 297 unsigned int max; 298 299 /* FIXME: we should ask the device for these values. */ 300 info->min_rx_bufsize = 32; 301 info->max_rx_pktlen = 65536; 302 info->max_lro_pkt_size = MLX5_MAX_LRO_SIZE; 303 /* 304 * Since we need one CQ per QP, the limit is the minimum number 305 * between the two values. 306 */ 307 max = RTE_MIN(priv->sh->device_attr.max_cq, 308 priv->sh->device_attr.max_qp); 309 /* max_rx_queues is uint16_t. */ 310 max = RTE_MIN(max, (unsigned int)UINT16_MAX); 311 info->max_rx_queues = max; 312 info->max_tx_queues = max; 313 info->max_mac_addrs = MLX5_MAX_UC_MAC_ADDRESSES; 314 info->rx_queue_offload_capa = mlx5_get_rx_queue_offloads(dev); 315 info->rx_seg_capa.max_nseg = MLX5_MAX_RXQ_NSEG; 316 info->rx_seg_capa.multi_pools = 1; 317 info->rx_seg_capa.offset_allowed = 1; 318 info->rx_seg_capa.offset_align_log2 = 0; 319 info->rx_offload_capa = (mlx5_get_rx_port_offloads() | 320 info->rx_queue_offload_capa); 321 info->tx_offload_capa = mlx5_get_tx_port_offloads(dev); 322 info->if_index = mlx5_ifindex(dev); 323 info->reta_size = priv->reta_idx_n ? 324 priv->reta_idx_n : config->ind_table_max_size; 325 info->hash_key_size = MLX5_RSS_HASH_KEY_LEN; 326 info->speed_capa = priv->link_speed_capa; 327 info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; 328 mlx5_set_default_params(dev, info); 329 mlx5_set_txlimit_params(dev, info); 330 info->switch_info.name = dev->data->name; 331 info->switch_info.domain_id = priv->domain_id; 332 info->switch_info.port_id = priv->representor_id; 333 if (priv->representor) { 334 uint16_t port_id; 335 336 if (priv->pf_bond >= 0) { 337 /* 338 * Switch port ID is opaque value with driver defined 339 * format. Push the PF index in bonding configurations 340 * in upper four bits of port ID. If we get too many 341 * representors (more than 4K) or PFs (more than 15) 342 * this approach must be reconsidered. 343 */ 344 if ((info->switch_info.port_id >> 345 MLX5_PORT_ID_BONDING_PF_SHIFT) || 346 priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) { 347 DRV_LOG(ERR, "can't update switch port ID" 348 " for bonding device"); 349 MLX5_ASSERT(false); 350 return -ENODEV; 351 } 352 info->switch_info.port_id |= 353 priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT; 354 } 355 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 356 struct mlx5_priv *opriv = 357 rte_eth_devices[port_id].data->dev_private; 358 359 if (!opriv || 360 opriv->representor || 361 opriv->sh != priv->sh || 362 opriv->domain_id != priv->domain_id) 363 continue; 364 /* 365 * Override switch name with that of the master 366 * device. 367 */ 368 info->switch_info.name = opriv->dev_data->name; 369 break; 370 } 371 } 372 return 0; 373 } 374 375 /** 376 * Get firmware version of a device. 377 * 378 * @param dev 379 * Ethernet device port. 380 * @param fw_ver 381 * String output allocated by caller. 382 * @param fw_size 383 * Size of the output string, including terminating null byte. 384 * 385 * @return 386 * 0 on success, or the size of the non truncated string if too big. 387 */ 388 int 389 mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) 390 { 391 struct mlx5_priv *priv = dev->data->dev_private; 392 struct mlx5_dev_attr *attr = &priv->sh->device_attr; 393 size_t size = strnlen(attr->fw_ver, sizeof(attr->fw_ver)) + 1; 394 395 if (fw_size < size) 396 return size; 397 if (fw_ver != NULL) 398 strlcpy(fw_ver, attr->fw_ver, fw_size); 399 return 0; 400 } 401 402 /** 403 * Get supported packet types. 404 * 405 * @param dev 406 * Pointer to Ethernet device structure. 407 * 408 * @return 409 * A pointer to the supported Packet types array. 410 */ 411 const uint32_t * 412 mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) 413 { 414 static const uint32_t ptypes[] = { 415 /* refers to rxq_cq_to_pkt_type() */ 416 RTE_PTYPE_L2_ETHER, 417 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, 418 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, 419 RTE_PTYPE_L4_NONFRAG, 420 RTE_PTYPE_L4_FRAG, 421 RTE_PTYPE_L4_TCP, 422 RTE_PTYPE_L4_UDP, 423 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, 424 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, 425 RTE_PTYPE_INNER_L4_NONFRAG, 426 RTE_PTYPE_INNER_L4_FRAG, 427 RTE_PTYPE_INNER_L4_TCP, 428 RTE_PTYPE_INNER_L4_UDP, 429 RTE_PTYPE_UNKNOWN 430 }; 431 432 if (dev->rx_pkt_burst == mlx5_rx_burst || 433 dev->rx_pkt_burst == mlx5_rx_burst_mprq || 434 dev->rx_pkt_burst == mlx5_rx_burst_vec || 435 dev->rx_pkt_burst == mlx5_rx_burst_mprq_vec) 436 return ptypes; 437 return NULL; 438 } 439 440 /** 441 * DPDK callback to change the MTU. 442 * 443 * @param dev 444 * Pointer to Ethernet device structure. 445 * @param in_mtu 446 * New MTU. 447 * 448 * @return 449 * 0 on success, a negative errno value otherwise and rte_errno is set. 450 */ 451 int 452 mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 453 { 454 struct mlx5_priv *priv = dev->data->dev_private; 455 uint16_t kern_mtu = 0; 456 int ret; 457 458 ret = mlx5_get_mtu(dev, &kern_mtu); 459 if (ret) 460 return ret; 461 /* Set kernel interface MTU first. */ 462 ret = mlx5_set_mtu(dev, mtu); 463 if (ret) 464 return ret; 465 ret = mlx5_get_mtu(dev, &kern_mtu); 466 if (ret) 467 return ret; 468 if (kern_mtu == mtu) { 469 priv->mtu = mtu; 470 DRV_LOG(DEBUG, "port %u adapter MTU set to %u", 471 dev->data->port_id, mtu); 472 return 0; 473 } 474 rte_errno = EAGAIN; 475 return -rte_errno; 476 } 477 478 /** 479 * Configure the RX function to use. 480 * 481 * @param dev 482 * Pointer to private data structure. 483 * 484 * @return 485 * Pointer to selected Rx burst function. 486 */ 487 eth_rx_burst_t 488 mlx5_select_rx_function(struct rte_eth_dev *dev) 489 { 490 eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; 491 492 MLX5_ASSERT(dev != NULL); 493 if (mlx5_check_vec_rx_support(dev) > 0) { 494 if (mlx5_mprq_enabled(dev)) { 495 rx_pkt_burst = mlx5_rx_burst_mprq_vec; 496 DRV_LOG(DEBUG, "port %u selected vectorized" 497 " MPRQ Rx function", dev->data->port_id); 498 } else { 499 rx_pkt_burst = mlx5_rx_burst_vec; 500 DRV_LOG(DEBUG, "port %u selected vectorized" 501 " SPRQ Rx function", dev->data->port_id); 502 } 503 } else if (mlx5_mprq_enabled(dev)) { 504 rx_pkt_burst = mlx5_rx_burst_mprq; 505 DRV_LOG(DEBUG, "port %u selected MPRQ Rx function", 506 dev->data->port_id); 507 } else { 508 DRV_LOG(DEBUG, "port %u selected SPRQ Rx function", 509 dev->data->port_id); 510 } 511 return rx_pkt_burst; 512 } 513 514 /** 515 * Get the E-Switch parameters by port id. 516 * 517 * @param[in] port 518 * Device port id. 519 * @param[in] valid 520 * Device port id is valid, skip check. This flag is useful 521 * when trials are performed from probing and device is not 522 * flagged as valid yet (in attaching process). 523 * @param[out] es_domain_id 524 * E-Switch domain id. 525 * @param[out] es_port_id 526 * The port id of the port in the E-Switch. 527 * 528 * @return 529 * pointer to device private data structure containing data needed 530 * on success, NULL otherwise and rte_errno is set. 531 */ 532 struct mlx5_priv * 533 mlx5_port_to_eswitch_info(uint16_t port, bool valid) 534 { 535 struct rte_eth_dev *dev; 536 struct mlx5_priv *priv; 537 538 if (port >= RTE_MAX_ETHPORTS) { 539 rte_errno = EINVAL; 540 return NULL; 541 } 542 if (!valid && !rte_eth_dev_is_valid_port(port)) { 543 rte_errno = ENODEV; 544 return NULL; 545 } 546 dev = &rte_eth_devices[port]; 547 priv = dev->data->dev_private; 548 if (!(priv->representor || priv->master)) { 549 rte_errno = EINVAL; 550 return NULL; 551 } 552 return priv; 553 } 554 555 /** 556 * Get the E-Switch parameters by device instance. 557 * 558 * @param[in] port 559 * Device port id. 560 * @param[out] es_domain_id 561 * E-Switch domain id. 562 * @param[out] es_port_id 563 * The port id of the port in the E-Switch. 564 * 565 * @return 566 * pointer to device private data structure containing data needed 567 * on success, NULL otherwise and rte_errno is set. 568 */ 569 struct mlx5_priv * 570 mlx5_dev_to_eswitch_info(struct rte_eth_dev *dev) 571 { 572 struct mlx5_priv *priv; 573 574 priv = dev->data->dev_private; 575 if (!(priv->representor || priv->master)) { 576 rte_errno = EINVAL; 577 return NULL; 578 } 579 return priv; 580 } 581 582 /** 583 * DPDK callback to retrieve hairpin capabilities. 584 * 585 * @param dev 586 * Pointer to Ethernet device structure. 587 * @param[out] cap 588 * Storage for hairpin capability data. 589 * 590 * @return 591 * 0 on success, a negative errno value otherwise and rte_errno is set. 592 */ 593 int 594 mlx5_hairpin_cap_get(struct rte_eth_dev *dev, struct rte_eth_hairpin_cap *cap) 595 { 596 struct mlx5_priv *priv = dev->data->dev_private; 597 struct mlx5_dev_config *config = &priv->config; 598 599 if (!priv->sh->devx || !config->dest_tir || !config->dv_flow_en) { 600 rte_errno = ENOTSUP; 601 return -rte_errno; 602 } 603 cap->max_nb_queues = UINT16_MAX; 604 cap->max_rx_2_tx = 1; 605 cap->max_tx_2_rx = 1; 606 cap->max_nb_desc = 8192; 607 return 0; 608 } 609