1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <stddef.h> 7 #include <inttypes.h> 8 #include <unistd.h> 9 #include <stdbool.h> 10 #include <stdint.h> 11 #include <stdio.h> 12 #include <string.h> 13 #include <stdlib.h> 14 #include <errno.h> 15 #include <dirent.h> 16 #include <net/if.h> 17 #include <sys/ioctl.h> 18 #include <sys/socket.h> 19 #include <netinet/in.h> 20 #include <linux/ethtool.h> 21 #include <linux/sockios.h> 22 #include <fcntl.h> 23 #include <stdalign.h> 24 #include <sys/un.h> 25 #include <time.h> 26 27 #include <rte_atomic.h> 28 #include <rte_ethdev_driver.h> 29 #include <rte_bus_pci.h> 30 #include <rte_mbuf.h> 31 #include <rte_common.h> 32 #include <rte_interrupts.h> 33 #include <rte_malloc.h> 34 #include <rte_string_fns.h> 35 #include <rte_rwlock.h> 36 #include <rte_cycles.h> 37 38 #include <mlx5_glue.h> 39 #include <mlx5_devx_cmds.h> 40 #include <mlx5_common.h> 41 #include <mlx5_malloc.h> 42 43 #include "mlx5.h" 44 #include "mlx5_rxtx.h" 45 #include "mlx5_utils.h" 46 47 /* Supported speed values found in /usr/include/linux/ethtool.h */ 48 #ifndef HAVE_SUPPORTED_40000baseKR4_Full 49 #define SUPPORTED_40000baseKR4_Full (1 << 23) 50 #endif 51 #ifndef HAVE_SUPPORTED_40000baseCR4_Full 52 #define SUPPORTED_40000baseCR4_Full (1 << 24) 53 #endif 54 #ifndef HAVE_SUPPORTED_40000baseSR4_Full 55 #define SUPPORTED_40000baseSR4_Full (1 << 25) 56 #endif 57 #ifndef HAVE_SUPPORTED_40000baseLR4_Full 58 #define SUPPORTED_40000baseLR4_Full (1 << 26) 59 #endif 60 #ifndef HAVE_SUPPORTED_56000baseKR4_Full 61 #define SUPPORTED_56000baseKR4_Full (1 << 27) 62 #endif 63 #ifndef HAVE_SUPPORTED_56000baseCR4_Full 64 #define SUPPORTED_56000baseCR4_Full (1 << 28) 65 #endif 66 #ifndef HAVE_SUPPORTED_56000baseSR4_Full 67 #define SUPPORTED_56000baseSR4_Full (1 << 29) 68 #endif 69 #ifndef HAVE_SUPPORTED_56000baseLR4_Full 70 #define SUPPORTED_56000baseLR4_Full (1 << 30) 71 #endif 72 73 /* Add defines in case the running kernel is not the same as user headers. */ 74 #ifndef ETHTOOL_GLINKSETTINGS 75 struct ethtool_link_settings { 76 uint32_t cmd; 77 uint32_t speed; 78 uint8_t duplex; 79 uint8_t port; 80 uint8_t phy_address; 81 uint8_t autoneg; 82 uint8_t mdio_support; 83 uint8_t eth_to_mdix; 84 uint8_t eth_tp_mdix_ctrl; 85 int8_t link_mode_masks_nwords; 86 uint32_t reserved[8]; 87 uint32_t link_mode_masks[]; 88 }; 89 90 /* The kernel values can be found in /include/uapi/linux/ethtool.h */ 91 #define ETHTOOL_GLINKSETTINGS 0x0000004c 92 #define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 93 #define ETHTOOL_LINK_MODE_Autoneg_BIT 6 94 #define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 95 #define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 96 #define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 97 #define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 98 #define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 99 #define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 100 #define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 101 #define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 102 #define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 103 #define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 104 #define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 105 #define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 106 #define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 107 #define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 108 #endif 109 #ifndef HAVE_ETHTOOL_LINK_MODE_25G 110 #define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 111 #define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 112 #define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 113 #endif 114 #ifndef HAVE_ETHTOOL_LINK_MODE_50G 115 #define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 116 #define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 117 #endif 118 #ifndef HAVE_ETHTOOL_LINK_MODE_100G 119 #define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 120 #define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 121 #define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 122 #define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 123 #endif 124 #ifndef HAVE_ETHTOOL_LINK_MODE_200G 125 #define ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT 62 126 #define ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT 63 127 #define ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT 0 /* 64 - 64 */ 128 #define ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT 1 /* 65 - 64 */ 129 #define ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT 2 /* 66 - 64 */ 130 #endif 131 132 133 /** 134 * Get interface name from private structure. 135 * 136 * This is a port representor-aware version of mlx5_get_ifname_sysfs(). 137 * 138 * @param[in] dev 139 * Pointer to Ethernet device. 140 * @param[out] ifname 141 * Interface name output buffer. 142 * 143 * @return 144 * 0 on success, a negative errno value otherwise and rte_errno is set. 145 */ 146 int 147 mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) 148 { 149 struct mlx5_priv *priv = dev->data->dev_private; 150 unsigned int ifindex; 151 152 MLX5_ASSERT(priv); 153 MLX5_ASSERT(priv->sh); 154 ifindex = mlx5_ifindex(dev); 155 if (!ifindex) { 156 if (!priv->representor) 157 return mlx5_get_ifname_sysfs(priv->sh->ibdev_path, 158 *ifname); 159 rte_errno = ENXIO; 160 return -rte_errno; 161 } 162 if (if_indextoname(ifindex, &(*ifname)[0])) 163 return 0; 164 rte_errno = errno; 165 return -rte_errno; 166 } 167 168 /** 169 * Perform ifreq ioctl() on associated Ethernet device. 170 * 171 * @param[in] dev 172 * Pointer to Ethernet device. 173 * @param req 174 * Request number to pass to ioctl(). 175 * @param[out] ifr 176 * Interface request structure output buffer. 177 * 178 * @return 179 * 0 on success, a negative errno value otherwise and rte_errno is set. 180 */ 181 static int 182 mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) 183 { 184 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 185 int ret = 0; 186 187 if (sock == -1) { 188 rte_errno = errno; 189 return -rte_errno; 190 } 191 ret = mlx5_get_ifname(dev, &ifr->ifr_name); 192 if (ret) 193 goto error; 194 ret = ioctl(sock, req, ifr); 195 if (ret == -1) { 196 rte_errno = errno; 197 goto error; 198 } 199 close(sock); 200 return 0; 201 error: 202 close(sock); 203 return -rte_errno; 204 } 205 206 /** 207 * Get device MTU. 208 * 209 * @param dev 210 * Pointer to Ethernet device. 211 * @param[out] mtu 212 * MTU value output buffer. 213 * 214 * @return 215 * 0 on success, a negative errno value otherwise and rte_errno is set. 216 */ 217 int 218 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) 219 { 220 struct ifreq request; 221 int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); 222 223 if (ret) 224 return ret; 225 *mtu = request.ifr_mtu; 226 return 0; 227 } 228 229 /** 230 * Set device MTU. 231 * 232 * @param dev 233 * Pointer to Ethernet device. 234 * @param mtu 235 * MTU value to set. 236 * 237 * @return 238 * 0 on success, a negative errno value otherwise and rte_errno is set. 239 */ 240 int 241 mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) 242 { 243 struct ifreq request = { .ifr_mtu = mtu, }; 244 245 return mlx5_ifreq(dev, SIOCSIFMTU, &request); 246 } 247 248 /** 249 * Set device flags. 250 * 251 * @param dev 252 * Pointer to Ethernet device. 253 * @param keep 254 * Bitmask for flags that must remain untouched. 255 * @param flags 256 * Bitmask for flags to modify. 257 * 258 * @return 259 * 0 on success, a negative errno value otherwise and rte_errno is set. 260 */ 261 static int 262 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) 263 { 264 struct ifreq request; 265 int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); 266 267 if (ret) 268 return ret; 269 request.ifr_flags &= keep; 270 request.ifr_flags |= flags & ~keep; 271 return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); 272 } 273 274 /** 275 * Get device current raw clock counter 276 * 277 * @param dev 278 * Pointer to Ethernet device structure. 279 * @param[out] time 280 * Current raw clock counter of the device. 281 * 282 * @return 283 * 0 if the clock has correctly been read 284 * The value of errno in case of error 285 */ 286 int 287 mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock) 288 { 289 struct mlx5_priv *priv = dev->data->dev_private; 290 struct ibv_context *ctx = priv->sh->ctx; 291 struct ibv_values_ex values; 292 int err = 0; 293 294 values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK; 295 err = mlx5_glue->query_rt_values_ex(ctx, &values); 296 if (err != 0) { 297 DRV_LOG(WARNING, "Could not query the clock !"); 298 return err; 299 } 300 *clock = values.raw_clock.tv_nsec; 301 return 0; 302 } 303 304 /** 305 * Retrieve the master device for representor in the same switch domain. 306 * 307 * @param dev 308 * Pointer to representor Ethernet device structure. 309 * 310 * @return 311 * Master device structure on success, NULL otherwise. 312 */ 313 static struct rte_eth_dev * 314 mlx5_find_master_dev(struct rte_eth_dev *dev) 315 { 316 struct mlx5_priv *priv; 317 uint16_t port_id; 318 uint16_t domain_id; 319 320 priv = dev->data->dev_private; 321 domain_id = priv->domain_id; 322 MLX5_ASSERT(priv->representor); 323 MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { 324 struct mlx5_priv *opriv = 325 rte_eth_devices[port_id].data->dev_private; 326 if (opriv && 327 opriv->master && 328 opriv->domain_id == domain_id && 329 opriv->sh == priv->sh) 330 return &rte_eth_devices[port_id]; 331 } 332 return NULL; 333 } 334 335 /** 336 * DPDK callback to retrieve physical link information. 337 * 338 * @param dev 339 * Pointer to Ethernet device structure. 340 * @param[out] link 341 * Storage for current link status. 342 * 343 * @return 344 * 0 on success, a negative errno value otherwise and rte_errno is set. 345 */ 346 static int 347 mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, 348 struct rte_eth_link *link) 349 { 350 struct mlx5_priv *priv = dev->data->dev_private; 351 struct ethtool_cmd edata = { 352 .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ 353 }; 354 struct ifreq ifr; 355 struct rte_eth_link dev_link; 356 int link_speed = 0; 357 int ret; 358 359 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 360 if (ret) { 361 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 362 dev->data->port_id, strerror(rte_errno)); 363 return ret; 364 } 365 dev_link = (struct rte_eth_link) { 366 .link_status = ((ifr.ifr_flags & IFF_UP) && 367 (ifr.ifr_flags & IFF_RUNNING)), 368 }; 369 ifr = (struct ifreq) { 370 .ifr_data = (void *)&edata, 371 }; 372 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 373 if (ret) { 374 if (ret == -ENOTSUP && priv->representor) { 375 struct rte_eth_dev *master; 376 377 /* 378 * For representors we can try to inherit link 379 * settings from the master device. Actually 380 * link settings do not make a lot of sense 381 * for representors due to missing physical 382 * link. The old kernel drivers supported 383 * emulated settings query for representors, 384 * the new ones do not, so we have to add 385 * this code for compatibility issues. 386 */ 387 master = mlx5_find_master_dev(dev); 388 if (master) { 389 ifr = (struct ifreq) { 390 .ifr_data = (void *)&edata, 391 }; 392 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr); 393 } 394 } 395 if (ret) { 396 DRV_LOG(WARNING, 397 "port %u ioctl(SIOCETHTOOL," 398 " ETHTOOL_GSET) failed: %s", 399 dev->data->port_id, strerror(rte_errno)); 400 return ret; 401 } 402 } 403 link_speed = ethtool_cmd_speed(&edata); 404 if (link_speed == -1) 405 dev_link.link_speed = ETH_SPEED_NUM_NONE; 406 else 407 dev_link.link_speed = link_speed; 408 priv->link_speed_capa = 0; 409 if (edata.supported & SUPPORTED_Autoneg) 410 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 411 if (edata.supported & (SUPPORTED_1000baseT_Full | 412 SUPPORTED_1000baseKX_Full)) 413 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 414 if (edata.supported & SUPPORTED_10000baseKR_Full) 415 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 416 if (edata.supported & (SUPPORTED_40000baseKR4_Full | 417 SUPPORTED_40000baseCR4_Full | 418 SUPPORTED_40000baseSR4_Full | 419 SUPPORTED_40000baseLR4_Full)) 420 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 421 dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? 422 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 423 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 424 ETH_LINK_SPEED_FIXED); 425 if (((dev_link.link_speed && !dev_link.link_status) || 426 (!dev_link.link_speed && dev_link.link_status))) { 427 rte_errno = EAGAIN; 428 return -rte_errno; 429 } 430 *link = dev_link; 431 return 0; 432 } 433 434 /** 435 * Retrieve physical link information (unlocked version using new ioctl). 436 * 437 * @param dev 438 * Pointer to Ethernet device structure. 439 * @param[out] link 440 * Storage for current link status. 441 * 442 * @return 443 * 0 on success, a negative errno value otherwise and rte_errno is set. 444 */ 445 static int 446 mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, 447 struct rte_eth_link *link) 448 449 { 450 struct mlx5_priv *priv = dev->data->dev_private; 451 struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; 452 struct ifreq ifr; 453 struct rte_eth_link dev_link; 454 struct rte_eth_dev *master = NULL; 455 uint64_t sc; 456 int ret; 457 458 ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); 459 if (ret) { 460 DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", 461 dev->data->port_id, strerror(rte_errno)); 462 return ret; 463 } 464 dev_link = (struct rte_eth_link) { 465 .link_status = ((ifr.ifr_flags & IFF_UP) && 466 (ifr.ifr_flags & IFF_RUNNING)), 467 }; 468 ifr = (struct ifreq) { 469 .ifr_data = (void *)&gcmd, 470 }; 471 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 472 if (ret) { 473 if (ret == -ENOTSUP && priv->representor) { 474 /* 475 * For representors we can try to inherit link 476 * settings from the master device. Actually 477 * link settings do not make a lot of sense 478 * for representors due to missing physical 479 * link. The old kernel drivers supported 480 * emulated settings query for representors, 481 * the new ones do not, so we have to add 482 * this code for compatibility issues. 483 */ 484 master = mlx5_find_master_dev(dev); 485 if (master) { 486 ifr = (struct ifreq) { 487 .ifr_data = (void *)&gcmd, 488 }; 489 ret = mlx5_ifreq(master, SIOCETHTOOL, &ifr); 490 } 491 } 492 if (ret) { 493 DRV_LOG(DEBUG, 494 "port %u ioctl(SIOCETHTOOL," 495 " ETHTOOL_GLINKSETTINGS) failed: %s", 496 dev->data->port_id, strerror(rte_errno)); 497 return ret; 498 } 499 } 500 gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords; 501 502 alignas(struct ethtool_link_settings) 503 uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) + 504 sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3]; 505 struct ethtool_link_settings *ecmd = (void *)data; 506 507 *ecmd = gcmd; 508 ifr.ifr_data = (void *)ecmd; 509 ret = mlx5_ifreq(master ? master : dev, SIOCETHTOOL, &ifr); 510 if (ret) { 511 DRV_LOG(DEBUG, 512 "port %u ioctl(SIOCETHTOOL," 513 "ETHTOOL_GLINKSETTINGS) failed: %s", 514 dev->data->port_id, strerror(rte_errno)); 515 return ret; 516 } 517 dev_link.link_speed = (ecmd->speed == UINT32_MAX) ? ETH_SPEED_NUM_NONE : 518 ecmd->speed; 519 sc = ecmd->link_mode_masks[0] | 520 ((uint64_t)ecmd->link_mode_masks[1] << 32); 521 priv->link_speed_capa = 0; 522 if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT)) 523 priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; 524 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) | 525 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))) 526 priv->link_speed_capa |= ETH_LINK_SPEED_1G; 527 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) | 528 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) | 529 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT))) 530 priv->link_speed_capa |= ETH_LINK_SPEED_10G; 531 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) | 532 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT))) 533 priv->link_speed_capa |= ETH_LINK_SPEED_20G; 534 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) | 535 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) | 536 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) | 537 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT))) 538 priv->link_speed_capa |= ETH_LINK_SPEED_40G; 539 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) | 540 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) | 541 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) | 542 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))) 543 priv->link_speed_capa |= ETH_LINK_SPEED_56G; 544 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) | 545 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) | 546 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))) 547 priv->link_speed_capa |= ETH_LINK_SPEED_25G; 548 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) | 549 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT))) 550 priv->link_speed_capa |= ETH_LINK_SPEED_50G; 551 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) | 552 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) | 553 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) | 554 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))) 555 priv->link_speed_capa |= ETH_LINK_SPEED_100G; 556 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT) | 557 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT))) 558 priv->link_speed_capa |= ETH_LINK_SPEED_200G; 559 560 sc = ecmd->link_mode_masks[2] | 561 ((uint64_t)ecmd->link_mode_masks[3] << 32); 562 if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT) | 563 MLX5_BITSHIFT 564 (ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT) | 565 MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT))) 566 priv->link_speed_capa |= ETH_LINK_SPEED_200G; 567 dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ? 568 ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); 569 dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & 570 ETH_LINK_SPEED_FIXED); 571 if (((dev_link.link_speed && !dev_link.link_status) || 572 (!dev_link.link_speed && dev_link.link_status))) { 573 rte_errno = EAGAIN; 574 return -rte_errno; 575 } 576 *link = dev_link; 577 return 0; 578 } 579 580 /** 581 * DPDK callback to retrieve physical link information. 582 * 583 * @param dev 584 * Pointer to Ethernet device structure. 585 * @param wait_to_complete 586 * Wait for request completion. 587 * 588 * @return 589 * 0 if link status was not updated, positive if it was, a negative errno 590 * value otherwise and rte_errno is set. 591 */ 592 int 593 mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) 594 { 595 int ret; 596 struct rte_eth_link dev_link; 597 time_t start_time = time(NULL); 598 int retry = MLX5_GET_LINK_STATUS_RETRY_COUNT; 599 600 do { 601 ret = mlx5_link_update_unlocked_gs(dev, &dev_link); 602 if (ret == -ENOTSUP) 603 ret = mlx5_link_update_unlocked_gset(dev, &dev_link); 604 if (ret == 0) 605 break; 606 /* Handle wait to complete situation. */ 607 if ((wait_to_complete || retry) && ret == -EAGAIN) { 608 if (abs((int)difftime(time(NULL), start_time)) < 609 MLX5_LINK_STATUS_TIMEOUT) { 610 usleep(0); 611 continue; 612 } else { 613 rte_errno = EBUSY; 614 return -rte_errno; 615 } 616 } else if (ret < 0) { 617 return ret; 618 } 619 } while (wait_to_complete || retry-- > 0); 620 ret = !!memcmp(&dev->data->dev_link, &dev_link, 621 sizeof(struct rte_eth_link)); 622 dev->data->dev_link = dev_link; 623 return ret; 624 } 625 626 /** 627 * DPDK callback to get flow control status. 628 * 629 * @param dev 630 * Pointer to Ethernet device structure. 631 * @param[out] fc_conf 632 * Flow control output buffer. 633 * 634 * @return 635 * 0 on success, a negative errno value otherwise and rte_errno is set. 636 */ 637 int 638 mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 639 { 640 struct ifreq ifr; 641 struct ethtool_pauseparam ethpause = { 642 .cmd = ETHTOOL_GPAUSEPARAM 643 }; 644 int ret; 645 646 ifr.ifr_data = (void *)ðpause; 647 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 648 if (ret) { 649 DRV_LOG(WARNING, 650 "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" 651 " %s", 652 dev->data->port_id, strerror(rte_errno)); 653 return ret; 654 } 655 fc_conf->autoneg = ethpause.autoneg; 656 if (ethpause.rx_pause && ethpause.tx_pause) 657 fc_conf->mode = RTE_FC_FULL; 658 else if (ethpause.rx_pause) 659 fc_conf->mode = RTE_FC_RX_PAUSE; 660 else if (ethpause.tx_pause) 661 fc_conf->mode = RTE_FC_TX_PAUSE; 662 else 663 fc_conf->mode = RTE_FC_NONE; 664 return 0; 665 } 666 667 /** 668 * DPDK callback to modify flow control parameters. 669 * 670 * @param dev 671 * Pointer to Ethernet device structure. 672 * @param[in] fc_conf 673 * Flow control parameters. 674 * 675 * @return 676 * 0 on success, a negative errno value otherwise and rte_errno is set. 677 */ 678 int 679 mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) 680 { 681 struct ifreq ifr; 682 struct ethtool_pauseparam ethpause = { 683 .cmd = ETHTOOL_SPAUSEPARAM 684 }; 685 int ret; 686 687 ifr.ifr_data = (void *)ðpause; 688 ethpause.autoneg = fc_conf->autoneg; 689 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 690 (fc_conf->mode & RTE_FC_RX_PAUSE)) 691 ethpause.rx_pause = 1; 692 else 693 ethpause.rx_pause = 0; 694 695 if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || 696 (fc_conf->mode & RTE_FC_TX_PAUSE)) 697 ethpause.tx_pause = 1; 698 else 699 ethpause.tx_pause = 0; 700 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 701 if (ret) { 702 DRV_LOG(WARNING, 703 "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" 704 " failed: %s", 705 dev->data->port_id, strerror(rte_errno)); 706 return ret; 707 } 708 return 0; 709 } 710 711 /** 712 * Handle asynchronous removal event for entire multiport device. 713 * 714 * @param sh 715 * Infiniband device shared context. 716 */ 717 static void 718 mlx5_dev_interrupt_device_fatal(struct mlx5_dev_ctx_shared *sh) 719 { 720 uint32_t i; 721 722 for (i = 0; i < sh->max_port; ++i) { 723 struct rte_eth_dev *dev; 724 725 if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) { 726 /* 727 * Or not existing port either no 728 * handler installed for this port. 729 */ 730 continue; 731 } 732 dev = &rte_eth_devices[sh->port[i].ih_port_id]; 733 MLX5_ASSERT(dev); 734 if (dev->data->dev_conf.intr_conf.rmv) 735 _rte_eth_dev_callback_process 736 (dev, RTE_ETH_EVENT_INTR_RMV, NULL); 737 } 738 } 739 740 /** 741 * Handle shared asynchronous events the NIC (removal event 742 * and link status change). Supports multiport IB device. 743 * 744 * @param cb_arg 745 * Callback argument. 746 */ 747 void 748 mlx5_dev_interrupt_handler(void *cb_arg) 749 { 750 struct mlx5_dev_ctx_shared *sh = cb_arg; 751 struct ibv_async_event event; 752 753 /* Read all message from the IB device and acknowledge them. */ 754 for (;;) { 755 struct rte_eth_dev *dev; 756 uint32_t tmp; 757 758 if (mlx5_glue->get_async_event(sh->ctx, &event)) 759 break; 760 /* Retrieve and check IB port index. */ 761 tmp = (uint32_t)event.element.port_num; 762 if (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) { 763 /* 764 * The DEVICE_FATAL event is called once for 765 * entire device without port specifying. 766 * We should notify all existing ports. 767 */ 768 mlx5_glue->ack_async_event(&event); 769 mlx5_dev_interrupt_device_fatal(sh); 770 continue; 771 } 772 MLX5_ASSERT(tmp && (tmp <= sh->max_port)); 773 if (!tmp) { 774 /* Unsupported device level event. */ 775 mlx5_glue->ack_async_event(&event); 776 DRV_LOG(DEBUG, 777 "unsupported common event (type %d)", 778 event.event_type); 779 continue; 780 } 781 if (tmp > sh->max_port) { 782 /* Invalid IB port index. */ 783 mlx5_glue->ack_async_event(&event); 784 DRV_LOG(DEBUG, 785 "cannot handle an event (type %d)" 786 "due to invalid IB port index (%u)", 787 event.event_type, tmp); 788 continue; 789 } 790 if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) { 791 /* No handler installed. */ 792 mlx5_glue->ack_async_event(&event); 793 DRV_LOG(DEBUG, 794 "cannot handle an event (type %d)" 795 "due to no handler installed for port %u", 796 event.event_type, tmp); 797 continue; 798 } 799 /* Retrieve ethernet device descriptor. */ 800 tmp = sh->port[tmp - 1].ih_port_id; 801 dev = &rte_eth_devices[tmp]; 802 MLX5_ASSERT(dev); 803 if ((event.event_type == IBV_EVENT_PORT_ACTIVE || 804 event.event_type == IBV_EVENT_PORT_ERR) && 805 dev->data->dev_conf.intr_conf.lsc) { 806 mlx5_glue->ack_async_event(&event); 807 if (mlx5_link_update(dev, 0) == -EAGAIN) { 808 usleep(0); 809 continue; 810 } 811 _rte_eth_dev_callback_process 812 (dev, RTE_ETH_EVENT_INTR_LSC, NULL); 813 continue; 814 } 815 DRV_LOG(DEBUG, 816 "port %u cannot handle an unknown event (type %d)", 817 dev->data->port_id, event.event_type); 818 mlx5_glue->ack_async_event(&event); 819 } 820 } 821 822 /* 823 * Unregister callback handler safely. The handler may be active 824 * while we are trying to unregister it, in this case code -EAGAIN 825 * is returned by rte_intr_callback_unregister(). This routine checks 826 * the return code and tries to unregister handler again. 827 * 828 * @param handle 829 * interrupt handle 830 * @param cb_fn 831 * pointer to callback routine 832 * @cb_arg 833 * opaque callback parameter 834 */ 835 void 836 mlx5_intr_callback_unregister(const struct rte_intr_handle *handle, 837 rte_intr_callback_fn cb_fn, void *cb_arg) 838 { 839 /* 840 * Try to reduce timeout management overhead by not calling 841 * the timer related routines on the first iteration. If the 842 * unregistering succeeds on first call there will be no 843 * timer calls at all. 844 */ 845 uint64_t twait = 0; 846 uint64_t start = 0; 847 848 do { 849 int ret; 850 851 ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg); 852 if (ret >= 0) 853 return; 854 if (ret != -EAGAIN) { 855 DRV_LOG(INFO, "failed to unregister interrupt" 856 " handler (error: %d)", ret); 857 MLX5_ASSERT(false); 858 return; 859 } 860 if (twait) { 861 struct timespec onems; 862 863 /* Wait one millisecond and try again. */ 864 onems.tv_sec = 0; 865 onems.tv_nsec = NS_PER_S / MS_PER_S; 866 nanosleep(&onems, 0); 867 /* Check whether one second elapsed. */ 868 if ((rte_get_timer_cycles() - start) <= twait) 869 continue; 870 } else { 871 /* 872 * We get the amount of timer ticks for one second. 873 * If this amount elapsed it means we spent one 874 * second in waiting. This branch is executed once 875 * on first iteration. 876 */ 877 twait = rte_get_timer_hz(); 878 MLX5_ASSERT(twait); 879 } 880 /* 881 * Timeout elapsed, show message (once a second) and retry. 882 * We have no other acceptable option here, if we ignore 883 * the unregistering return code the handler will not 884 * be unregistered, fd will be closed and we may get the 885 * crush. Hanging and messaging in the loop seems not to be 886 * the worst choice. 887 */ 888 DRV_LOG(INFO, "Retrying to unregister interrupt handler"); 889 start = rte_get_timer_cycles(); 890 } while (true); 891 } 892 893 /** 894 * Handle DEVX interrupts from the NIC. 895 * This function is probably called from the DPDK host thread. 896 * 897 * @param cb_arg 898 * Callback argument. 899 */ 900 void 901 mlx5_dev_interrupt_handler_devx(void *cb_arg) 902 { 903 #ifndef HAVE_IBV_DEVX_ASYNC 904 (void)cb_arg; 905 return; 906 #else 907 struct mlx5_dev_ctx_shared *sh = cb_arg; 908 union { 909 struct mlx5dv_devx_async_cmd_hdr cmd_resp; 910 uint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) + 911 MLX5_ST_SZ_BYTES(traffic_counter) + 912 sizeof(struct mlx5dv_devx_async_cmd_hdr)]; 913 } out; 914 uint8_t *buf = out.buf + sizeof(out.cmd_resp); 915 916 while (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp, 917 &out.cmd_resp, 918 sizeof(out.buf))) 919 mlx5_flow_async_pool_query_handle 920 (sh, (uint64_t)out.cmd_resp.wr_id, 921 mlx5_devx_get_out_command_status(buf)); 922 #endif /* HAVE_IBV_DEVX_ASYNC */ 923 } 924 925 /** 926 * DPDK callback to bring the link DOWN. 927 * 928 * @param dev 929 * Pointer to Ethernet device structure. 930 * 931 * @return 932 * 0 on success, a negative errno value otherwise and rte_errno is set. 933 */ 934 int 935 mlx5_set_link_down(struct rte_eth_dev *dev) 936 { 937 return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); 938 } 939 940 /** 941 * DPDK callback to bring the link UP. 942 * 943 * @param dev 944 * Pointer to Ethernet device structure. 945 * 946 * @return 947 * 0 on success, a negative errno value otherwise and rte_errno is set. 948 */ 949 int 950 mlx5_set_link_up(struct rte_eth_dev *dev) 951 { 952 return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); 953 } 954 955 /** 956 * Check if mlx5 device was removed. 957 * 958 * @param dev 959 * Pointer to Ethernet device structure. 960 * 961 * @return 962 * 1 when device is removed, otherwise 0. 963 */ 964 int 965 mlx5_is_removed(struct rte_eth_dev *dev) 966 { 967 struct ibv_device_attr device_attr; 968 struct mlx5_priv *priv = dev->data->dev_private; 969 970 if (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO) 971 return 1; 972 return 0; 973 } 974 975 /** 976 * Analyze gathered port parameters via sysfs to recognize master 977 * and representor devices for E-Switch configuration. 978 * 979 * @param[in] device_dir 980 * flag of presence of "device" directory under port device key. 981 * @param[inout] switch_info 982 * Port information, including port name as a number and port name 983 * type if recognized 984 * 985 * @return 986 * master and representor flags are set in switch_info according to 987 * recognized parameters (if any). 988 */ 989 static void 990 mlx5_sysfs_check_switch_info(bool device_dir, 991 struct mlx5_switch_info *switch_info) 992 { 993 switch (switch_info->name_type) { 994 case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: 995 /* 996 * Name is not recognized, assume the master, 997 * check the device directory presence. 998 */ 999 switch_info->master = device_dir; 1000 break; 1001 case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: 1002 /* 1003 * Name is not set, this assumes the legacy naming 1004 * schema for master, just check if there is 1005 * a device directory. 1006 */ 1007 switch_info->master = device_dir; 1008 break; 1009 case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: 1010 /* New uplink naming schema recognized. */ 1011 switch_info->master = 1; 1012 break; 1013 case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: 1014 /* Legacy representors naming schema. */ 1015 switch_info->representor = !device_dir; 1016 break; 1017 case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: 1018 /* Fallthrough */ 1019 case MLX5_PHYS_PORT_NAME_TYPE_PFVF: 1020 /* New representors naming schema. */ 1021 switch_info->representor = 1; 1022 break; 1023 } 1024 } 1025 1026 /** 1027 * Get switch information associated with network interface. 1028 * 1029 * @param ifindex 1030 * Network interface index. 1031 * @param[out] info 1032 * Switch information object, populated in case of success. 1033 * 1034 * @return 1035 * 0 on success, a negative errno value otherwise and rte_errno is set. 1036 */ 1037 int 1038 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) 1039 { 1040 char ifname[IF_NAMESIZE]; 1041 char port_name[IF_NAMESIZE]; 1042 FILE *file; 1043 struct mlx5_switch_info data = { 1044 .master = 0, 1045 .representor = 0, 1046 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, 1047 .port_name = 0, 1048 .switch_id = 0, 1049 }; 1050 DIR *dir; 1051 bool port_switch_id_set = false; 1052 bool device_dir = false; 1053 char c; 1054 int ret; 1055 1056 if (!if_indextoname(ifindex, ifname)) { 1057 rte_errno = errno; 1058 return -rte_errno; 1059 } 1060 1061 MKSTR(phys_port_name, "/sys/class/net/%s/phys_port_name", 1062 ifname); 1063 MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", 1064 ifname); 1065 MKSTR(pci_device, "/sys/class/net/%s/device", 1066 ifname); 1067 1068 file = fopen(phys_port_name, "rb"); 1069 if (file != NULL) { 1070 ret = fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", port_name); 1071 fclose(file); 1072 if (ret == 1) 1073 mlx5_translate_port_name(port_name, &data); 1074 } 1075 file = fopen(phys_switch_id, "rb"); 1076 if (file == NULL) { 1077 rte_errno = errno; 1078 return -rte_errno; 1079 } 1080 port_switch_id_set = 1081 fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && 1082 c == '\n'; 1083 fclose(file); 1084 dir = opendir(pci_device); 1085 if (dir != NULL) { 1086 closedir(dir); 1087 device_dir = true; 1088 } 1089 if (port_switch_id_set) { 1090 /* We have some E-Switch configuration. */ 1091 mlx5_sysfs_check_switch_info(device_dir, &data); 1092 } 1093 *info = data; 1094 MLX5_ASSERT(!(data.master && data.representor)); 1095 if (data.master && data.representor) { 1096 DRV_LOG(ERR, "ifindex %u device is recognized as master" 1097 " and as representor", ifindex); 1098 rte_errno = ENODEV; 1099 return -rte_errno; 1100 } 1101 return 0; 1102 } 1103 1104 /** 1105 * DPDK callback to retrieve plug-in module EEPROM information (type and size). 1106 * 1107 * @param dev 1108 * Pointer to Ethernet device structure. 1109 * @param[out] modinfo 1110 * Storage for plug-in module EEPROM information. 1111 * 1112 * @return 1113 * 0 on success, a negative errno value otherwise and rte_errno is set. 1114 */ 1115 int 1116 mlx5_get_module_info(struct rte_eth_dev *dev, 1117 struct rte_eth_dev_module_info *modinfo) 1118 { 1119 struct ethtool_modinfo info = { 1120 .cmd = ETHTOOL_GMODULEINFO, 1121 }; 1122 struct ifreq ifr = (struct ifreq) { 1123 .ifr_data = (void *)&info, 1124 }; 1125 int ret = 0; 1126 1127 if (!dev || !modinfo) { 1128 DRV_LOG(WARNING, "missing argument, cannot get module info"); 1129 rte_errno = EINVAL; 1130 return -rte_errno; 1131 } 1132 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1133 if (ret) { 1134 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 1135 dev->data->port_id, strerror(rte_errno)); 1136 return ret; 1137 } 1138 modinfo->type = info.type; 1139 modinfo->eeprom_len = info.eeprom_len; 1140 return ret; 1141 } 1142 1143 /** 1144 * DPDK callback to retrieve plug-in module EEPROM data. 1145 * 1146 * @param dev 1147 * Pointer to Ethernet device structure. 1148 * @param[out] info 1149 * Storage for plug-in module EEPROM data. 1150 * 1151 * @return 1152 * 0 on success, a negative errno value otherwise and rte_errno is set. 1153 */ 1154 int mlx5_get_module_eeprom(struct rte_eth_dev *dev, 1155 struct rte_dev_eeprom_info *info) 1156 { 1157 struct ethtool_eeprom *eeprom; 1158 struct ifreq ifr; 1159 int ret = 0; 1160 1161 if (!dev || !info) { 1162 DRV_LOG(WARNING, "missing argument, cannot get module eeprom"); 1163 rte_errno = EINVAL; 1164 return -rte_errno; 1165 } 1166 eeprom = mlx5_malloc(MLX5_MEM_ZERO, 1167 (sizeof(struct ethtool_eeprom) + info->length), 0, 1168 SOCKET_ID_ANY); 1169 if (!eeprom) { 1170 DRV_LOG(WARNING, "port %u cannot allocate memory for " 1171 "eeprom data", dev->data->port_id); 1172 rte_errno = ENOMEM; 1173 return -rte_errno; 1174 } 1175 eeprom->cmd = ETHTOOL_GMODULEEEPROM; 1176 eeprom->offset = info->offset; 1177 eeprom->len = info->length; 1178 ifr = (struct ifreq) { 1179 .ifr_data = (void *)eeprom, 1180 }; 1181 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1182 if (ret) 1183 DRV_LOG(WARNING, "port %u ioctl(SIOCETHTOOL) failed: %s", 1184 dev->data->port_id, strerror(rte_errno)); 1185 else 1186 rte_memcpy(info->data, eeprom->data, info->length); 1187 mlx5_free(eeprom); 1188 return ret; 1189 } 1190 1191 /** 1192 * Read device counters table. 1193 * 1194 * @param dev 1195 * Pointer to Ethernet device. 1196 * @param[out] stats 1197 * Counters table output buffer. 1198 * 1199 * @return 1200 * 0 on success and stats is filled, negative errno value otherwise and 1201 * rte_errno is set. 1202 */ 1203 int 1204 mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats) 1205 { 1206 struct mlx5_priv *priv = dev->data->dev_private; 1207 struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; 1208 unsigned int i; 1209 struct ifreq ifr; 1210 unsigned int stats_sz = xstats_ctrl->stats_n * sizeof(uint64_t); 1211 unsigned char et_stat_buf[sizeof(struct ethtool_stats) + stats_sz]; 1212 struct ethtool_stats *et_stats = (struct ethtool_stats *)et_stat_buf; 1213 int ret; 1214 1215 et_stats->cmd = ETHTOOL_GSTATS; 1216 et_stats->n_stats = xstats_ctrl->stats_n; 1217 ifr.ifr_data = (caddr_t)et_stats; 1218 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1219 if (ret) { 1220 DRV_LOG(WARNING, 1221 "port %u unable to read statistic values from device", 1222 dev->data->port_id); 1223 return ret; 1224 } 1225 for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) { 1226 if (xstats_ctrl->info[i].dev) { 1227 ret = mlx5_os_read_dev_stat(priv, 1228 xstats_ctrl->info[i].ctr_name, 1229 &stats[i]); 1230 /* return last xstats counter if fail to read. */ 1231 if (ret == 0) 1232 xstats_ctrl->xstats[i] = stats[i]; 1233 else 1234 stats[i] = xstats_ctrl->xstats[i]; 1235 } else { 1236 stats[i] = (uint64_t) 1237 et_stats->data[xstats_ctrl->dev_table_idx[i]]; 1238 } 1239 } 1240 return 0; 1241 } 1242 1243 /** 1244 * Query the number of statistics provided by ETHTOOL. 1245 * 1246 * @param dev 1247 * Pointer to Ethernet device. 1248 * 1249 * @return 1250 * Number of statistics on success, negative errno value otherwise and 1251 * rte_errno is set. 1252 */ 1253 int 1254 mlx5_os_get_stats_n(struct rte_eth_dev *dev) 1255 { 1256 struct ethtool_drvinfo drvinfo; 1257 struct ifreq ifr; 1258 int ret; 1259 1260 drvinfo.cmd = ETHTOOL_GDRVINFO; 1261 ifr.ifr_data = (caddr_t)&drvinfo; 1262 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1263 if (ret) { 1264 DRV_LOG(WARNING, "port %u unable to query number of statistics", 1265 dev->data->port_id); 1266 return ret; 1267 } 1268 return drvinfo.n_stats; 1269 } 1270 1271 static const struct mlx5_counter_ctrl mlx5_counters_init[] = { 1272 { 1273 .dpdk_name = "rx_port_unicast_bytes", 1274 .ctr_name = "rx_vport_unicast_bytes", 1275 }, 1276 { 1277 .dpdk_name = "rx_port_multicast_bytes", 1278 .ctr_name = "rx_vport_multicast_bytes", 1279 }, 1280 { 1281 .dpdk_name = "rx_port_broadcast_bytes", 1282 .ctr_name = "rx_vport_broadcast_bytes", 1283 }, 1284 { 1285 .dpdk_name = "rx_port_unicast_packets", 1286 .ctr_name = "rx_vport_unicast_packets", 1287 }, 1288 { 1289 .dpdk_name = "rx_port_multicast_packets", 1290 .ctr_name = "rx_vport_multicast_packets", 1291 }, 1292 { 1293 .dpdk_name = "rx_port_broadcast_packets", 1294 .ctr_name = "rx_vport_broadcast_packets", 1295 }, 1296 { 1297 .dpdk_name = "tx_port_unicast_bytes", 1298 .ctr_name = "tx_vport_unicast_bytes", 1299 }, 1300 { 1301 .dpdk_name = "tx_port_multicast_bytes", 1302 .ctr_name = "tx_vport_multicast_bytes", 1303 }, 1304 { 1305 .dpdk_name = "tx_port_broadcast_bytes", 1306 .ctr_name = "tx_vport_broadcast_bytes", 1307 }, 1308 { 1309 .dpdk_name = "tx_port_unicast_packets", 1310 .ctr_name = "tx_vport_unicast_packets", 1311 }, 1312 { 1313 .dpdk_name = "tx_port_multicast_packets", 1314 .ctr_name = "tx_vport_multicast_packets", 1315 }, 1316 { 1317 .dpdk_name = "tx_port_broadcast_packets", 1318 .ctr_name = "tx_vport_broadcast_packets", 1319 }, 1320 { 1321 .dpdk_name = "rx_wqe_err", 1322 .ctr_name = "rx_wqe_err", 1323 }, 1324 { 1325 .dpdk_name = "rx_crc_errors_phy", 1326 .ctr_name = "rx_crc_errors_phy", 1327 }, 1328 { 1329 .dpdk_name = "rx_in_range_len_errors_phy", 1330 .ctr_name = "rx_in_range_len_errors_phy", 1331 }, 1332 { 1333 .dpdk_name = "rx_symbol_err_phy", 1334 .ctr_name = "rx_symbol_err_phy", 1335 }, 1336 { 1337 .dpdk_name = "tx_errors_phy", 1338 .ctr_name = "tx_errors_phy", 1339 }, 1340 { 1341 .dpdk_name = "rx_out_of_buffer", 1342 .ctr_name = "out_of_buffer", 1343 .dev = 1, 1344 }, 1345 { 1346 .dpdk_name = "tx_packets_phy", 1347 .ctr_name = "tx_packets_phy", 1348 }, 1349 { 1350 .dpdk_name = "rx_packets_phy", 1351 .ctr_name = "rx_packets_phy", 1352 }, 1353 { 1354 .dpdk_name = "tx_discards_phy", 1355 .ctr_name = "tx_discards_phy", 1356 }, 1357 { 1358 .dpdk_name = "rx_discards_phy", 1359 .ctr_name = "rx_discards_phy", 1360 }, 1361 { 1362 .dpdk_name = "tx_bytes_phy", 1363 .ctr_name = "tx_bytes_phy", 1364 }, 1365 { 1366 .dpdk_name = "rx_bytes_phy", 1367 .ctr_name = "rx_bytes_phy", 1368 }, 1369 /* Representor only */ 1370 { 1371 .dpdk_name = "rx_packets", 1372 .ctr_name = "vport_rx_packets", 1373 }, 1374 { 1375 .dpdk_name = "rx_bytes", 1376 .ctr_name = "vport_rx_bytes", 1377 }, 1378 { 1379 .dpdk_name = "tx_packets", 1380 .ctr_name = "vport_tx_packets", 1381 }, 1382 { 1383 .dpdk_name = "tx_bytes", 1384 .ctr_name = "vport_tx_bytes", 1385 }, 1386 }; 1387 1388 static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init); 1389 1390 /** 1391 * Init the structures to read device counters. 1392 * 1393 * @param dev 1394 * Pointer to Ethernet device. 1395 */ 1396 void 1397 mlx5_os_stats_init(struct rte_eth_dev *dev) 1398 { 1399 struct mlx5_priv *priv = dev->data->dev_private; 1400 struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; 1401 struct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl; 1402 unsigned int i; 1403 unsigned int j; 1404 struct ifreq ifr; 1405 struct ethtool_gstrings *strings = NULL; 1406 unsigned int dev_stats_n; 1407 unsigned int str_sz; 1408 int ret; 1409 1410 /* So that it won't aggregate for each init. */ 1411 xstats_ctrl->mlx5_stats_n = 0; 1412 ret = mlx5_os_get_stats_n(dev); 1413 if (ret < 0) { 1414 DRV_LOG(WARNING, "port %u no extended statistics available", 1415 dev->data->port_id); 1416 return; 1417 } 1418 dev_stats_n = ret; 1419 /* Allocate memory to grab stat names and values. */ 1420 str_sz = dev_stats_n * ETH_GSTRING_LEN; 1421 strings = (struct ethtool_gstrings *) 1422 mlx5_malloc(0, str_sz + sizeof(struct ethtool_gstrings), 0, 1423 SOCKET_ID_ANY); 1424 if (!strings) { 1425 DRV_LOG(WARNING, "port %u unable to allocate memory for xstats", 1426 dev->data->port_id); 1427 return; 1428 } 1429 strings->cmd = ETHTOOL_GSTRINGS; 1430 strings->string_set = ETH_SS_STATS; 1431 strings->len = dev_stats_n; 1432 ifr.ifr_data = (caddr_t)strings; 1433 ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); 1434 if (ret) { 1435 DRV_LOG(WARNING, "port %u unable to get statistic names", 1436 dev->data->port_id); 1437 goto free; 1438 } 1439 for (i = 0; i != dev_stats_n; ++i) { 1440 const char *curr_string = (const char *) 1441 &strings->data[i * ETH_GSTRING_LEN]; 1442 1443 for (j = 0; j != xstats_n; ++j) { 1444 if (!strcmp(mlx5_counters_init[j].ctr_name, 1445 curr_string)) { 1446 unsigned int idx = xstats_ctrl->mlx5_stats_n++; 1447 1448 xstats_ctrl->dev_table_idx[idx] = i; 1449 xstats_ctrl->info[idx] = mlx5_counters_init[j]; 1450 break; 1451 } 1452 } 1453 } 1454 /* Add dev counters. */ 1455 for (i = 0; i != xstats_n; ++i) { 1456 if (mlx5_counters_init[i].dev) { 1457 unsigned int idx = xstats_ctrl->mlx5_stats_n++; 1458 1459 xstats_ctrl->info[idx] = mlx5_counters_init[i]; 1460 xstats_ctrl->hw_stats[idx] = 0; 1461 } 1462 } 1463 MLX5_ASSERT(xstats_ctrl->mlx5_stats_n <= MLX5_MAX_XSTATS); 1464 xstats_ctrl->stats_n = dev_stats_n; 1465 /* Copy to base at first time. */ 1466 ret = mlx5_os_read_dev_counters(dev, xstats_ctrl->base); 1467 if (ret) 1468 DRV_LOG(ERR, "port %u cannot read device counters: %s", 1469 dev->data->port_id, strerror(rte_errno)); 1470 mlx5_os_read_dev_stat(priv, "out_of_buffer", &stats_ctrl->imissed_base); 1471 stats_ctrl->imissed = 0; 1472 free: 1473 mlx5_free(strings); 1474 } 1475 1476 /** 1477 * Get MAC address by querying netdevice. 1478 * 1479 * @param[in] dev 1480 * Pointer to Ethernet device. 1481 * @param[out] mac 1482 * MAC address output buffer. 1483 * 1484 * @return 1485 * 0 on success, a negative errno value otherwise and rte_errno is set. 1486 */ 1487 int 1488 mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[RTE_ETHER_ADDR_LEN]) 1489 { 1490 struct ifreq request; 1491 int ret; 1492 1493 ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request); 1494 if (ret) 1495 return ret; 1496 memcpy(mac, request.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN); 1497 return 0; 1498 } 1499 1500