1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stddef.h> 35 #include <assert.h> 36 #include <unistd.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <stdlib.h> 41 #include <errno.h> 42 #include <dirent.h> 43 #include <net/if.h> 44 #include <sys/ioctl.h> 45 #include <sys/socket.h> 46 #include <netinet/in.h> 47 #include <linux/if.h> 48 49 /* DPDK headers don't like -pedantic. */ 50 #ifdef PEDANTIC 51 #pragma GCC diagnostic ignored "-pedantic" 52 #endif 53 #include <rte_atomic.h> 54 #include <rte_ethdev.h> 55 #include <rte_mbuf.h> 56 #include <rte_common.h> 57 #ifdef PEDANTIC 58 #pragma GCC diagnostic error "-pedantic" 59 #endif 60 61 #include "mlx5.h" 62 #include "mlx5_rxtx.h" 63 #include "mlx5_utils.h" 64 65 /** 66 * Get interface name from private structure. 67 * 68 * @param[in] priv 69 * Pointer to private structure. 70 * @param[out] ifname 71 * Interface name output buffer. 72 * 73 * @return 74 * 0 on success, -1 on failure and errno is set. 75 */ 76 int 77 priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) 78 { 79 DIR *dir; 80 struct dirent *dent; 81 unsigned int dev_type = 0; 82 unsigned int dev_port_prev = ~0u; 83 char match[IF_NAMESIZE] = ""; 84 85 { 86 MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path); 87 88 dir = opendir(path); 89 if (dir == NULL) 90 return -1; 91 } 92 while ((dent = readdir(dir)) != NULL) { 93 char *name = dent->d_name; 94 FILE *file; 95 unsigned int dev_port; 96 int r; 97 98 if ((name[0] == '.') && 99 ((name[1] == '\0') || 100 ((name[1] == '.') && (name[2] == '\0')))) 101 continue; 102 103 MKSTR(path, "%s/device/net/%s/%s", 104 priv->ctx->device->ibdev_path, name, 105 (dev_type ? "dev_id" : "dev_port")); 106 107 file = fopen(path, "rb"); 108 if (file == NULL) { 109 if (errno != ENOENT) 110 continue; 111 /* 112 * Switch to dev_id when dev_port does not exist as 113 * is the case with Linux kernel versions < 3.15. 114 */ 115 try_dev_id: 116 match[0] = '\0'; 117 if (dev_type) 118 break; 119 dev_type = 1; 120 dev_port_prev = ~0u; 121 rewinddir(dir); 122 continue; 123 } 124 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 125 fclose(file); 126 if (r != 1) 127 continue; 128 /* 129 * Switch to dev_id when dev_port returns the same value for 130 * all ports. May happen when using a MOFED release older than 131 * 3.0 with a Linux kernel >= 3.15. 132 */ 133 if (dev_port == dev_port_prev) 134 goto try_dev_id; 135 dev_port_prev = dev_port; 136 if (dev_port == (priv->port - 1u)) 137 snprintf(match, sizeof(match), "%s", name); 138 } 139 closedir(dir); 140 if (match[0] == '\0') 141 return -1; 142 strncpy(*ifname, match, sizeof(*ifname)); 143 return 0; 144 } 145 146 /** 147 * Read from sysfs entry. 148 * 149 * @param[in] priv 150 * Pointer to private structure. 151 * @param[in] entry 152 * Entry name relative to sysfs path. 153 * @param[out] buf 154 * Data output buffer. 155 * @param size 156 * Buffer size. 157 * 158 * @return 159 * 0 on success, -1 on failure and errno is set. 160 */ 161 static int 162 priv_sysfs_read(const struct priv *priv, const char *entry, 163 char *buf, size_t size) 164 { 165 char ifname[IF_NAMESIZE]; 166 FILE *file; 167 int ret; 168 int err; 169 170 if (priv_get_ifname(priv, &ifname)) 171 return -1; 172 173 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 174 ifname, entry); 175 176 file = fopen(path, "rb"); 177 if (file == NULL) 178 return -1; 179 ret = fread(buf, 1, size, file); 180 err = errno; 181 if (((size_t)ret < size) && (ferror(file))) 182 ret = -1; 183 else 184 ret = size; 185 fclose(file); 186 errno = err; 187 return ret; 188 } 189 190 /** 191 * Write to sysfs entry. 192 * 193 * @param[in] priv 194 * Pointer to private structure. 195 * @param[in] entry 196 * Entry name relative to sysfs path. 197 * @param[in] buf 198 * Data buffer. 199 * @param size 200 * Buffer size. 201 * 202 * @return 203 * 0 on success, -1 on failure and errno is set. 204 */ 205 static int 206 priv_sysfs_write(const struct priv *priv, const char *entry, 207 char *buf, size_t size) 208 { 209 char ifname[IF_NAMESIZE]; 210 FILE *file; 211 int ret; 212 int err; 213 214 if (priv_get_ifname(priv, &ifname)) 215 return -1; 216 217 MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path, 218 ifname, entry); 219 220 file = fopen(path, "wb"); 221 if (file == NULL) 222 return -1; 223 ret = fwrite(buf, 1, size, file); 224 err = errno; 225 if (((size_t)ret < size) || (ferror(file))) 226 ret = -1; 227 else 228 ret = size; 229 fclose(file); 230 errno = err; 231 return ret; 232 } 233 234 /** 235 * Get unsigned long sysfs property. 236 * 237 * @param priv 238 * Pointer to private structure. 239 * @param[in] name 240 * Entry name relative to sysfs path. 241 * @param[out] value 242 * Value output buffer. 243 * 244 * @return 245 * 0 on success, -1 on failure and errno is set. 246 */ 247 static int 248 priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) 249 { 250 int ret; 251 unsigned long value_ret; 252 char value_str[32]; 253 254 ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); 255 if (ret == -1) { 256 DEBUG("cannot read %s value from sysfs: %s", 257 name, strerror(errno)); 258 return -1; 259 } 260 value_str[ret] = '\0'; 261 errno = 0; 262 value_ret = strtoul(value_str, NULL, 0); 263 if (errno) { 264 DEBUG("invalid %s value `%s': %s", name, value_str, 265 strerror(errno)); 266 return -1; 267 } 268 *value = value_ret; 269 return 0; 270 } 271 272 /** 273 * Set unsigned long sysfs property. 274 * 275 * @param priv 276 * Pointer to private structure. 277 * @param[in] name 278 * Entry name relative to sysfs path. 279 * @param value 280 * Value to set. 281 * 282 * @return 283 * 0 on success, -1 on failure and errno is set. 284 */ 285 static int 286 priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) 287 { 288 int ret; 289 MKSTR(value_str, "%lu", value); 290 291 ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); 292 if (ret == -1) { 293 DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", 294 name, value_str, value, strerror(errno)); 295 return -1; 296 } 297 return 0; 298 } 299 300 /** 301 * Perform ifreq ioctl() on associated Ethernet device. 302 * 303 * @param[in] priv 304 * Pointer to private structure. 305 * @param req 306 * Request number to pass to ioctl(). 307 * @param[out] ifr 308 * Interface request structure output buffer. 309 * 310 * @return 311 * 0 on success, -1 on failure and errno is set. 312 */ 313 int 314 priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) 315 { 316 int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); 317 int ret = -1; 318 319 if (sock == -1) 320 return ret; 321 if (priv_get_ifname(priv, &ifr->ifr_name) == 0) 322 ret = ioctl(sock, req, ifr); 323 close(sock); 324 return ret; 325 } 326 327 /** 328 * Get device MTU. 329 * 330 * @param priv 331 * Pointer to private structure. 332 * @param[out] mtu 333 * MTU value output buffer. 334 * 335 * @return 336 * 0 on success, -1 on failure and errno is set. 337 */ 338 int 339 priv_get_mtu(struct priv *priv, uint16_t *mtu) 340 { 341 unsigned long ulong_mtu; 342 343 if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) 344 return -1; 345 *mtu = ulong_mtu; 346 return 0; 347 } 348 349 /** 350 * Set device flags. 351 * 352 * @param priv 353 * Pointer to private structure. 354 * @param keep 355 * Bitmask for flags that must remain untouched. 356 * @param flags 357 * Bitmask for flags to modify. 358 * 359 * @return 360 * 0 on success, -1 on failure and errno is set. 361 */ 362 int 363 priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) 364 { 365 unsigned long tmp; 366 367 if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) 368 return -1; 369 tmp &= keep; 370 tmp |= flags; 371 return priv_set_sysfs_ulong(priv, "flags", tmp); 372 } 373 374 /** 375 * Ethernet device configuration. 376 * 377 * Prepare the driver for a given number of TX and RX queues. 378 * Allocate parent RSS queue when several RX queues are requested. 379 * 380 * @param dev 381 * Pointer to Ethernet device structure. 382 * 383 * @return 384 * 0 on success, errno value on failure. 385 */ 386 static int 387 dev_configure(struct rte_eth_dev *dev) 388 { 389 struct priv *priv = dev->data->dev_private; 390 unsigned int rxqs_n = dev->data->nb_rx_queues; 391 unsigned int txqs_n = dev->data->nb_tx_queues; 392 unsigned int tmp; 393 int ret; 394 395 priv->rxqs = (void *)dev->data->rx_queues; 396 priv->txqs = (void *)dev->data->tx_queues; 397 if (txqs_n != priv->txqs_n) { 398 INFO("%p: TX queues number update: %u -> %u", 399 (void *)dev, priv->txqs_n, txqs_n); 400 priv->txqs_n = txqs_n; 401 } 402 if (rxqs_n == priv->rxqs_n) 403 return 0; 404 INFO("%p: RX queues number update: %u -> %u", 405 (void *)dev, priv->rxqs_n, rxqs_n); 406 /* If RSS is enabled, disable it first. */ 407 if (priv->rss) { 408 unsigned int i; 409 410 /* Only if there are no remaining child RX queues. */ 411 for (i = 0; (i != priv->rxqs_n); ++i) 412 if ((*priv->rxqs)[i] != NULL) 413 return EINVAL; 414 rxq_cleanup(&priv->rxq_parent); 415 priv->rss = 0; 416 priv->rxqs_n = 0; 417 } 418 if (rxqs_n <= 1) { 419 /* Nothing else to do. */ 420 priv->rxqs_n = rxqs_n; 421 return 0; 422 } 423 /* Allocate a new RSS parent queue if supported by hardware. */ 424 if (!priv->hw_rss) { 425 ERROR("%p: only a single RX queue can be configured when" 426 " hardware doesn't support RSS", 427 (void *)dev); 428 return EINVAL; 429 } 430 /* Fail if hardware doesn't support that many RSS queues. */ 431 if (rxqs_n >= priv->max_rss_tbl_sz) { 432 ERROR("%p: only %u RX queues can be configured for RSS", 433 (void *)dev, priv->max_rss_tbl_sz); 434 return EINVAL; 435 } 436 priv->rss = 1; 437 tmp = priv->rxqs_n; 438 priv->rxqs_n = rxqs_n; 439 ret = rxq_setup(dev, &priv->rxq_parent, 0, 0, NULL, NULL); 440 if (!ret) 441 return 0; 442 /* Failure, rollback. */ 443 priv->rss = 0; 444 priv->rxqs_n = tmp; 445 assert(ret > 0); 446 return ret; 447 } 448 449 /** 450 * DPDK callback for Ethernet device configuration. 451 * 452 * @param dev 453 * Pointer to Ethernet device structure. 454 * 455 * @return 456 * 0 on success, negative errno value on failure. 457 */ 458 int 459 mlx5_dev_configure(struct rte_eth_dev *dev) 460 { 461 struct priv *priv = dev->data->dev_private; 462 int ret; 463 464 priv_lock(priv); 465 ret = dev_configure(dev); 466 assert(ret >= 0); 467 priv_unlock(priv); 468 return -ret; 469 } 470 471 /** 472 * DPDK callback to get information about the device. 473 * 474 * @param dev 475 * Pointer to Ethernet device structure. 476 * @param[out] info 477 * Info structure output buffer. 478 */ 479 void 480 mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) 481 { 482 struct priv *priv = dev->data->dev_private; 483 unsigned int max; 484 char ifname[IF_NAMESIZE]; 485 486 priv_lock(priv); 487 /* FIXME: we should ask the device for these values. */ 488 info->min_rx_bufsize = 32; 489 info->max_rx_pktlen = 65536; 490 /* 491 * Since we need one CQ per QP, the limit is the minimum number 492 * between the two values. 493 */ 494 max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ? 495 priv->device_attr.max_qp : priv->device_attr.max_cq); 496 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */ 497 if (max >= 65535) 498 max = 65535; 499 info->max_rx_queues = max; 500 info->max_tx_queues = max; 501 /* Last array entry is reserved for broadcast. */ 502 info->max_mac_addrs = (RTE_DIM(priv->mac) - 1); 503 info->rx_offload_capa = 504 (priv->hw_csum ? 505 (DEV_RX_OFFLOAD_IPV4_CKSUM | 506 DEV_RX_OFFLOAD_UDP_CKSUM | 507 DEV_RX_OFFLOAD_TCP_CKSUM) : 508 0); 509 info->tx_offload_capa = 510 (priv->hw_csum ? 511 (DEV_TX_OFFLOAD_IPV4_CKSUM | 512 DEV_TX_OFFLOAD_UDP_CKSUM | 513 DEV_TX_OFFLOAD_TCP_CKSUM) : 514 0); 515 if (priv_get_ifname(priv, &ifname) == 0) 516 info->if_index = if_nametoindex(ifname); 517 priv_unlock(priv); 518 } 519 520 /** 521 * Get PCI information from struct ibv_device. 522 * 523 * @param device 524 * Pointer to Ethernet device structure. 525 * @param[out] pci_addr 526 * PCI bus address output buffer. 527 * 528 * @return 529 * 0 on success, -1 on failure and errno is set. 530 */ 531 int 532 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, 533 struct rte_pci_addr *pci_addr) 534 { 535 FILE *file; 536 char line[32]; 537 MKSTR(path, "%s/device/uevent", device->ibdev_path); 538 539 file = fopen(path, "rb"); 540 if (file == NULL) 541 return -1; 542 while (fgets(line, sizeof(line), file) == line) { 543 size_t len = strlen(line); 544 int ret; 545 546 /* Truncate long lines. */ 547 if (len == (sizeof(line) - 1)) 548 while (line[(len - 1)] != '\n') { 549 ret = fgetc(file); 550 if (ret == EOF) 551 break; 552 line[(len - 1)] = ret; 553 } 554 /* Extract information. */ 555 if (sscanf(line, 556 "PCI_SLOT_NAME=" 557 "%" SCNx16 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 558 &pci_addr->domain, 559 &pci_addr->bus, 560 &pci_addr->devid, 561 &pci_addr->function) == 4) { 562 ret = 0; 563 break; 564 } 565 } 566 fclose(file); 567 return 0; 568 } 569