1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2020 Mellanox Technologies, Ltd 3 */ 4 5 #include <sys/types.h> 6 #include <unistd.h> 7 #include <string.h> 8 #include <stdio.h> 9 #ifdef RTE_IBVERBS_LINK_DLOPEN 10 #include <dlfcn.h> 11 #endif 12 #include <dirent.h> 13 #include <net/if.h> 14 15 #include <rte_errno.h> 16 #include <rte_string_fns.h> 17 #include <rte_bus_pci.h> 18 #include <rte_bus_auxiliary.h> 19 20 #include "mlx5_common.h" 21 #include "mlx5_nl.h" 22 #include "mlx5_common_log.h" 23 #include "mlx5_common_private.h" 24 #include "mlx5_common_defs.h" 25 #include "mlx5_common_os.h" 26 #include "mlx5_glue.h" 27 28 #ifdef MLX5_GLUE 29 const struct mlx5_glue *mlx5_glue; 30 #endif 31 32 int 33 mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr) 34 { 35 FILE *file; 36 char line[32]; 37 int rc = -ENOENT; 38 MKSTR(path, "%s/device/uevent", dev_path); 39 40 file = fopen(path, "rb"); 41 if (file == NULL) { 42 rte_errno = errno; 43 return -rte_errno; 44 } 45 while (fgets(line, sizeof(line), file) == line) { 46 size_t len = strlen(line); 47 48 /* Truncate long lines. */ 49 if (len == (sizeof(line) - 1)) { 50 while (line[(len - 1)] != '\n') { 51 int ret = fgetc(file); 52 53 if (ret == EOF) 54 goto exit; 55 line[(len - 1)] = ret; 56 } 57 /* No match for long lines. */ 58 continue; 59 } 60 /* Extract information. */ 61 if (sscanf(line, 62 "PCI_SLOT_NAME=" 63 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 64 &pci_addr->domain, 65 &pci_addr->bus, 66 &pci_addr->devid, 67 &pci_addr->function) == 4) { 68 rc = 0; 69 break; 70 } 71 } 72 exit: 73 fclose(file); 74 if (rc) 75 rte_errno = -rc; 76 return rc; 77 } 78 79 /** 80 * Extract port name, as a number, from sysfs or netlink information. 81 * 82 * @param[in] port_name_in 83 * String representing the port name. 
84 * @param[out] port_info_out 85 * Port information, including port name as a number and port name 86 * type if recognized 87 * 88 * @return 89 * port_name field set according to recognized name format. 90 */ 91 void 92 mlx5_translate_port_name(const char *port_name_in, 93 struct mlx5_switch_info *port_info_out) 94 { 95 char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol; 96 char *end; 97 int sc_items; 98 99 sc_items = sscanf(port_name_in, "%c%d", 100 &ctrl, &port_info_out->ctrl_num); 101 if (sc_items == 2 && ctrl == 'c') { 102 port_name_in++; /* 'c' */ 103 port_name_in += snprintf(NULL, 0, "%d", 104 port_info_out->ctrl_num); 105 } 106 /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */ 107 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c", 108 &pf_c1, &pf_c2, &port_info_out->pf_num, 109 &vf_c1, &vf_c2, &port_info_out->port_name, &eol); 110 if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') { 111 if (vf_c1 == 'v' && vf_c2 == 'f') { 112 /* Kernel ver >= 5.0 or OFED ver >= 4.6 */ 113 port_info_out->name_type = 114 MLX5_PHYS_PORT_NAME_TYPE_PFVF; 115 return; 116 } 117 if (vf_c1 == 's' && vf_c2 == 'f') { 118 /* Kernel ver >= 5.11 or OFED ver >= 5.1 */ 119 port_info_out->name_type = 120 MLX5_PHYS_PORT_NAME_TYPE_PFSF; 121 return; 122 } 123 } 124 /* 125 * Check for port-name as a string of the form p0 126 * (support kernel ver >= 5.0, or OFED ver >= 4.6). 127 */ 128 sc_items = sscanf(port_name_in, "%c%d%c", 129 &pf_c1, &port_info_out->port_name, &eol); 130 if (sc_items == 2 && pf_c1 == 'p') { 131 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 132 return; 133 } 134 /* 135 * Check for port-name as a string of the form pf0 136 * (support kernel ver >= 5.7 for HPF representor on BF). 
137 */ 138 sc_items = sscanf(port_name_in, "%c%c%d%c", 139 &pf_c1, &pf_c2, &port_info_out->pf_num, &eol); 140 if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') { 141 port_info_out->port_name = -1; 142 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF; 143 return; 144 } 145 /* Check for port-name as a number (support kernel ver < 5.0 */ 146 errno = 0; 147 port_info_out->port_name = strtol(port_name_in, &end, 0); 148 if (!errno && 149 (size_t)(end - port_name_in) == strlen(port_name_in)) { 150 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 151 return; 152 } 153 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 154 } 155 156 int 157 mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname) 158 { 159 DIR *dir; 160 struct dirent *dent; 161 unsigned int dev_type = 0; 162 unsigned int dev_port_prev = ~0u; 163 char match[IF_NAMESIZE] = ""; 164 165 MLX5_ASSERT(ibdev_path); 166 { 167 MKSTR(path, "%s/device/net", ibdev_path); 168 169 dir = opendir(path); 170 if (dir == NULL) { 171 rte_errno = errno; 172 return -rte_errno; 173 } 174 } 175 while ((dent = readdir(dir)) != NULL) { 176 char *name = dent->d_name; 177 FILE *file; 178 unsigned int dev_port; 179 int r; 180 181 if ((name[0] == '.') && 182 ((name[1] == '\0') || 183 ((name[1] == '.') && (name[2] == '\0')))) 184 continue; 185 186 MKSTR(path, "%s/device/net/%s/%s", 187 ibdev_path, name, 188 (dev_type ? "dev_id" : "dev_port")); 189 190 file = fopen(path, "rb"); 191 if (file == NULL) { 192 if (errno != ENOENT) 193 continue; 194 /* 195 * Switch to dev_id when dev_port does not exist as 196 * is the case with Linux kernel versions < 3.15. 197 */ 198 try_dev_id: 199 match[0] = '\0'; 200 if (dev_type) 201 break; 202 dev_type = 1; 203 dev_port_prev = ~0u; 204 rewinddir(dir); 205 continue; 206 } 207 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 208 fclose(file); 209 if (r != 1) 210 continue; 211 /* 212 * Switch to dev_id when dev_port returns the same value for 213 * all ports. 
May happen when using a MOFED release older than 214 * 3.0 with a Linux kernel >= 3.15. 215 */ 216 if (dev_port == dev_port_prev) 217 goto try_dev_id; 218 dev_port_prev = dev_port; 219 if (dev_port == 0) 220 strlcpy(match, name, IF_NAMESIZE); 221 } 222 closedir(dir); 223 if (match[0] == '\0') { 224 rte_errno = ENOENT; 225 return -rte_errno; 226 } 227 strncpy(ifname, match, IF_NAMESIZE); 228 return 0; 229 } 230 231 #ifdef MLX5_GLUE 232 233 /** 234 * Suffix RTE_EAL_PMD_PATH with "-glue". 235 * 236 * This function performs a sanity check on RTE_EAL_PMD_PATH before 237 * suffixing its last component. 238 * 239 * @param buf[out] 240 * Output buffer, should be large enough otherwise NULL is returned. 241 * @param size 242 * Size of @p out. 243 * 244 * @return 245 * Pointer to @p buf or @p NULL in case suffix cannot be appended. 246 */ 247 static char * 248 mlx5_glue_path(char *buf, size_t size) 249 { 250 static const char *const bad[] = { "/", ".", "..", NULL }; 251 const char *path = RTE_EAL_PMD_PATH; 252 size_t len = strlen(path); 253 size_t off; 254 int i; 255 256 while (len && path[len - 1] == '/') 257 --len; 258 for (off = len; off && path[off - 1] != '/'; --off) 259 ; 260 for (i = 0; bad[i]; ++i) 261 if (!strncmp(path + off, bad[i], (int)(len - off))) 262 goto error; 263 i = snprintf(buf, size, "%.*s-glue", (int)len, path); 264 if (i == -1 || (size_t)i >= size) 265 goto error; 266 return buf; 267 error: 268 RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of" 269 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please" 270 " re-configure DPDK"); 271 return NULL; 272 } 273 274 static int 275 mlx5_glue_dlopen(void) 276 { 277 char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 278 void *handle = NULL; 279 280 char const *path[] = { 281 /* 282 * A basic security check is necessary before trusting 283 * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 284 */ 285 (geteuid() == getuid() && getegid() == getgid() ? 
286 getenv("MLX5_GLUE_PATH") : NULL), 287 /* 288 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 289 * variant, otherwise let dlopen() look up libraries on its 290 * own. 291 */ 292 (*RTE_EAL_PMD_PATH ? 293 mlx5_glue_path(glue_path, sizeof(glue_path)) : ""), 294 }; 295 unsigned int i = 0; 296 void **sym; 297 const char *dlmsg; 298 299 while (!handle && i != RTE_DIM(path)) { 300 const char *end; 301 size_t len; 302 int ret; 303 304 if (!path[i]) { 305 ++i; 306 continue; 307 } 308 end = strpbrk(path[i], ":;"); 309 if (!end) 310 end = path[i] + strlen(path[i]); 311 len = end - path[i]; 312 ret = 0; 313 do { 314 char name[ret + 1]; 315 316 ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE, 317 (int)len, path[i], 318 (!len || *(end - 1) == '/') ? "" : "/"); 319 if (ret == -1) 320 break; 321 if (sizeof(name) != (size_t)ret + 1) 322 continue; 323 DRV_LOG(DEBUG, "Looking for rdma-core glue as " 324 "\"%s\"", name); 325 handle = dlopen(name, RTLD_LAZY); 326 break; 327 } while (1); 328 path[i] = end + 1; 329 if (!*end) 330 ++i; 331 } 332 if (!handle) { 333 rte_errno = EINVAL; 334 dlmsg = dlerror(); 335 if (dlmsg) 336 DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg); 337 goto glue_error; 338 } 339 sym = dlsym(handle, "mlx5_glue"); 340 if (!sym || !*sym) { 341 rte_errno = EINVAL; 342 dlmsg = dlerror(); 343 if (dlmsg) 344 DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg); 345 goto glue_error; 346 } 347 mlx5_glue = *sym; 348 return 0; 349 350 glue_error: 351 if (handle) 352 dlclose(handle); 353 return -1; 354 } 355 356 #endif 357 358 /** 359 * Initialization routine for run-time dependency on rdma-core. 360 */ 361 void 362 mlx5_glue_constructor(void) 363 { 364 /* 365 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 366 * huge pages. Calling ibv_fork_init() during init allows 367 * applications to use fork() safely for purposes other than 368 * using this PMD, which is not supported in forked processes. 
369 */ 370 setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 371 /* Match the size of Rx completion entry to the size of a cacheline. */ 372 if (RTE_CACHE_LINE_SIZE == 128) 373 setenv("MLX5_CQE_SIZE", "128", 0); 374 /* 375 * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to 376 * cleanup all the Verbs resources even when the device was removed. 377 */ 378 setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1); 379 380 #ifdef MLX5_GLUE 381 if (mlx5_glue_dlopen() != 0) 382 goto glue_error; 383 #endif 384 385 #ifdef RTE_LIBRTE_MLX5_DEBUG 386 /* Glue structure must not contain any NULL pointers. */ 387 { 388 unsigned int i; 389 390 for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i) 391 MLX5_ASSERT(((const void *const *)mlx5_glue)[i]); 392 } 393 #endif 394 if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) { 395 rte_errno = EINVAL; 396 DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is " 397 "required", mlx5_glue->version, MLX5_GLUE_VERSION); 398 goto glue_error; 399 } 400 mlx5_glue->fork_init(); 401 return; 402 403 glue_error: 404 DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing" 405 " run-time dependency on rdma-core libraries (libibverbs," 406 " libmlx5)"); 407 mlx5_glue = NULL; 408 } 409 410 /** 411 * Validate user arguments for remote PD and CTX. 412 * 413 * @param config 414 * Pointer to device configuration structure. 415 * 416 * @return 417 * 0 on success, a negative errno value otherwise and rte_errno is set. 
418 */ 419 int 420 mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config) 421 { 422 int device_fd = config->device_fd; 423 int pd_handle = config->pd_handle; 424 425 #ifdef HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR 426 if (device_fd == MLX5_ARG_UNSET && pd_handle != MLX5_ARG_UNSET) { 427 DRV_LOG(ERR, "Remote PD without CTX is not supported."); 428 rte_errno = EINVAL; 429 return -rte_errno; 430 } 431 if (device_fd != MLX5_ARG_UNSET && pd_handle == MLX5_ARG_UNSET) { 432 DRV_LOG(ERR, "Remote CTX without PD is not supported."); 433 rte_errno = EINVAL; 434 return -rte_errno; 435 } 436 DRV_LOG(DEBUG, "Remote PD and CTX is supported: (cmd_fd=%d, " 437 "pd_handle=%d).", device_fd, pd_handle); 438 #else 439 if (pd_handle != MLX5_ARG_UNSET || device_fd != MLX5_ARG_UNSET) { 440 DRV_LOG(ERR, 441 "Remote PD and CTX is not supported - maybe old rdma-core version?"); 442 rte_errno = ENOTSUP; 443 return -rte_errno; 444 } 445 #endif 446 return 0; 447 } 448 449 /** 450 * Release Protection Domain object. 451 * 452 * @param[out] cdev 453 * Pointer to the mlx5 device. 454 * 455 * @return 456 * 0 on success, a negative errno value otherwise. 457 */ 458 int 459 mlx5_os_pd_release(struct mlx5_common_device *cdev) 460 { 461 if (cdev->config.pd_handle == MLX5_ARG_UNSET) 462 return mlx5_glue->dealloc_pd(cdev->pd); 463 else 464 return mlx5_glue->unimport_pd(cdev->pd); 465 } 466 467 /** 468 * Allocate Protection Domain object. 469 * 470 * @param[out] cdev 471 * Pointer to the mlx5 device. 472 * 473 * @return 474 * 0 on success, a negative errno value otherwise. 475 */ 476 static int 477 mlx5_os_pd_create(struct mlx5_common_device *cdev) 478 { 479 cdev->pd = mlx5_glue->alloc_pd(cdev->ctx); 480 if (cdev->pd == NULL) { 481 DRV_LOG(ERR, "Failed to allocate PD: %s", rte_strerror(errno)); 482 return errno ? -errno : -ENOMEM; 483 } 484 return 0; 485 } 486 487 /** 488 * Import Protection Domain object according to given PD handle. 
489 * 490 * @param[out] cdev 491 * Pointer to the mlx5 device. 492 * 493 * @return 494 * 0 on success, a negative errno value otherwise. 495 */ 496 static int 497 mlx5_os_pd_import(struct mlx5_common_device *cdev) 498 { 499 cdev->pd = mlx5_glue->import_pd(cdev->ctx, cdev->config.pd_handle); 500 if (cdev->pd == NULL) { 501 DRV_LOG(ERR, "Failed to import PD using handle=%d: %s", 502 cdev->config.pd_handle, rte_strerror(errno)); 503 return errno ? -errno : -ENOMEM; 504 } 505 return 0; 506 } 507 508 /** 509 * Prepare Protection Domain object and extract its pdn using DV API. 510 * 511 * @param[out] cdev 512 * Pointer to the mlx5 device. 513 * 514 * @return 515 * 0 on success, a negative errno value otherwise and rte_errno is set. 516 */ 517 int 518 mlx5_os_pd_prepare(struct mlx5_common_device *cdev) 519 { 520 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 521 struct mlx5dv_obj obj; 522 struct mlx5dv_pd pd_info; 523 #endif 524 int ret; 525 526 if (cdev->config.pd_handle == MLX5_ARG_UNSET) 527 ret = mlx5_os_pd_create(cdev); 528 else 529 ret = mlx5_os_pd_import(cdev); 530 if (ret) { 531 rte_errno = -ret; 532 return ret; 533 } 534 if (cdev->config.devx == 0) 535 return 0; 536 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 537 obj.pd.in = cdev->pd; 538 obj.pd.out = &pd_info; 539 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD); 540 if (ret != 0) { 541 DRV_LOG(ERR, "Fail to get PD object info."); 542 rte_errno = errno; 543 claim_zero(mlx5_os_pd_release(cdev)); 544 cdev->pd = NULL; 545 return -rte_errno; 546 } 547 cdev->pdn = pd_info.pdn; 548 return 0; 549 #else 550 DRV_LOG(ERR, "Cannot get pdn - no DV support."); 551 rte_errno = ENOTSUP; 552 return -rte_errno; 553 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */ 554 } 555 556 static struct ibv_device * 557 mlx5_os_get_ibv_device(const struct rte_pci_addr *addr) 558 { 559 int n; 560 struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n); 561 struct ibv_device *ibv_match = NULL; 562 uint8_t guid1[32] = {0}; 563 uint8_t guid2[32] = {0}; 564 int ret1, ret2 = 
-1; 565 struct rte_pci_addr paddr; 566 567 if (ibv_list == NULL || !n) { 568 rte_errno = ENOSYS; 569 if (ibv_list) 570 mlx5_glue->free_device_list(ibv_list); 571 return NULL; 572 } 573 ret1 = mlx5_get_device_guid(addr, guid1, sizeof(guid1)); 574 while (n-- > 0) { 575 DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name); 576 if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0) 577 continue; 578 if (ret1 > 0) 579 ret2 = mlx5_get_device_guid(&paddr, guid2, sizeof(guid2)); 580 /* Bond device can bond secondary PCIe */ 581 if ((strstr(ibv_list[n]->name, "bond") && 582 ((ret1 > 0 && ret2 > 0 && !memcmp(guid1, guid2, sizeof(guid1))) || 583 (addr->domain == paddr.domain && addr->bus == paddr.bus && 584 addr->devid == paddr.devid))) || 585 !rte_pci_addr_cmp(addr, &paddr)) { 586 ibv_match = ibv_list[n]; 587 break; 588 } 589 } 590 if (ibv_match == NULL) { 591 DRV_LOG(WARNING, 592 "No Verbs device matches PCI device " PCI_PRI_FMT "," 593 " are kernel drivers loaded?", 594 addr->domain, addr->bus, addr->devid, addr->function); 595 rte_errno = ENOENT; 596 } 597 mlx5_glue->free_device_list(ibv_list); 598 return ibv_match; 599 } 600 601 /* Try to disable ROCE by Netlink\Devlink. 
*/
/*
 * addr is the device address string handed to the Netlink helpers.
 * Returns 0 when ROCE is (or already was) disabled, otherwise the non-zero
 * value reported by the failing Netlink helper.
 */
static int
mlx5_nl_roce_disable(const char *addr)
{
	int sock = mlx5_nl_init(NETLINK_GENERIC, 0);
	int family;
	int roce_on;
	int rc;

	if (sock < 0)
		return sock;
	family = mlx5_nl_devlink_family_id_get(sock);
	if (family < 0) {
		rc = family;
		DRV_LOG(DEBUG,
			"Failed to get devlink id for ROCE operations by Netlink.");
	} else {
		rc = mlx5_nl_enable_roce_get(sock, family, addr, &roce_on);
		if (rc) {
			DRV_LOG(DEBUG,
				"Failed to get ROCE enable by Netlink: %d.",
				rc);
		} else if (!roce_on) {
			/* Nothing to change, rc is already 0. */
			DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
		} else {
			rc = mlx5_nl_enable_roce_set(sock, family, addr, 0);
			if (rc)
				DRV_LOG(DEBUG,
					"Failed to disable ROCE by Netlink: %d.",
					rc);
			else
				DRV_LOG(INFO,
					"ROCE is disabled by Netlink successfully.");
		}
	}
	/* Single exit point: the Netlink socket is always closed. */
	close(sock);
	return rc;
}

/* Try to disable ROCE by sysfs.
*/ 639 static int 640 mlx5_sys_roce_disable(const char *addr) 641 { 642 FILE *file_o; 643 int enable; 644 int ret; 645 646 MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr); 647 file_o = fopen(file_p, "rb"); 648 if (!file_o) { 649 rte_errno = ENOTSUP; 650 return -ENOTSUP; 651 } 652 ret = fscanf(file_o, "%d", &enable); 653 if (ret != 1) { 654 rte_errno = EINVAL; 655 ret = EINVAL; 656 goto close; 657 } else if (!enable) { 658 ret = 0; 659 DRV_LOG(INFO, "ROCE has already disabled(sysfs)."); 660 goto close; 661 } 662 fclose(file_o); 663 file_o = fopen(file_p, "wb"); 664 if (!file_o) { 665 rte_errno = ENOTSUP; 666 return -ENOTSUP; 667 } 668 fprintf(file_o, "0\n"); 669 ret = 0; 670 close: 671 if (ret) 672 DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret); 673 else 674 DRV_LOG(INFO, "ROCE is disabled by sysfs successfully."); 675 fclose(file_o); 676 return ret; 677 } 678 679 static int 680 mlx5_roce_disable(const struct rte_device *dev) 681 { 682 char pci_addr[PCI_PRI_STR_SIZE] = { 0 }; 683 684 if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0) 685 return -rte_errno; 686 /* Firstly try to disable ROCE by Netlink and fallback to sysfs. 
*/ 687 if (mlx5_nl_roce_disable(pci_addr) != 0 && 688 mlx5_sys_roce_disable(pci_addr) != 0) 689 return -rte_errno; 690 return 0; 691 } 692 693 static struct ibv_device * 694 mlx5_os_get_ibv_dev(const struct rte_device *dev) 695 { 696 struct ibv_device *ibv; 697 698 if (mlx5_dev_is_pci(dev)) 699 ibv = mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr); 700 else 701 ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev)); 702 if (ibv == NULL) { 703 rte_errno = ENODEV; 704 DRV_LOG(ERR, "Verbs device not found: %s", dev->name); 705 } 706 return ibv; 707 } 708 709 static struct ibv_device * 710 mlx5_vdpa_get_ibv_dev(const struct rte_device *dev) 711 { 712 struct ibv_device *ibv; 713 int retry; 714 715 if (mlx5_roce_disable(dev) != 0) { 716 DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".", 717 dev->name); 718 return NULL; 719 } 720 /* Wait for the IB device to appear again after reload. */ 721 for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) { 722 ibv = mlx5_os_get_ibv_dev(dev); 723 if (ibv != NULL) 724 return ibv; 725 usleep(MLX5_VDPA_USEC); 726 } 727 DRV_LOG(ERR, 728 "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.", 729 dev->name, MLX5_VDPA_MAX_RETRIES); 730 rte_errno = EAGAIN; 731 return NULL; 732 } 733 734 static int 735 mlx5_config_doorbell_mapping_env(int dbnc) 736 { 737 char *env; 738 int value; 739 740 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 741 /* Get environment variable to store. */ 742 env = getenv(MLX5_SHUT_UP_BF); 743 value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET; 744 if (dbnc == MLX5_ARG_UNSET) 745 setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1); 746 else 747 setenv(MLX5_SHUT_UP_BF, 748 dbnc == MLX5_SQ_DB_NCACHED ? "1" : "0", 1); 749 return value; 750 } 751 752 static void 753 mlx5_restore_doorbell_mapping_env(int value) 754 { 755 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 756 /* Restore the original environment variable state. 
*/ 757 if (value == MLX5_ARG_UNSET) 758 unsetenv(MLX5_SHUT_UP_BF); 759 else 760 setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1); 761 } 762 763 /** 764 * Function API to open IB device. 765 * 766 * @param cdev 767 * Pointer to the mlx5 device. 768 * @param classes 769 * Chosen classes come from device arguments. 770 * 771 * @return 772 * Pointer to ibv_context on success, NULL otherwise and rte_errno is set. 773 */ 774 static struct ibv_context * 775 mlx5_open_device(struct mlx5_common_device *cdev, uint32_t classes) 776 { 777 struct ibv_device *ibv; 778 struct ibv_context *ctx = NULL; 779 int dbmap_env; 780 781 MLX5_ASSERT(cdev->config.device_fd == MLX5_ARG_UNSET); 782 if (classes & MLX5_CLASS_VDPA) 783 ibv = mlx5_vdpa_get_ibv_dev(cdev->dev); 784 else 785 ibv = mlx5_os_get_ibv_dev(cdev->dev); 786 if (!ibv) 787 return NULL; 788 DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name); 789 /* 790 * Configure environment variable "MLX5_BF_SHUT_UP" before the device 791 * creation. The rdma_core library checks the variable at device 792 * creation and stores the result internally. 793 */ 794 dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc); 795 /* Try to open IB device with DV first, then usual Verbs. */ 796 errno = 0; 797 ctx = mlx5_glue->dv_open_device(ibv); 798 if (ctx) { 799 cdev->config.devx = 1; 800 } else if (classes == MLX5_CLASS_ETH) { 801 /* The environment variable is still configured. */ 802 ctx = mlx5_glue->open_device(ibv); 803 if (ctx == NULL) 804 goto error; 805 } else { 806 goto error; 807 } 808 /* The device is created, no need for environment. */ 809 mlx5_restore_doorbell_mapping_env(dbmap_env); 810 return ctx; 811 error: 812 rte_errno = errno ? errno : ENODEV; 813 /* The device creation is failed, no need for environment. */ 814 mlx5_restore_doorbell_mapping_env(dbmap_env); 815 DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name); 816 return NULL; 817 } 818 819 /** 820 * Function API to import IB device. 
821 * 822 * @param cdev 823 * Pointer to the mlx5 device. 824 * 825 * @return 826 * Pointer to ibv_context on success, NULL otherwise and rte_errno is set. 827 */ 828 static struct ibv_context * 829 mlx5_import_device(struct mlx5_common_device *cdev) 830 { 831 struct ibv_context *ctx = NULL; 832 833 MLX5_ASSERT(cdev->config.device_fd != MLX5_ARG_UNSET); 834 ctx = mlx5_glue->import_device(cdev->config.device_fd); 835 if (!ctx) { 836 DRV_LOG(ERR, "Failed to import device for fd=%d: %s", 837 cdev->config.device_fd, rte_strerror(errno)); 838 rte_errno = errno; 839 } 840 return ctx; 841 } 842 843 /** 844 * Function API to prepare IB device. 845 * 846 * @param cdev 847 * Pointer to the mlx5 device. 848 * @param classes 849 * Chosen classes come from device arguments. 850 * 851 * @return 852 * 0 on success, a negative errno value otherwise and rte_errno is set. 853 */ 854 int 855 mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes) 856 { 857 858 struct ibv_context *ctx = NULL; 859 860 if (cdev->config.device_fd == MLX5_ARG_UNSET) 861 ctx = mlx5_open_device(cdev, classes); 862 else 863 ctx = mlx5_import_device(cdev); 864 if (ctx == NULL) 865 return -rte_errno; 866 /* Hint libmlx5 to use PMD allocator for data plane resources */ 867 mlx5_set_context_attr(cdev->dev, ctx); 868 cdev->ctx = ctx; 869 return 0; 870 } 871 872 int 873 mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len) 874 { 875 char tmp[512]; 876 char cur_ifname[IF_NAMESIZE + 1]; 877 FILE *id_file; 878 DIR *dir; 879 struct dirent *ptr; 880 int ret; 881 882 if (guid == NULL || len < sizeof(u_int64_t) + 1) 883 return -1; 884 memset(guid, 0, len); 885 snprintf(tmp, sizeof(tmp), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/net", 886 dev->domain, dev->bus, dev->devid, dev->function); 887 dir = opendir(tmp); 888 if (dir == NULL) 889 return -1; 890 /* Traverse to identify PF interface */ 891 do { 892 ptr = readdir(dir); 893 if (ptr == NULL || ptr->d_type != DT_DIR) { 894 
closedir(dir); 895 return -1; 896 } 897 } while (strchr(ptr->d_name, '.') || strchr(ptr->d_name, '_') || 898 strchr(ptr->d_name, 'v')); 899 snprintf(cur_ifname, sizeof(cur_ifname), "%s", ptr->d_name); 900 closedir(dir); 901 snprintf(tmp + strlen(tmp), sizeof(tmp) - strlen(tmp), 902 "/%s/phys_switch_id", cur_ifname); 903 /* Older OFED like 5.3 doesn't support read */ 904 id_file = fopen(tmp, "r"); 905 if (!id_file) 906 return 0; 907 ret = fscanf(id_file, "%16s", guid); 908 fclose(id_file); 909 return ret; 910 } 911 912 /* 913 * Create direct mkey using the kernel ibv_reg_mr API and wrap it with a new 914 * indirect mkey created by the DevX API. 915 * This mkey should be used for DevX commands requesting mkey as a parameter. 916 */ 917 int 918 mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr, 919 size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr) 920 { 921 struct mlx5_klm klm = { 922 .byte_count = length, 923 .address = (uintptr_t)addr, 924 }; 925 struct mlx5_devx_mkey_attr mkey_attr = { 926 .pd = pdn, 927 .klm_array = &klm, 928 .klm_num = 1, 929 }; 930 struct mlx5_devx_obj *mkey; 931 struct ibv_mr *ibv_mr = mlx5_glue->reg_mr(pd, addr, length, 932 IBV_ACCESS_LOCAL_WRITE | 933 (haswell_broadwell_cpu ? 
0 : 934 IBV_ACCESS_RELAXED_ORDERING)); 935 936 if (!ibv_mr) { 937 rte_errno = errno; 938 return -rte_errno; 939 } 940 klm.mkey = ibv_mr->lkey; 941 mkey_attr.addr = (uintptr_t)addr; 942 mkey_attr.size = length; 943 mkey = mlx5_devx_cmd_mkey_create(ctx, &mkey_attr); 944 if (!mkey) { 945 claim_zero(mlx5_glue->dereg_mr(ibv_mr)); 946 return -rte_errno; 947 } 948 pmd_mr->addr = addr; 949 pmd_mr->len = length; 950 pmd_mr->obj = (void *)ibv_mr; 951 pmd_mr->imkey = mkey; 952 pmd_mr->lkey = mkey->id; 953 return 0; 954 } 955 956 void 957 mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr) 958 { 959 if (!pmd_mr) 960 return; 961 if (pmd_mr->imkey) 962 claim_zero(mlx5_devx_cmd_destroy(pmd_mr->imkey)); 963 if (pmd_mr->obj) 964 claim_zero(mlx5_glue->dereg_mr(pmd_mr->obj)); 965 memset(pmd_mr, 0, sizeof(*pmd_mr)); 966 } 967