1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2020 Mellanox Technologies, Ltd 3 */ 4 5 #include <sys/types.h> 6 #include <unistd.h> 7 #include <string.h> 8 #include <stdio.h> 9 #ifdef RTE_IBVERBS_LINK_DLOPEN 10 #include <dlfcn.h> 11 #endif 12 #include <dirent.h> 13 #include <net/if.h> 14 15 #include <rte_errno.h> 16 #include <rte_string_fns.h> 17 #include <rte_bus_pci.h> 18 #include <rte_bus_auxiliary.h> 19 20 #include "mlx5_common.h" 21 #include "mlx5_nl.h" 22 #include "mlx5_common_log.h" 23 #include "mlx5_common_private.h" 24 #include "mlx5_common_defs.h" 25 #include "mlx5_common_os.h" 26 #include "mlx5_glue.h" 27 28 #ifdef MLX5_GLUE 29 const struct mlx5_glue *mlx5_glue; 30 #endif 31 32 int 33 mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr) 34 { 35 FILE *file; 36 char line[32]; 37 int rc = -ENOENT; 38 MKSTR(path, "%s/device/uevent", dev_path); 39 40 file = fopen(path, "rb"); 41 if (file == NULL) { 42 rte_errno = errno; 43 return -rte_errno; 44 } 45 while (fgets(line, sizeof(line), file) == line) { 46 size_t len = strlen(line); 47 48 /* Truncate long lines. */ 49 if (len == (sizeof(line) - 1)) { 50 while (line[(len - 1)] != '\n') { 51 int ret = fgetc(file); 52 53 if (ret == EOF) 54 goto exit; 55 line[(len - 1)] = ret; 56 } 57 /* No match for long lines. */ 58 continue; 59 } 60 /* Extract information. */ 61 if (sscanf(line, 62 "PCI_SLOT_NAME=" 63 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 64 &pci_addr->domain, 65 &pci_addr->bus, 66 &pci_addr->devid, 67 &pci_addr->function) == 4) { 68 rc = 0; 69 break; 70 } 71 } 72 exit: 73 fclose(file); 74 if (rc) 75 rte_errno = -rc; 76 return rc; 77 } 78 79 /** 80 * Extract port name, as a number, from sysfs or netlink information. 81 * 82 * @param[in] port_name_in 83 * String representing the port name. 84 * @param[out] port_info_out 85 * Port information, including port name as a number and port name 86 * type if recognized 87 * 88 * @return 89 * port_name field set according to recognized name format. 90 */ 91 void 92 mlx5_translate_port_name(const char *port_name_in, 93 struct mlx5_switch_info *port_info_out) 94 { 95 char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol; 96 char *end; 97 int sc_items; 98 99 sc_items = sscanf(port_name_in, "%c%d", 100 &ctrl, &port_info_out->ctrl_num); 101 if (sc_items == 2 && ctrl == 'c') { 102 port_name_in++; /* 'c' */ 103 port_name_in += snprintf(NULL, 0, "%d", 104 port_info_out->ctrl_num); 105 } 106 /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */ 107 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c", 108 &pf_c1, &pf_c2, &port_info_out->pf_num, 109 &vf_c1, &vf_c2, &port_info_out->port_name, &eol); 110 if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') { 111 if (vf_c1 == 'v' && vf_c2 == 'f') { 112 /* Kernel ver >= 5.0 or OFED ver >= 4.6 */ 113 port_info_out->name_type = 114 MLX5_PHYS_PORT_NAME_TYPE_PFVF; 115 return; 116 } 117 if (vf_c1 == 's' && vf_c2 == 'f') { 118 /* Kernel ver >= 5.11 or OFED ver >= 5.1 */ 119 port_info_out->name_type = 120 MLX5_PHYS_PORT_NAME_TYPE_PFSF; 121 return; 122 } 123 } 124 /* 125 * Check for port-name as a string of the form p0 126 * (support kernel ver >= 5.0, or OFED ver >= 4.6). 127 */ 128 sc_items = sscanf(port_name_in, "%c%d%c", 129 &pf_c1, &port_info_out->port_name, &eol); 130 if (sc_items == 2 && pf_c1 == 'p') { 131 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 132 return; 133 } 134 /* 135 * Check for port-name as a string of the form pf0 136 * (support kernel ver >= 5.7 for HPF representor on BF). 137 */ 138 sc_items = sscanf(port_name_in, "%c%c%d%c", 139 &pf_c1, &pf_c2, &port_info_out->pf_num, &eol); 140 if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') { 141 port_info_out->port_name = -1; 142 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF; 143 return; 144 } 145 /* Check for port-name as a number (support kernel ver < 5.0 */ 146 errno = 0; 147 port_info_out->port_name = strtol(port_name_in, &end, 0); 148 if (!errno && 149 (size_t)(end - port_name_in) == strlen(port_name_in)) { 150 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 151 return; 152 } 153 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 154 } 155 156 int 157 mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname) 158 { 159 DIR *dir; 160 struct dirent *dent; 161 unsigned int dev_type = 0; 162 unsigned int dev_port_prev = ~0u; 163 char match[IF_NAMESIZE] = ""; 164 165 MLX5_ASSERT(ibdev_path); 166 { 167 MKSTR(path, "%s/device/net", ibdev_path); 168 169 dir = opendir(path); 170 if (dir == NULL) { 171 rte_errno = errno; 172 return -rte_errno; 173 } 174 } 175 while ((dent = readdir(dir)) != NULL) { 176 char *name = dent->d_name; 177 FILE *file; 178 unsigned int dev_port; 179 int r; 180 181 if ((name[0] == '.') && 182 ((name[1] == '\0') || 183 ((name[1] == '.') && (name[2] == '\0')))) 184 continue; 185 186 MKSTR(path, "%s/device/net/%s/%s", 187 ibdev_path, name, 188 (dev_type ? "dev_id" : "dev_port")); 189 190 file = fopen(path, "rb"); 191 if (file == NULL) { 192 if (errno != ENOENT) 193 continue; 194 /* 195 * Switch to dev_id when dev_port does not exist as 196 * is the case with Linux kernel versions < 3.15. 197 */ 198 try_dev_id: 199 match[0] = '\0'; 200 if (dev_type) 201 break; 202 dev_type = 1; 203 dev_port_prev = ~0u; 204 rewinddir(dir); 205 continue; 206 } 207 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 208 fclose(file); 209 if (r != 1) 210 continue; 211 /* 212 * Switch to dev_id when dev_port returns the same value for 213 * all ports. May happen when using a MOFED release older than 214 * 3.0 with a Linux kernel >= 3.15. 215 */ 216 if (dev_port == dev_port_prev) 217 goto try_dev_id; 218 dev_port_prev = dev_port; 219 if (dev_port == 0) 220 strlcpy(match, name, IF_NAMESIZE); 221 } 222 closedir(dir); 223 if (match[0] == '\0') { 224 rte_errno = ENOENT; 225 return -rte_errno; 226 } 227 strncpy(ifname, match, IF_NAMESIZE); 228 return 0; 229 } 230 231 #ifdef MLX5_GLUE 232 233 /** 234 * Suffix RTE_EAL_PMD_PATH with "-glue". 235 * 236 * This function performs a sanity check on RTE_EAL_PMD_PATH before 237 * suffixing its last component. 238 * 239 * @param buf[out] 240 * Output buffer, should be large enough otherwise NULL is returned. 241 * @param size 242 * Size of @p out. 243 * 244 * @return 245 * Pointer to @p buf or @p NULL in case suffix cannot be appended. 246 */ 247 static char * 248 mlx5_glue_path(char *buf, size_t size) 249 { 250 static const char *const bad[] = { "/", ".", "..", NULL }; 251 const char *path = RTE_EAL_PMD_PATH; 252 size_t len = strlen(path); 253 size_t off; 254 int i; 255 256 while (len && path[len - 1] == '/') 257 --len; 258 for (off = len; off && path[off - 1] != '/'; --off) 259 ; 260 for (i = 0; bad[i]; ++i) 261 if (!strncmp(path + off, bad[i], (int)(len - off))) 262 goto error; 263 i = snprintf(buf, size, "%.*s-glue", (int)len, path); 264 if (i == -1 || (size_t)i >= size) 265 goto error; 266 return buf; 267 error: 268 RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of" 269 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please" 270 " re-configure DPDK"); 271 return NULL; 272 } 273 274 static int 275 mlx5_glue_dlopen(void) 276 { 277 char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 278 void *handle = NULL; 279 280 char const *path[] = { 281 /* 282 * A basic security check is necessary before trusting 283 * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 284 */ 285 (geteuid() == getuid() && getegid() == getgid() ? 286 getenv("MLX5_GLUE_PATH") : NULL), 287 /* 288 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 289 * variant, otherwise let dlopen() look up libraries on its 290 * own. 291 */ 292 (*RTE_EAL_PMD_PATH ? 293 mlx5_glue_path(glue_path, sizeof(glue_path)) : ""), 294 }; 295 unsigned int i = 0; 296 void **sym; 297 const char *dlmsg; 298 299 while (!handle && i != RTE_DIM(path)) { 300 const char *end; 301 size_t len; 302 int ret; 303 304 if (!path[i]) { 305 ++i; 306 continue; 307 } 308 end = strpbrk(path[i], ":;"); 309 if (!end) 310 end = path[i] + strlen(path[i]); 311 len = end - path[i]; 312 ret = 0; 313 do { 314 char name[ret + 1]; 315 316 ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE, 317 (int)len, path[i], 318 (!len || *(end - 1) == '/') ? "" : "/"); 319 if (ret == -1) 320 break; 321 if (sizeof(name) != (size_t)ret + 1) 322 continue; 323 DRV_LOG(DEBUG, "Looking for rdma-core glue as " 324 "\"%s\"", name); 325 handle = dlopen(name, RTLD_LAZY); 326 break; 327 } while (1); 328 path[i] = end + 1; 329 if (!*end) 330 ++i; 331 } 332 if (!handle) { 333 rte_errno = EINVAL; 334 dlmsg = dlerror(); 335 if (dlmsg) 336 DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg); 337 goto glue_error; 338 } 339 sym = dlsym(handle, "mlx5_glue"); 340 if (!sym || !*sym) { 341 rte_errno = EINVAL; 342 dlmsg = dlerror(); 343 if (dlmsg) 344 DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg); 345 goto glue_error; 346 } 347 mlx5_glue = *sym; 348 return 0; 349 350 glue_error: 351 if (handle) 352 dlclose(handle); 353 return -1; 354 } 355 356 #endif 357 358 /** 359 * Initialization routine for run-time dependency on rdma-core. 360 */ 361 void 362 mlx5_glue_constructor(void) 363 { 364 /* 365 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 366 * huge pages. Calling ibv_fork_init() during init allows 367 * applications to use fork() safely for purposes other than 368 * using this PMD, which is not supported in forked processes. 369 */ 370 setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 371 /* Match the size of Rx completion entry to the size of a cacheline. */ 372 if (RTE_CACHE_LINE_SIZE == 128) 373 setenv("MLX5_CQE_SIZE", "128", 0); 374 /* 375 * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to 376 * cleanup all the Verbs resources even when the device was removed. 377 */ 378 setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1); 379 380 #ifdef MLX5_GLUE 381 if (mlx5_glue_dlopen() != 0) 382 goto glue_error; 383 #endif 384 385 #ifdef RTE_LIBRTE_MLX5_DEBUG 386 /* Glue structure must not contain any NULL pointers. */ 387 { 388 unsigned int i; 389 390 for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i) 391 MLX5_ASSERT(((const void *const *)mlx5_glue)[i]); 392 } 393 #endif 394 if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) { 395 rte_errno = EINVAL; 396 DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is " 397 "required", mlx5_glue->version, MLX5_GLUE_VERSION); 398 goto glue_error; 399 } 400 mlx5_glue->fork_init(); 401 return; 402 403 glue_error: 404 DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing" 405 " run-time dependency on rdma-core libraries (libibverbs," 406 " libmlx5)"); 407 mlx5_glue = NULL; 408 } 409 410 /** 411 * Validate user arguments for remote PD and CTX. 412 * 413 * @param config 414 * Pointer to device configuration structure. 415 * 416 * @return 417 * 0 on success, a negative errno value otherwise and rte_errno is set. 418 */ 419 int 420 mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config) 421 { 422 int device_fd = config->device_fd; 423 int pd_handle = config->pd_handle; 424 425 #ifdef HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR 426 if (device_fd == MLX5_ARG_UNSET && pd_handle != MLX5_ARG_UNSET) { 427 DRV_LOG(ERR, "Remote PD without CTX is not supported."); 428 rte_errno = EINVAL; 429 return -rte_errno; 430 } 431 if (device_fd != MLX5_ARG_UNSET && pd_handle == MLX5_ARG_UNSET) { 432 DRV_LOG(ERR, "Remote CTX without PD is not supported."); 433 rte_errno = EINVAL; 434 return -rte_errno; 435 } 436 DRV_LOG(DEBUG, "Remote PD and CTX is supported: (cmd_fd=%d, " 437 "pd_handle=%d).", device_fd, pd_handle); 438 #else 439 if (pd_handle != MLX5_ARG_UNSET || device_fd != MLX5_ARG_UNSET) { 440 DRV_LOG(ERR, 441 "Remote PD and CTX is not supported - maybe old rdma-core version?"); 442 rte_errno = ENOTSUP; 443 return -rte_errno; 444 } 445 #endif 446 return 0; 447 } 448 449 /** 450 * Release Protection Domain object. 451 * 452 * @param[out] cdev 453 * Pointer to the mlx5 device. 454 * 455 * @return 456 * 0 on success, a negative errno value otherwise. 457 */ 458 int 459 mlx5_os_pd_release(struct mlx5_common_device *cdev) 460 { 461 if (cdev->config.pd_handle == MLX5_ARG_UNSET) 462 return mlx5_glue->dealloc_pd(cdev->pd); 463 else 464 return mlx5_glue->unimport_pd(cdev->pd); 465 } 466 467 /** 468 * Allocate Protection Domain object. 469 * 470 * @param[out] cdev 471 * Pointer to the mlx5 device. 472 * 473 * @return 474 * 0 on success, a negative errno value otherwise. 475 */ 476 static int 477 mlx5_os_pd_create(struct mlx5_common_device *cdev) 478 { 479 cdev->pd = mlx5_glue->alloc_pd(cdev->ctx); 480 if (cdev->pd == NULL) { 481 DRV_LOG(ERR, "Failed to allocate PD: %s", rte_strerror(errno)); 482 return errno ? -errno : -ENOMEM; 483 } 484 return 0; 485 } 486 487 /** 488 * Import Protection Domain object according to given PD handle. 489 * 490 * @param[out] cdev 491 * Pointer to the mlx5 device. 492 * 493 * @return 494 * 0 on success, a negative errno value otherwise. 495 */ 496 static int 497 mlx5_os_pd_import(struct mlx5_common_device *cdev) 498 { 499 cdev->pd = mlx5_glue->import_pd(cdev->ctx, cdev->config.pd_handle); 500 if (cdev->pd == NULL) { 501 DRV_LOG(ERR, "Failed to import PD using handle=%d: %s", 502 cdev->config.pd_handle, rte_strerror(errno)); 503 return errno ? -errno : -ENOMEM; 504 } 505 return 0; 506 } 507 508 /** 509 * Prepare Protection Domain object and extract its pdn using DV API. 510 * 511 * @param[out] cdev 512 * Pointer to the mlx5 device. 513 * 514 * @return 515 * 0 on success, a negative errno value otherwise and rte_errno is set. 516 */ 517 int 518 mlx5_os_pd_prepare(struct mlx5_common_device *cdev) 519 { 520 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 521 struct mlx5dv_obj obj; 522 struct mlx5dv_pd pd_info; 523 #endif 524 int ret; 525 526 if (cdev->config.pd_handle == MLX5_ARG_UNSET) 527 ret = mlx5_os_pd_create(cdev); 528 else 529 ret = mlx5_os_pd_import(cdev); 530 if (ret) { 531 rte_errno = -ret; 532 return ret; 533 } 534 if (cdev->config.devx == 0) 535 return 0; 536 #ifdef HAVE_IBV_FLOW_DV_SUPPORT 537 obj.pd.in = cdev->pd; 538 obj.pd.out = &pd_info; 539 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD); 540 if (ret != 0) { 541 DRV_LOG(ERR, "Fail to get PD object info."); 542 rte_errno = errno; 543 claim_zero(mlx5_os_pd_release(cdev)); 544 cdev->pd = NULL; 545 return -rte_errno; 546 } 547 cdev->pdn = pd_info.pdn; 548 return 0; 549 #else 550 DRV_LOG(ERR, "Cannot get pdn - no DV support."); 551 rte_errno = ENOTSUP; 552 return -rte_errno; 553 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */ 554 } 555 556 static struct ibv_device * 557 mlx5_os_get_ibv_device(const struct rte_pci_addr *addr) 558 { 559 int n; 560 struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n); 561 struct ibv_device *ibv_match = NULL; 562 563 if (ibv_list == NULL) { 564 rte_errno = ENOSYS; 565 return NULL; 566 } 567 while (n-- > 0) { 568 struct rte_pci_addr paddr; 569 570 DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name); 571 if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0) 572 continue; 573 if (rte_pci_addr_cmp(addr, &paddr) != 0) 574 continue; 575 ibv_match = ibv_list[n]; 576 break; 577 } 578 if (ibv_match == NULL) { 579 DRV_LOG(WARNING, 580 "No Verbs device matches PCI device " PCI_PRI_FMT "," 581 " are kernel drivers loaded?", 582 addr->domain, addr->bus, addr->devid, addr->function); 583 rte_errno = ENOENT; 584 } 585 mlx5_glue->free_device_list(ibv_list); 586 return ibv_match; 587 } 588 589 /* Try to disable ROCE by Netlink\Devlink. */ 590 static int 591 mlx5_nl_roce_disable(const char *addr) 592 { 593 int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC); 594 int devlink_id; 595 int enable; 596 int ret; 597 598 if (nlsk_fd < 0) 599 return nlsk_fd; 600 devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd); 601 if (devlink_id < 0) { 602 ret = devlink_id; 603 DRV_LOG(DEBUG, 604 "Failed to get devlink id for ROCE operations by Netlink."); 605 goto close; 606 } 607 ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable); 608 if (ret) { 609 DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.", 610 ret); 611 goto close; 612 } else if (!enable) { 613 DRV_LOG(INFO, "ROCE has already disabled(Netlink)."); 614 goto close; 615 } 616 ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0); 617 if (ret) 618 DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret); 619 else 620 DRV_LOG(INFO, "ROCE is disabled by Netlink successfully."); 621 close: 622 close(nlsk_fd); 623 return ret; 624 } 625 626 /* Try to disable ROCE by sysfs. */ 627 static int 628 mlx5_sys_roce_disable(const char *addr) 629 { 630 FILE *file_o; 631 int enable; 632 int ret; 633 634 MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr); 635 file_o = fopen(file_p, "rb"); 636 if (!file_o) { 637 rte_errno = ENOTSUP; 638 return -ENOTSUP; 639 } 640 ret = fscanf(file_o, "%d", &enable); 641 if (ret != 1) { 642 rte_errno = EINVAL; 643 ret = EINVAL; 644 goto close; 645 } else if (!enable) { 646 ret = 0; 647 DRV_LOG(INFO, "ROCE has already disabled(sysfs)."); 648 goto close; 649 } 650 fclose(file_o); 651 file_o = fopen(file_p, "wb"); 652 if (!file_o) { 653 rte_errno = ENOTSUP; 654 return -ENOTSUP; 655 } 656 fprintf(file_o, "0\n"); 657 ret = 0; 658 close: 659 if (ret) 660 DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret); 661 else 662 DRV_LOG(INFO, "ROCE is disabled by sysfs successfully."); 663 fclose(file_o); 664 return ret; 665 } 666 667 static int 668 mlx5_roce_disable(const struct rte_device *dev) 669 { 670 char pci_addr[PCI_PRI_STR_SIZE] = { 0 }; 671 672 if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0) 673 return -rte_errno; 674 /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */ 675 if (mlx5_nl_roce_disable(pci_addr) != 0 && 676 mlx5_sys_roce_disable(pci_addr) != 0) 677 return -rte_errno; 678 return 0; 679 } 680 681 static struct ibv_device * 682 mlx5_os_get_ibv_dev(const struct rte_device *dev) 683 { 684 struct ibv_device *ibv; 685 686 if (mlx5_dev_is_pci(dev)) 687 ibv = mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr); 688 else 689 ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev)); 690 if (ibv == NULL) { 691 rte_errno = ENODEV; 692 DRV_LOG(ERR, "Verbs device not found: %s", dev->name); 693 } 694 return ibv; 695 } 696 697 static struct ibv_device * 698 mlx5_vdpa_get_ibv_dev(const struct rte_device *dev) 699 { 700 struct ibv_device *ibv; 701 int retry; 702 703 if (mlx5_roce_disable(dev) != 0) { 704 DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".", 705 dev->name); 706 return NULL; 707 } 708 /* Wait for the IB device to appear again after reload. */ 709 for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) { 710 ibv = mlx5_os_get_ibv_dev(dev); 711 if (ibv != NULL) 712 return ibv; 713 usleep(MLX5_VDPA_USEC); 714 } 715 DRV_LOG(ERR, 716 "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.", 717 dev->name, MLX5_VDPA_MAX_RETRIES); 718 rte_errno = EAGAIN; 719 return NULL; 720 } 721 722 static int 723 mlx5_config_doorbell_mapping_env(int dbnc) 724 { 725 char *env; 726 int value; 727 728 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 729 /* Get environment variable to store. */ 730 env = getenv(MLX5_SHUT_UP_BF); 731 value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET; 732 if (dbnc == MLX5_ARG_UNSET) 733 setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1); 734 else 735 setenv(MLX5_SHUT_UP_BF, 736 dbnc == MLX5_SQ_DB_NCACHED ? "1" : "0", 1); 737 return value; 738 } 739 740 static void 741 mlx5_restore_doorbell_mapping_env(int value) 742 { 743 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 744 /* Restore the original environment variable state. */ 745 if (value == MLX5_ARG_UNSET) 746 unsetenv(MLX5_SHUT_UP_BF); 747 else 748 setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1); 749 } 750 751 /** 752 * Function API to open IB device. 753 * 754 * @param cdev 755 * Pointer to the mlx5 device. 756 * @param classes 757 * Chosen classes come from device arguments. 758 * 759 * @return 760 * Pointer to ibv_context on success, NULL otherwise and rte_errno is set. 761 */ 762 static struct ibv_context * 763 mlx5_open_device(struct mlx5_common_device *cdev, uint32_t classes) 764 { 765 struct ibv_device *ibv; 766 struct ibv_context *ctx = NULL; 767 int dbmap_env; 768 769 MLX5_ASSERT(cdev->config.device_fd == MLX5_ARG_UNSET); 770 if (classes & MLX5_CLASS_VDPA) 771 ibv = mlx5_vdpa_get_ibv_dev(cdev->dev); 772 else 773 ibv = mlx5_os_get_ibv_dev(cdev->dev); 774 if (!ibv) 775 return NULL; 776 DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name); 777 /* 778 * Configure environment variable "MLX5_BF_SHUT_UP" before the device 779 * creation. The rdma_core library checks the variable at device 780 * creation and stores the result internally. 781 */ 782 dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc); 783 /* Try to open IB device with DV first, then usual Verbs. */ 784 errno = 0; 785 ctx = mlx5_glue->dv_open_device(ibv); 786 if (ctx) { 787 cdev->config.devx = 1; 788 } else if (classes == MLX5_CLASS_ETH) { 789 /* The environment variable is still configured. */ 790 ctx = mlx5_glue->open_device(ibv); 791 if (ctx == NULL) 792 goto error; 793 } else { 794 goto error; 795 } 796 /* The device is created, no need for environment. */ 797 mlx5_restore_doorbell_mapping_env(dbmap_env); 798 return ctx; 799 error: 800 rte_errno = errno ? errno : ENODEV; 801 /* The device creation is failed, no need for environment. */ 802 mlx5_restore_doorbell_mapping_env(dbmap_env); 803 DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name); 804 return NULL; 805 } 806 807 /** 808 * Function API to import IB device. 809 * 810 * @param cdev 811 * Pointer to the mlx5 device. 812 * 813 * @return 814 * Pointer to ibv_context on success, NULL otherwise and rte_errno is set. 815 */ 816 static struct ibv_context * 817 mlx5_import_device(struct mlx5_common_device *cdev) 818 { 819 struct ibv_context *ctx = NULL; 820 821 MLX5_ASSERT(cdev->config.device_fd != MLX5_ARG_UNSET); 822 ctx = mlx5_glue->import_device(cdev->config.device_fd); 823 if (!ctx) { 824 DRV_LOG(ERR, "Failed to import device for fd=%d: %s", 825 cdev->config.device_fd, rte_strerror(errno)); 826 rte_errno = errno; 827 } 828 return ctx; 829 } 830 831 /** 832 * Function API to prepare IB device. 833 * 834 * @param cdev 835 * Pointer to the mlx5 device. 836 * @param classes 837 * Chosen classes come from device arguments. 838 * 839 * @return 840 * 0 on success, a negative errno value otherwise and rte_errno is set. 841 */ 842 int 843 mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes) 844 { 845 846 struct ibv_context *ctx = NULL; 847 848 if (cdev->config.device_fd == MLX5_ARG_UNSET) 849 ctx = mlx5_open_device(cdev, classes); 850 else 851 ctx = mlx5_import_device(cdev); 852 if (ctx == NULL) 853 return -rte_errno; 854 /* Hint libmlx5 to use PMD allocator for data plane resources */ 855 mlx5_set_context_attr(cdev->dev, ctx); 856 cdev->ctx = ctx; 857 return 0; 858 } 859 860 int 861 mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len) 862 { 863 char tmp[512]; 864 char cur_ifname[IF_NAMESIZE + 1]; 865 FILE *id_file; 866 DIR *dir; 867 struct dirent *ptr; 868 int ret; 869 870 if (guid == NULL || len < sizeof(u_int64_t) + 1) 871 return -1; 872 memset(guid, 0, len); 873 snprintf(tmp, sizeof(tmp), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/net", 874 dev->domain, dev->bus, dev->devid, dev->function); 875 dir = opendir(tmp); 876 if (dir == NULL) 877 return -1; 878 /* Traverse to identify PF interface */ 879 do { 880 ptr = readdir(dir); 881 if (ptr == NULL || ptr->d_type != DT_DIR) { 882 closedir(dir); 883 return -1; 884 } 885 } while (strchr(ptr->d_name, '.') || strchr(ptr->d_name, '_') || 886 strchr(ptr->d_name, 'v')); 887 snprintf(cur_ifname, sizeof(cur_ifname), "%s", ptr->d_name); 888 closedir(dir); 889 snprintf(tmp + strlen(tmp), sizeof(tmp) - strlen(tmp), 890 "/%s/phys_switch_id", cur_ifname); 891 /* Older OFED like 5.3 doesn't support read */ 892 id_file = fopen(tmp, "r"); 893 if (!id_file) 894 return 0; 895 ret = fscanf(id_file, "%16s", guid); 896 fclose(id_file); 897 return ret; 898 } 899 900 /* 901 * Create direct mkey using the kernel ibv_reg_mr API and wrap it with a new 902 * indirect mkey created by the DevX API. 903 * This mkey should be used for DevX commands requesting mkey as a parameter. 904 */ 905 int 906 mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr, 907 size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr) 908 { 909 struct mlx5_klm klm = { 910 .byte_count = length, 911 .address = (uintptr_t)addr, 912 }; 913 struct mlx5_devx_mkey_attr mkey_attr = { 914 .pd = pdn, 915 .klm_array = &klm, 916 .klm_num = 1, 917 }; 918 struct mlx5_devx_obj *mkey; 919 struct ibv_mr *ibv_mr = mlx5_glue->reg_mr(pd, addr, length, 920 IBV_ACCESS_LOCAL_WRITE | 921 (haswell_broadwell_cpu ? 0 : 922 IBV_ACCESS_RELAXED_ORDERING)); 923 924 if (!ibv_mr) { 925 rte_errno = errno; 926 return -rte_errno; 927 } 928 klm.mkey = ibv_mr->lkey; 929 mkey_attr.addr = (uintptr_t)addr; 930 mkey_attr.size = length; 931 mkey = mlx5_devx_cmd_mkey_create(ctx, &mkey_attr); 932 if (!mkey) { 933 claim_zero(mlx5_glue->dereg_mr(ibv_mr)); 934 return -rte_errno; 935 } 936 pmd_mr->addr = addr; 937 pmd_mr->len = length; 938 pmd_mr->obj = (void *)ibv_mr; 939 pmd_mr->imkey = mkey; 940 pmd_mr->lkey = mkey->id; 941 return 0; 942 } 943 944 void 945 mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr) 946 { 947 if (!pmd_mr) 948 return; 949 if (pmd_mr->imkey) 950 claim_zero(mlx5_devx_cmd_destroy(pmd_mr->imkey)); 951 if (pmd_mr->obj) 952 claim_zero(mlx5_glue->dereg_mr(pmd_mr->obj)); 953 memset(pmd_mr, 0, sizeof(*pmd_mr)); 954 } 955