1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2020 Mellanox Technologies, Ltd 3 */ 4 5 #include <unistd.h> 6 #include <string.h> 7 #include <stdio.h> 8 #ifdef RTE_IBVERBS_LINK_DLOPEN 9 #include <dlfcn.h> 10 #endif 11 #include <dirent.h> 12 #include <net/if.h> 13 14 #include <rte_errno.h> 15 #include <rte_string_fns.h> 16 17 #include "mlx5_common.h" 18 #include "mlx5_common_log.h" 19 #include "mlx5_common_defs.h" 20 #include "mlx5_common_os.h" 21 #include "mlx5_glue.h" 22 23 #ifdef MLX5_GLUE 24 const struct mlx5_glue *mlx5_glue; 25 #endif 26 27 int 28 mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr) 29 { 30 FILE *file; 31 char line[32]; 32 int rc = -ENOENT; 33 MKSTR(path, "%s/device/uevent", dev_path); 34 35 file = fopen(path, "rb"); 36 if (file == NULL) { 37 rte_errno = errno; 38 return -rte_errno; 39 } 40 while (fgets(line, sizeof(line), file) == line) { 41 size_t len = strlen(line); 42 43 /* Truncate long lines. */ 44 if (len == (sizeof(line) - 1)) { 45 while (line[(len - 1)] != '\n') { 46 int ret = fgetc(file); 47 48 if (ret == EOF) 49 goto exit; 50 line[(len - 1)] = ret; 51 } 52 /* No match for long lines. */ 53 continue; 54 } 55 /* Extract information. */ 56 if (sscanf(line, 57 "PCI_SLOT_NAME=" 58 "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 59 &pci_addr->domain, 60 &pci_addr->bus, 61 &pci_addr->devid, 62 &pci_addr->function) == 4) { 63 rc = 0; 64 break; 65 } 66 } 67 exit: 68 fclose(file); 69 if (rc) 70 rte_errno = -rc; 71 return rc; 72 } 73 74 /** 75 * Extract port name, as a number, from sysfs or netlink information. 76 * 77 * @param[in] port_name_in 78 * String representing the port name. 79 * @param[out] port_info_out 80 * Port information, including port name as a number and port name 81 * type if recognized 82 * 83 * @return 84 * port_name field set according to recognized name format. 85 */ 86 void 87 mlx5_translate_port_name(const char *port_name_in, 88 struct mlx5_switch_info *port_info_out) 89 { 90 char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol; 91 char *end; 92 int sc_items; 93 94 sc_items = sscanf(port_name_in, "%c%d", 95 &ctrl, &port_info_out->ctrl_num); 96 if (sc_items == 2 && ctrl == 'c') { 97 port_name_in++; /* 'c' */ 98 port_name_in += snprintf(NULL, 0, "%d", 99 port_info_out->ctrl_num); 100 } 101 /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */ 102 sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c", 103 &pf_c1, &pf_c2, &port_info_out->pf_num, 104 &vf_c1, &vf_c2, &port_info_out->port_name, &eol); 105 if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') { 106 if (vf_c1 == 'v' && vf_c2 == 'f') { 107 /* Kernel ver >= 5.0 or OFED ver >= 4.6 */ 108 port_info_out->name_type = 109 MLX5_PHYS_PORT_NAME_TYPE_PFVF; 110 return; 111 } 112 if (vf_c1 == 's' && vf_c2 == 'f') { 113 /* Kernel ver >= 5.11 or OFED ver >= 5.1 */ 114 port_info_out->name_type = 115 MLX5_PHYS_PORT_NAME_TYPE_PFSF; 116 return; 117 } 118 } 119 /* 120 * Check for port-name as a string of the form p0 121 * (support kernel ver >= 5.0, or OFED ver >= 4.6). 122 */ 123 sc_items = sscanf(port_name_in, "%c%d%c", 124 &pf_c1, &port_info_out->port_name, &eol); 125 if (sc_items == 2 && pf_c1 == 'p') { 126 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 127 return; 128 } 129 /* 130 * Check for port-name as a string of the form pf0 131 * (support kernel ver >= 5.7 for HPF representor on BF). 132 */ 133 sc_items = sscanf(port_name_in, "%c%c%d%c", 134 &pf_c1, &pf_c2, &port_info_out->pf_num, &eol); 135 if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') { 136 port_info_out->port_name = -1; 137 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF; 138 return; 139 } 140 /* Check for port-name as a number (support kernel ver < 5.0 */ 141 errno = 0; 142 port_info_out->port_name = strtol(port_name_in, &end, 0); 143 if (!errno && 144 (size_t)(end - port_name_in) == strlen(port_name_in)) { 145 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 146 return; 147 } 148 port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 149 } 150 151 int 152 mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname) 153 { 154 DIR *dir; 155 struct dirent *dent; 156 unsigned int dev_type = 0; 157 unsigned int dev_port_prev = ~0u; 158 char match[IF_NAMESIZE] = ""; 159 160 MLX5_ASSERT(ibdev_path); 161 { 162 MKSTR(path, "%s/device/net", ibdev_path); 163 164 dir = opendir(path); 165 if (dir == NULL) { 166 rte_errno = errno; 167 return -rte_errno; 168 } 169 } 170 while ((dent = readdir(dir)) != NULL) { 171 char *name = dent->d_name; 172 FILE *file; 173 unsigned int dev_port; 174 int r; 175 176 if ((name[0] == '.') && 177 ((name[1] == '\0') || 178 ((name[1] == '.') && (name[2] == '\0')))) 179 continue; 180 181 MKSTR(path, "%s/device/net/%s/%s", 182 ibdev_path, name, 183 (dev_type ? "dev_id" : "dev_port")); 184 185 file = fopen(path, "rb"); 186 if (file == NULL) { 187 if (errno != ENOENT) 188 continue; 189 /* 190 * Switch to dev_id when dev_port does not exist as 191 * is the case with Linux kernel versions < 3.15. 192 */ 193 try_dev_id: 194 match[0] = '\0'; 195 if (dev_type) 196 break; 197 dev_type = 1; 198 dev_port_prev = ~0u; 199 rewinddir(dir); 200 continue; 201 } 202 r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 203 fclose(file); 204 if (r != 1) 205 continue; 206 /* 207 * Switch to dev_id when dev_port returns the same value for 208 * all ports. May happen when using a MOFED release older than 209 * 3.0 with a Linux kernel >= 3.15. 210 */ 211 if (dev_port == dev_port_prev) 212 goto try_dev_id; 213 dev_port_prev = dev_port; 214 if (dev_port == 0) 215 strlcpy(match, name, IF_NAMESIZE); 216 } 217 closedir(dir); 218 if (match[0] == '\0') { 219 rte_errno = ENOENT; 220 return -rte_errno; 221 } 222 strncpy(ifname, match, IF_NAMESIZE); 223 return 0; 224 } 225 226 #ifdef MLX5_GLUE 227 228 /** 229 * Suffix RTE_EAL_PMD_PATH with "-glue". 230 * 231 * This function performs a sanity check on RTE_EAL_PMD_PATH before 232 * suffixing its last component. 233 * 234 * @param buf[out] 235 * Output buffer, should be large enough otherwise NULL is returned. 236 * @param size 237 * Size of @p out. 238 * 239 * @return 240 * Pointer to @p buf or @p NULL in case suffix cannot be appended. 241 */ 242 static char * 243 mlx5_glue_path(char *buf, size_t size) 244 { 245 static const char *const bad[] = { "/", ".", "..", NULL }; 246 const char *path = RTE_EAL_PMD_PATH; 247 size_t len = strlen(path); 248 size_t off; 249 int i; 250 251 while (len && path[len - 1] == '/') 252 --len; 253 for (off = len; off && path[off - 1] != '/'; --off) 254 ; 255 for (i = 0; bad[i]; ++i) 256 if (!strncmp(path + off, bad[i], (int)(len - off))) 257 goto error; 258 i = snprintf(buf, size, "%.*s-glue", (int)len, path); 259 if (i == -1 || (size_t)i >= size) 260 goto error; 261 return buf; 262 error: 263 RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of" 264 " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please" 265 " re-configure DPDK"); 266 return NULL; 267 } 268 269 static int 270 mlx5_glue_dlopen(void) 271 { 272 char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 273 void *handle = NULL; 274 275 char const *path[] = { 276 /* 277 * A basic security check is necessary before trusting 278 * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 279 */ 280 (geteuid() == getuid() && getegid() == getgid() ? 281 getenv("MLX5_GLUE_PATH") : NULL), 282 /* 283 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 284 * variant, otherwise let dlopen() look up libraries on its 285 * own. 286 */ 287 (*RTE_EAL_PMD_PATH ? 288 mlx5_glue_path(glue_path, sizeof(glue_path)) : ""), 289 }; 290 unsigned int i = 0; 291 void **sym; 292 const char *dlmsg; 293 294 while (!handle && i != RTE_DIM(path)) { 295 const char *end; 296 size_t len; 297 int ret; 298 299 if (!path[i]) { 300 ++i; 301 continue; 302 } 303 end = strpbrk(path[i], ":;"); 304 if (!end) 305 end = path[i] + strlen(path[i]); 306 len = end - path[i]; 307 ret = 0; 308 do { 309 char name[ret + 1]; 310 311 ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE, 312 (int)len, path[i], 313 (!len || *(end - 1) == '/') ? "" : "/"); 314 if (ret == -1) 315 break; 316 if (sizeof(name) != (size_t)ret + 1) 317 continue; 318 DRV_LOG(DEBUG, "Looking for rdma-core glue as " 319 "\"%s\"", name); 320 handle = dlopen(name, RTLD_LAZY); 321 break; 322 } while (1); 323 path[i] = end + 1; 324 if (!*end) 325 ++i; 326 } 327 if (!handle) { 328 rte_errno = EINVAL; 329 dlmsg = dlerror(); 330 if (dlmsg) 331 DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg); 332 goto glue_error; 333 } 334 sym = dlsym(handle, "mlx5_glue"); 335 if (!sym || !*sym) { 336 rte_errno = EINVAL; 337 dlmsg = dlerror(); 338 if (dlmsg) 339 DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg); 340 goto glue_error; 341 } 342 mlx5_glue = *sym; 343 return 0; 344 345 glue_error: 346 if (handle) 347 dlclose(handle); 348 return -1; 349 } 350 351 #endif 352 353 /** 354 * Initialization routine for run-time dependency on rdma-core. 355 */ 356 void 357 mlx5_glue_constructor(void) 358 { 359 /* 360 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 361 * huge pages. Calling ibv_fork_init() during init allows 362 * applications to use fork() safely for purposes other than 363 * using this PMD, which is not supported in forked processes. 364 */ 365 setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 366 /* Match the size of Rx completion entry to the size of a cacheline. */ 367 if (RTE_CACHE_LINE_SIZE == 128) 368 setenv("MLX5_CQE_SIZE", "128", 0); 369 /* 370 * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to 371 * cleanup all the Verbs resources even when the device was removed. 372 */ 373 setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1); 374 375 #ifdef MLX5_GLUE 376 if (mlx5_glue_dlopen() != 0) 377 goto glue_error; 378 #endif 379 380 #ifdef RTE_LIBRTE_MLX5_DEBUG 381 /* Glue structure must not contain any NULL pointers. */ 382 { 383 unsigned int i; 384 385 for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i) 386 MLX5_ASSERT(((const void *const *)mlx5_glue)[i]); 387 } 388 #endif 389 if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) { 390 rte_errno = EINVAL; 391 DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is " 392 "required", mlx5_glue->version, MLX5_GLUE_VERSION); 393 goto glue_error; 394 } 395 mlx5_glue->fork_init(); 396 return; 397 398 glue_error: 399 DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing" 400 " run-time dependency on rdma-core libraries (libibverbs," 401 " libmlx5)"); 402 mlx5_glue = NULL; 403 } 404 405 struct ibv_device * 406 mlx5_os_get_ibv_device(const struct rte_pci_addr *addr) 407 { 408 int n; 409 struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n); 410 struct ibv_device *ibv_match = NULL; 411 412 if (ibv_list == NULL) { 413 rte_errno = ENOSYS; 414 return NULL; 415 } 416 while (n-- > 0) { 417 struct rte_pci_addr paddr; 418 419 DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name); 420 if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0) 421 continue; 422 if (rte_pci_addr_cmp(addr, &paddr) != 0) 423 continue; 424 ibv_match = ibv_list[n]; 425 break; 426 } 427 if (ibv_match == NULL) 428 rte_errno = ENOENT; 429 mlx5_glue->free_device_list(ibv_list); 430 return ibv_match; 431 } 432 433 static int 434 mlx5_config_doorbell_mapping_env(int dbnc) 435 { 436 char *env; 437 int value; 438 439 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 440 /* Get environment variable to store. */ 441 env = getenv(MLX5_SHUT_UP_BF); 442 value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET; 443 if (dbnc == MLX5_ARG_UNSET) 444 setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1); 445 else 446 setenv(MLX5_SHUT_UP_BF, 447 dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1); 448 return value; 449 } 450 451 static void 452 mlx5_restore_doorbell_mapping_env(int value) 453 { 454 MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 455 /* Restore the original environment variable state. */ 456 if (value == MLX5_ARG_UNSET) 457 unsetenv(MLX5_SHUT_UP_BF); 458 else 459 setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1); 460 } 461 462 /** 463 * Function API to open IB device. 464 * 465 * 466 * @param cdev 467 * Pointer to the mlx5 device. 468 * @param ctx_ptr 469 * Pointer to fill inside pointer to device context. 470 * 471 * @return 472 * 0 on success, a negative errno value otherwise and rte_errno is set. 473 */ 474 int 475 mlx5_os_open_device(struct mlx5_common_device *cdev, void **ctx_ptr) 476 { 477 struct ibv_device *ibv; 478 struct ibv_context *ctx = NULL; 479 int dbmap_env; 480 481 ibv = mlx5_os_get_ibv_dev(cdev->dev); 482 if (!ibv) 483 return -rte_errno; 484 DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name); 485 /* 486 * Configure environment variable "MLX5_BF_SHUT_UP" before the device 487 * creation. The rdma_core library checks the variable at device 488 * creation and stores the result internally. 489 */ 490 dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc); 491 /* Try to open IB device with DV first, then usual Verbs. */ 492 errno = 0; 493 ctx = mlx5_glue->dv_open_device(ibv); 494 if (ctx) { 495 cdev->config.devx = 1; 496 DRV_LOG(DEBUG, "DevX is supported."); 497 } else { 498 /* The environment variable is still configured. */ 499 ctx = mlx5_glue->open_device(ibv); 500 if (ctx == NULL) 501 goto error; 502 DRV_LOG(DEBUG, "DevX is NOT supported."); 503 } 504 /* The device is created, no need for environment. */ 505 mlx5_restore_doorbell_mapping_env(dbmap_env); 506 /* Hint libmlx5 to use PMD allocator for data plane resources */ 507 mlx5_set_context_attr(cdev->dev, ctx); 508 *ctx_ptr = (void *)ctx; 509 return 0; 510 error: 511 rte_errno = errno ? errno : ENODEV; 512 /* The device creation is failed, no need for environment. */ 513 mlx5_restore_doorbell_mapping_env(dbmap_env); 514 DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name); 515 return -rte_errno; 516 } 517