179aa4307SOphir Munk /* SPDX-License-Identifier: BSD-3-Clause 279aa4307SOphir Munk * Copyright 2020 Mellanox Technologies, Ltd 379aa4307SOphir Munk */ 479aa4307SOphir Munk 579aa4307SOphir Munk #include <unistd.h> 679aa4307SOphir Munk #include <string.h> 779aa4307SOphir Munk #include <stdio.h> 879aa4307SOphir Munk #ifdef RTE_IBVERBS_LINK_DLOPEN 979aa4307SOphir Munk #include <dlfcn.h> 1079aa4307SOphir Munk #endif 11aec086c9SMatan Azrad #include <dirent.h> 12aec086c9SMatan Azrad #include <net/if.h> 1379aa4307SOphir Munk 1479aa4307SOphir Munk #include <rte_errno.h> 15aec086c9SMatan Azrad #include <rte_string_fns.h> 16*662d0dc6SMichael Baum #include <rte_bus_pci.h> 17*662d0dc6SMichael Baum #include <rte_bus_auxiliary.h> 1879aa4307SOphir Munk 1979aa4307SOphir Munk #include "mlx5_common.h" 20*662d0dc6SMichael Baum #include "mlx5_nl.h" 2125245d5dSShiri Kuzin #include "mlx5_common_log.h" 22*662d0dc6SMichael Baum #include "mlx5_common_private.h" 23887183efSMichael Baum #include "mlx5_common_defs.h" 24c31f3f7fSShiri Kuzin #include "mlx5_common_os.h" 2579aa4307SOphir Munk #include "mlx5_glue.h" 2679aa4307SOphir Munk 2779aa4307SOphir Munk #ifdef MLX5_GLUE 2879aa4307SOphir Munk const struct mlx5_glue *mlx5_glue; 2979aa4307SOphir Munk #endif 3079aa4307SOphir Munk 3179aa4307SOphir Munk int 324d567938SThomas Monjalon mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr) 3379aa4307SOphir Munk { 3479aa4307SOphir Munk FILE *file; 3579aa4307SOphir Munk char line[32]; 36482a1d34SViacheslav Ovsiienko int rc = -ENOENT; 3779aa4307SOphir Munk MKSTR(path, "%s/device/uevent", dev_path); 3879aa4307SOphir Munk 3979aa4307SOphir Munk file = fopen(path, "rb"); 4079aa4307SOphir Munk if (file == NULL) { 4179aa4307SOphir Munk rte_errno = errno; 4279aa4307SOphir Munk return -rte_errno; 4379aa4307SOphir Munk } 4479aa4307SOphir Munk while (fgets(line, sizeof(line), file) == line) { 4579aa4307SOphir Munk size_t len = strlen(line); 4679aa4307SOphir Munk 4779aa4307SOphir Munk /* Truncate long lines. */ 48482a1d34SViacheslav Ovsiienko if (len == (sizeof(line) - 1)) { 4979aa4307SOphir Munk while (line[(len - 1)] != '\n') { 50482a1d34SViacheslav Ovsiienko int ret = fgetc(file); 51482a1d34SViacheslav Ovsiienko 5279aa4307SOphir Munk if (ret == EOF) 53482a1d34SViacheslav Ovsiienko goto exit; 5479aa4307SOphir Munk line[(len - 1)] = ret; 5579aa4307SOphir Munk } 56482a1d34SViacheslav Ovsiienko /* No match for long lines. */ 57482a1d34SViacheslav Ovsiienko continue; 58482a1d34SViacheslav Ovsiienko } 5979aa4307SOphir Munk /* Extract information. */ 6079aa4307SOphir Munk if (sscanf(line, 6179aa4307SOphir Munk "PCI_SLOT_NAME=" 6279aa4307SOphir Munk "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 6379aa4307SOphir Munk &pci_addr->domain, 6479aa4307SOphir Munk &pci_addr->bus, 6579aa4307SOphir Munk &pci_addr->devid, 6679aa4307SOphir Munk &pci_addr->function) == 4) { 67482a1d34SViacheslav Ovsiienko rc = 0; 6879aa4307SOphir Munk break; 6979aa4307SOphir Munk } 7079aa4307SOphir Munk } 71482a1d34SViacheslav Ovsiienko exit: 7279aa4307SOphir Munk fclose(file); 73482a1d34SViacheslav Ovsiienko if (rc) 74482a1d34SViacheslav Ovsiienko rte_errno = -rc; 75482a1d34SViacheslav Ovsiienko return rc; 7679aa4307SOphir Munk } 7779aa4307SOphir Munk 7879aa4307SOphir Munk /** 7979aa4307SOphir Munk * Extract port name, as a number, from sysfs or netlink information. 8079aa4307SOphir Munk * 8179aa4307SOphir Munk * @param[in] port_name_in 8279aa4307SOphir Munk * String representing the port name. 8379aa4307SOphir Munk * @param[out] port_info_out 8479aa4307SOphir Munk * Port information, including port name as a number and port name 8579aa4307SOphir Munk * type if recognized 8679aa4307SOphir Munk * 8779aa4307SOphir Munk * @return 8879aa4307SOphir Munk * port_name field set according to recognized name format. 8979aa4307SOphir Munk */ 9079aa4307SOphir Munk void 9179aa4307SOphir Munk mlx5_translate_port_name(const char *port_name_in, 9279aa4307SOphir Munk struct mlx5_switch_info *port_info_out) 9379aa4307SOphir Munk { 9459df97f1SXueming Li char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol; 9579aa4307SOphir Munk char *end; 9679aa4307SOphir Munk int sc_items; 9779aa4307SOphir Munk 9859df97f1SXueming Li sc_items = sscanf(port_name_in, "%c%d", 9959df97f1SXueming Li &ctrl, &port_info_out->ctrl_num); 10059df97f1SXueming Li if (sc_items == 2 && ctrl == 'c') { 10159df97f1SXueming Li port_name_in++; /* 'c' */ 10259df97f1SXueming Li port_name_in += snprintf(NULL, 0, "%d", 10359df97f1SXueming Li port_info_out->ctrl_num); 10459df97f1SXueming Li } 10559df97f1SXueming Li /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */ 1063590881bSViacheslav Ovsiienko sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c", 10779aa4307SOphir Munk &pf_c1, &pf_c2, &port_info_out->pf_num, 1083590881bSViacheslav Ovsiienko &vf_c1, &vf_c2, &port_info_out->port_name, &eol); 10959df97f1SXueming Li if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') { 11059df97f1SXueming Li if (vf_c1 == 'v' && vf_c2 == 'f') { 11159df97f1SXueming Li /* Kernel ver >= 5.0 or OFED ver >= 4.6 */ 11259df97f1SXueming Li port_info_out->name_type = 11359df97f1SXueming Li MLX5_PHYS_PORT_NAME_TYPE_PFVF; 11479aa4307SOphir Munk return; 11579aa4307SOphir Munk } 11659df97f1SXueming Li if (vf_c1 == 's' && vf_c2 == 'f') { 11759df97f1SXueming Li /* Kernel ver >= 5.11 or OFED ver >= 5.1 */ 11859df97f1SXueming Li port_info_out->name_type = 11959df97f1SXueming Li MLX5_PHYS_PORT_NAME_TYPE_PFSF; 12059df97f1SXueming Li return; 12159df97f1SXueming Li } 12259df97f1SXueming Li } 12379aa4307SOphir Munk /* 12479aa4307SOphir Munk * Check for port-name as a string of the form p0 12579aa4307SOphir Munk * (support kernel ver >= 5.0, or OFED ver >= 4.6). 12679aa4307SOphir Munk */ 1273590881bSViacheslav Ovsiienko sc_items = sscanf(port_name_in, "%c%d%c", 1283590881bSViacheslav Ovsiienko &pf_c1, &port_info_out->port_name, &eol); 12979aa4307SOphir Munk if (sc_items == 2 && pf_c1 == 'p') { 13079aa4307SOphir Munk port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 13179aa4307SOphir Munk return; 13279aa4307SOphir Munk } 133420bbdaeSViacheslav Ovsiienko /* 134420bbdaeSViacheslav Ovsiienko * Check for port-name as a string of the form pf0 135420bbdaeSViacheslav Ovsiienko * (support kernel ver >= 5.7 for HPF representor on BF). 136420bbdaeSViacheslav Ovsiienko */ 1373590881bSViacheslav Ovsiienko sc_items = sscanf(port_name_in, "%c%c%d%c", 1383590881bSViacheslav Ovsiienko &pf_c1, &pf_c2, &port_info_out->pf_num, &eol); 139420bbdaeSViacheslav Ovsiienko if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') { 140420bbdaeSViacheslav Ovsiienko port_info_out->port_name = -1; 141420bbdaeSViacheslav Ovsiienko port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF; 142420bbdaeSViacheslav Ovsiienko return; 143420bbdaeSViacheslav Ovsiienko } 14479aa4307SOphir Munk /* Check for port-name as a number (support kernel ver < 5.0 */ 14579aa4307SOphir Munk errno = 0; 14679aa4307SOphir Munk port_info_out->port_name = strtol(port_name_in, &end, 0); 14779aa4307SOphir Munk if (!errno && 14879aa4307SOphir Munk (size_t)(end - port_name_in) == strlen(port_name_in)) { 14979aa4307SOphir Munk port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 15079aa4307SOphir Munk return; 15179aa4307SOphir Munk } 15279aa4307SOphir Munk port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 15379aa4307SOphir Munk } 15479aa4307SOphir Munk 155aec086c9SMatan Azrad int 156aec086c9SMatan Azrad mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname) 157aec086c9SMatan Azrad { 158aec086c9SMatan Azrad DIR *dir; 159aec086c9SMatan Azrad struct dirent *dent; 160aec086c9SMatan Azrad unsigned int dev_type = 0; 161aec086c9SMatan Azrad unsigned int dev_port_prev = ~0u; 162aec086c9SMatan Azrad char match[IF_NAMESIZE] = ""; 163aec086c9SMatan Azrad 164aec086c9SMatan Azrad MLX5_ASSERT(ibdev_path); 165aec086c9SMatan Azrad { 166aec086c9SMatan Azrad MKSTR(path, "%s/device/net", ibdev_path); 167aec086c9SMatan Azrad 168aec086c9SMatan Azrad dir = opendir(path); 169aec086c9SMatan Azrad if (dir == NULL) { 170aec086c9SMatan Azrad rte_errno = errno; 171aec086c9SMatan Azrad return -rte_errno; 172aec086c9SMatan Azrad } 173aec086c9SMatan Azrad } 174aec086c9SMatan Azrad while ((dent = readdir(dir)) != NULL) { 175aec086c9SMatan Azrad char *name = dent->d_name; 176aec086c9SMatan Azrad FILE *file; 177aec086c9SMatan Azrad unsigned int dev_port; 178aec086c9SMatan Azrad int r; 179aec086c9SMatan Azrad 180aec086c9SMatan Azrad if ((name[0] == '.') && 181aec086c9SMatan Azrad ((name[1] == '\0') || 182aec086c9SMatan Azrad ((name[1] == '.') && (name[2] == '\0')))) 183aec086c9SMatan Azrad continue; 184aec086c9SMatan Azrad 185aec086c9SMatan Azrad MKSTR(path, "%s/device/net/%s/%s", 186aec086c9SMatan Azrad ibdev_path, name, 187aec086c9SMatan Azrad (dev_type ? "dev_id" : "dev_port")); 188aec086c9SMatan Azrad 189aec086c9SMatan Azrad file = fopen(path, "rb"); 190aec086c9SMatan Azrad if (file == NULL) { 191aec086c9SMatan Azrad if (errno != ENOENT) 192aec086c9SMatan Azrad continue; 193aec086c9SMatan Azrad /* 194aec086c9SMatan Azrad * Switch to dev_id when dev_port does not exist as 195aec086c9SMatan Azrad * is the case with Linux kernel versions < 3.15. 196aec086c9SMatan Azrad */ 197aec086c9SMatan Azrad try_dev_id: 198aec086c9SMatan Azrad match[0] = '\0'; 199aec086c9SMatan Azrad if (dev_type) 200aec086c9SMatan Azrad break; 201aec086c9SMatan Azrad dev_type = 1; 202aec086c9SMatan Azrad dev_port_prev = ~0u; 203aec086c9SMatan Azrad rewinddir(dir); 204aec086c9SMatan Azrad continue; 205aec086c9SMatan Azrad } 206aec086c9SMatan Azrad r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 207aec086c9SMatan Azrad fclose(file); 208aec086c9SMatan Azrad if (r != 1) 209aec086c9SMatan Azrad continue; 210aec086c9SMatan Azrad /* 211aec086c9SMatan Azrad * Switch to dev_id when dev_port returns the same value for 212aec086c9SMatan Azrad * all ports. May happen when using a MOFED release older than 213aec086c9SMatan Azrad * 3.0 with a Linux kernel >= 3.15. 214aec086c9SMatan Azrad */ 215aec086c9SMatan Azrad if (dev_port == dev_port_prev) 216aec086c9SMatan Azrad goto try_dev_id; 217aec086c9SMatan Azrad dev_port_prev = dev_port; 218aec086c9SMatan Azrad if (dev_port == 0) 219aec086c9SMatan Azrad strlcpy(match, name, IF_NAMESIZE); 220aec086c9SMatan Azrad } 221aec086c9SMatan Azrad closedir(dir); 222aec086c9SMatan Azrad if (match[0] == '\0') { 223aec086c9SMatan Azrad rte_errno = ENOENT; 224aec086c9SMatan Azrad return -rte_errno; 225aec086c9SMatan Azrad } 226aec086c9SMatan Azrad strncpy(ifname, match, IF_NAMESIZE); 227aec086c9SMatan Azrad return 0; 228aec086c9SMatan Azrad } 229aec086c9SMatan Azrad 23079aa4307SOphir Munk #ifdef MLX5_GLUE 23179aa4307SOphir Munk 23279aa4307SOphir Munk /** 23379aa4307SOphir Munk * Suffix RTE_EAL_PMD_PATH with "-glue". 23479aa4307SOphir Munk * 23579aa4307SOphir Munk * This function performs a sanity check on RTE_EAL_PMD_PATH before 23679aa4307SOphir Munk * suffixing its last component. 23779aa4307SOphir Munk * 23879aa4307SOphir Munk * @param buf[out] 23979aa4307SOphir Munk * Output buffer, should be large enough otherwise NULL is returned. 24079aa4307SOphir Munk * @param size 24179aa4307SOphir Munk * Size of @p out. 24279aa4307SOphir Munk * 24379aa4307SOphir Munk * @return 24479aa4307SOphir Munk * Pointer to @p buf or @p NULL in case suffix cannot be appended. 24579aa4307SOphir Munk */ 24679aa4307SOphir Munk static char * 24779aa4307SOphir Munk mlx5_glue_path(char *buf, size_t size) 24879aa4307SOphir Munk { 24979aa4307SOphir Munk static const char *const bad[] = { "/", ".", "..", NULL }; 25079aa4307SOphir Munk const char *path = RTE_EAL_PMD_PATH; 25179aa4307SOphir Munk size_t len = strlen(path); 25279aa4307SOphir Munk size_t off; 25379aa4307SOphir Munk int i; 25479aa4307SOphir Munk 25579aa4307SOphir Munk while (len && path[len - 1] == '/') 25679aa4307SOphir Munk --len; 25779aa4307SOphir Munk for (off = len; off && path[off - 1] != '/'; --off) 25879aa4307SOphir Munk ; 25979aa4307SOphir Munk for (i = 0; bad[i]; ++i) 26079aa4307SOphir Munk if (!strncmp(path + off, bad[i], (int)(len - off))) 26179aa4307SOphir Munk goto error; 26279aa4307SOphir Munk i = snprintf(buf, size, "%.*s-glue", (int)len, path); 26379aa4307SOphir Munk if (i == -1 || (size_t)i >= size) 26479aa4307SOphir Munk goto error; 26579aa4307SOphir Munk return buf; 26679aa4307SOphir Munk error: 26779aa4307SOphir Munk RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of" 26879aa4307SOphir Munk " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please" 26979aa4307SOphir Munk " re-configure DPDK"); 27079aa4307SOphir Munk return NULL; 27179aa4307SOphir Munk } 27279aa4307SOphir Munk 27379aa4307SOphir Munk static int 27479aa4307SOphir Munk mlx5_glue_dlopen(void) 27579aa4307SOphir Munk { 27679aa4307SOphir Munk char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 27779aa4307SOphir Munk void *handle = NULL; 27879aa4307SOphir Munk 27979aa4307SOphir Munk char const *path[] = { 28079aa4307SOphir Munk /* 28179aa4307SOphir Munk * A basic security check is necessary before trusting 28279aa4307SOphir Munk * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 28379aa4307SOphir Munk */ 28479aa4307SOphir Munk (geteuid() == getuid() && getegid() == getgid() ? 28579aa4307SOphir Munk getenv("MLX5_GLUE_PATH") : NULL), 28679aa4307SOphir Munk /* 28779aa4307SOphir Munk * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 28879aa4307SOphir Munk * variant, otherwise let dlopen() look up libraries on its 28979aa4307SOphir Munk * own. 29079aa4307SOphir Munk */ 29179aa4307SOphir Munk (*RTE_EAL_PMD_PATH ? 29279aa4307SOphir Munk mlx5_glue_path(glue_path, sizeof(glue_path)) : ""), 29379aa4307SOphir Munk }; 29479aa4307SOphir Munk unsigned int i = 0; 29579aa4307SOphir Munk void **sym; 29679aa4307SOphir Munk const char *dlmsg; 29779aa4307SOphir Munk 29879aa4307SOphir Munk while (!handle && i != RTE_DIM(path)) { 29979aa4307SOphir Munk const char *end; 30079aa4307SOphir Munk size_t len; 30179aa4307SOphir Munk int ret; 30279aa4307SOphir Munk 30379aa4307SOphir Munk if (!path[i]) { 30479aa4307SOphir Munk ++i; 30579aa4307SOphir Munk continue; 30679aa4307SOphir Munk } 30779aa4307SOphir Munk end = strpbrk(path[i], ":;"); 30879aa4307SOphir Munk if (!end) 30979aa4307SOphir Munk end = path[i] + strlen(path[i]); 31079aa4307SOphir Munk len = end - path[i]; 31179aa4307SOphir Munk ret = 0; 31279aa4307SOphir Munk do { 31379aa4307SOphir Munk char name[ret + 1]; 31479aa4307SOphir Munk 31579aa4307SOphir Munk ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE, 31679aa4307SOphir Munk (int)len, path[i], 31779aa4307SOphir Munk (!len || *(end - 1) == '/') ? "" : "/"); 31879aa4307SOphir Munk if (ret == -1) 31979aa4307SOphir Munk break; 32079aa4307SOphir Munk if (sizeof(name) != (size_t)ret + 1) 32179aa4307SOphir Munk continue; 32279aa4307SOphir Munk DRV_LOG(DEBUG, "Looking for rdma-core glue as " 32379aa4307SOphir Munk "\"%s\"", name); 32479aa4307SOphir Munk handle = dlopen(name, RTLD_LAZY); 32579aa4307SOphir Munk break; 32679aa4307SOphir Munk } while (1); 32779aa4307SOphir Munk path[i] = end + 1; 32879aa4307SOphir Munk if (!*end) 32979aa4307SOphir Munk ++i; 33079aa4307SOphir Munk } 33179aa4307SOphir Munk if (!handle) { 33279aa4307SOphir Munk rte_errno = EINVAL; 33379aa4307SOphir Munk dlmsg = dlerror(); 33479aa4307SOphir Munk if (dlmsg) 33579aa4307SOphir Munk DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg); 33679aa4307SOphir Munk goto glue_error; 33779aa4307SOphir Munk } 33879aa4307SOphir Munk sym = dlsym(handle, "mlx5_glue"); 33979aa4307SOphir Munk if (!sym || !*sym) { 34079aa4307SOphir Munk rte_errno = EINVAL; 34179aa4307SOphir Munk dlmsg = dlerror(); 34279aa4307SOphir Munk if (dlmsg) 34379aa4307SOphir Munk DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg); 34479aa4307SOphir Munk goto glue_error; 34579aa4307SOphir Munk } 34679aa4307SOphir Munk mlx5_glue = *sym; 34779aa4307SOphir Munk return 0; 34879aa4307SOphir Munk 34979aa4307SOphir Munk glue_error: 35079aa4307SOphir Munk if (handle) 35179aa4307SOphir Munk dlclose(handle); 35279aa4307SOphir Munk return -1; 35379aa4307SOphir Munk } 35479aa4307SOphir Munk 35579aa4307SOphir Munk #endif 35679aa4307SOphir Munk 35779aa4307SOphir Munk /** 35879aa4307SOphir Munk * Initialization routine for run-time dependency on rdma-core. 35979aa4307SOphir Munk */ 36079aa4307SOphir Munk void 36179aa4307SOphir Munk mlx5_glue_constructor(void) 36279aa4307SOphir Munk { 36379aa4307SOphir Munk /* 36479aa4307SOphir Munk * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 36579aa4307SOphir Munk * huge pages. Calling ibv_fork_init() during init allows 36679aa4307SOphir Munk * applications to use fork() safely for purposes other than 36779aa4307SOphir Munk * using this PMD, which is not supported in forked processes. 36879aa4307SOphir Munk */ 36979aa4307SOphir Munk setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 37079aa4307SOphir Munk /* Match the size of Rx completion entry to the size of a cacheline. */ 37179aa4307SOphir Munk if (RTE_CACHE_LINE_SIZE == 128) 37279aa4307SOphir Munk setenv("MLX5_CQE_SIZE", "128", 0); 37379aa4307SOphir Munk /* 37479aa4307SOphir Munk * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to 37579aa4307SOphir Munk * cleanup all the Verbs resources even when the device was removed. 37679aa4307SOphir Munk */ 37779aa4307SOphir Munk setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1); 37879aa4307SOphir Munk 37979aa4307SOphir Munk #ifdef MLX5_GLUE 38079aa4307SOphir Munk if (mlx5_glue_dlopen() != 0) 38179aa4307SOphir Munk goto glue_error; 38279aa4307SOphir Munk #endif 38379aa4307SOphir Munk 38479aa4307SOphir Munk #ifdef RTE_LIBRTE_MLX5_DEBUG 38579aa4307SOphir Munk /* Glue structure must not contain any NULL pointers. */ 38679aa4307SOphir Munk { 38779aa4307SOphir Munk unsigned int i; 38879aa4307SOphir Munk 38979aa4307SOphir Munk for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i) 39079aa4307SOphir Munk MLX5_ASSERT(((const void *const *)mlx5_glue)[i]); 39179aa4307SOphir Munk } 39279aa4307SOphir Munk #endif 39379aa4307SOphir Munk if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) { 39479aa4307SOphir Munk rte_errno = EINVAL; 39579aa4307SOphir Munk DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is " 39679aa4307SOphir Munk "required", mlx5_glue->version, MLX5_GLUE_VERSION); 39779aa4307SOphir Munk goto glue_error; 39879aa4307SOphir Munk } 39979aa4307SOphir Munk mlx5_glue->fork_init(); 40079aa4307SOphir Munk return; 40179aa4307SOphir Munk 40279aa4307SOphir Munk glue_error: 40379aa4307SOphir Munk DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing" 40479aa4307SOphir Munk " run-time dependency on rdma-core libraries (libibverbs," 40579aa4307SOphir Munk " libmlx5)"); 40679aa4307SOphir Munk mlx5_glue = NULL; 40779aa4307SOphir Munk } 408262c7ad0SOri Kam 409*662d0dc6SMichael Baum static struct ibv_device * 410ad435d32SXueming Li mlx5_os_get_ibv_device(const struct rte_pci_addr *addr) 411c31f3f7fSShiri Kuzin { 412c31f3f7fSShiri Kuzin int n; 413c31f3f7fSShiri Kuzin struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n); 414c31f3f7fSShiri Kuzin struct ibv_device *ibv_match = NULL; 415c31f3f7fSShiri Kuzin 416c31f3f7fSShiri Kuzin if (ibv_list == NULL) { 417c31f3f7fSShiri Kuzin rte_errno = ENOSYS; 418c31f3f7fSShiri Kuzin return NULL; 419c31f3f7fSShiri Kuzin } 420c31f3f7fSShiri Kuzin while (n-- > 0) { 421c31f3f7fSShiri Kuzin struct rte_pci_addr paddr; 422c31f3f7fSShiri Kuzin 423c31f3f7fSShiri Kuzin DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name); 4244d567938SThomas Monjalon if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0) 425c31f3f7fSShiri Kuzin continue; 426c31f3f7fSShiri Kuzin if (rte_pci_addr_cmp(addr, &paddr) != 0) 427c31f3f7fSShiri Kuzin continue; 428c31f3f7fSShiri Kuzin ibv_match = ibv_list[n]; 429c31f3f7fSShiri Kuzin break; 430c31f3f7fSShiri Kuzin } 431ca1418ceSMichael Baum if (ibv_match == NULL) { 432ca1418ceSMichael Baum DRV_LOG(WARNING, 433ca1418ceSMichael Baum "No Verbs device matches PCI device " PCI_PRI_FMT "," 434ca1418ceSMichael Baum " are kernel drivers loaded?", 435ca1418ceSMichael Baum addr->domain, addr->bus, addr->devid, addr->function); 436c31f3f7fSShiri Kuzin rte_errno = ENOENT; 437ca1418ceSMichael Baum } 438c31f3f7fSShiri Kuzin mlx5_glue->free_device_list(ibv_list); 439c31f3f7fSShiri Kuzin return ibv_match; 440c31f3f7fSShiri Kuzin } 441887183efSMichael Baum 442*662d0dc6SMichael Baum /* Try to disable ROCE by Netlink\Devlink. */ 443*662d0dc6SMichael Baum static int 444*662d0dc6SMichael Baum mlx5_nl_roce_disable(const char *addr) 445*662d0dc6SMichael Baum { 446*662d0dc6SMichael Baum int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC); 447*662d0dc6SMichael Baum int devlink_id; 448*662d0dc6SMichael Baum int enable; 449*662d0dc6SMichael Baum int ret; 450*662d0dc6SMichael Baum 451*662d0dc6SMichael Baum if (nlsk_fd < 0) 452*662d0dc6SMichael Baum return nlsk_fd; 453*662d0dc6SMichael Baum devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd); 454*662d0dc6SMichael Baum if (devlink_id < 0) { 455*662d0dc6SMichael Baum ret = devlink_id; 456*662d0dc6SMichael Baum DRV_LOG(DEBUG, 457*662d0dc6SMichael Baum "Failed to get devlink id for ROCE operations by Netlink."); 458*662d0dc6SMichael Baum goto close; 459*662d0dc6SMichael Baum } 460*662d0dc6SMichael Baum ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable); 461*662d0dc6SMichael Baum if (ret) { 462*662d0dc6SMichael Baum DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.", 463*662d0dc6SMichael Baum ret); 464*662d0dc6SMichael Baum goto close; 465*662d0dc6SMichael Baum } else if (!enable) { 466*662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE has already disabled(Netlink)."); 467*662d0dc6SMichael Baum goto close; 468*662d0dc6SMichael Baum } 469*662d0dc6SMichael Baum ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0); 470*662d0dc6SMichael Baum if (ret) 471*662d0dc6SMichael Baum DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret); 472*662d0dc6SMichael Baum else 473*662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE is disabled by Netlink successfully."); 474*662d0dc6SMichael Baum close: 475*662d0dc6SMichael Baum close(nlsk_fd); 476*662d0dc6SMichael Baum return ret; 477*662d0dc6SMichael Baum } 478*662d0dc6SMichael Baum 479*662d0dc6SMichael Baum /* Try to disable ROCE by sysfs. */ 480*662d0dc6SMichael Baum static int 481*662d0dc6SMichael Baum mlx5_sys_roce_disable(const char *addr) 482*662d0dc6SMichael Baum { 483*662d0dc6SMichael Baum FILE *file_o; 484*662d0dc6SMichael Baum int enable; 485*662d0dc6SMichael Baum int ret; 486*662d0dc6SMichael Baum 487*662d0dc6SMichael Baum MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr); 488*662d0dc6SMichael Baum file_o = fopen(file_p, "rb"); 489*662d0dc6SMichael Baum if (!file_o) { 490*662d0dc6SMichael Baum rte_errno = ENOTSUP; 491*662d0dc6SMichael Baum return -ENOTSUP; 492*662d0dc6SMichael Baum } 493*662d0dc6SMichael Baum ret = fscanf(file_o, "%d", &enable); 494*662d0dc6SMichael Baum if (ret != 1) { 495*662d0dc6SMichael Baum rte_errno = EINVAL; 496*662d0dc6SMichael Baum ret = EINVAL; 497*662d0dc6SMichael Baum goto close; 498*662d0dc6SMichael Baum } else if (!enable) { 499*662d0dc6SMichael Baum ret = 0; 500*662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE has already disabled(sysfs)."); 501*662d0dc6SMichael Baum goto close; 502*662d0dc6SMichael Baum } 503*662d0dc6SMichael Baum fclose(file_o); 504*662d0dc6SMichael Baum file_o = fopen(file_p, "wb"); 505*662d0dc6SMichael Baum if (!file_o) { 506*662d0dc6SMichael Baum rte_errno = ENOTSUP; 507*662d0dc6SMichael Baum return -ENOTSUP; 508*662d0dc6SMichael Baum } 509*662d0dc6SMichael Baum fprintf(file_o, "0\n"); 510*662d0dc6SMichael Baum ret = 0; 511*662d0dc6SMichael Baum close: 512*662d0dc6SMichael Baum if (ret) 513*662d0dc6SMichael Baum DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret); 514*662d0dc6SMichael Baum else 515*662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE is disabled by sysfs successfully."); 516*662d0dc6SMichael Baum fclose(file_o); 517*662d0dc6SMichael Baum return ret; 518*662d0dc6SMichael Baum } 519*662d0dc6SMichael Baum 520*662d0dc6SMichael Baum static int 521*662d0dc6SMichael Baum mlx5_roce_disable(const struct rte_device *dev) 522*662d0dc6SMichael Baum { 523*662d0dc6SMichael Baum char pci_addr[PCI_PRI_STR_SIZE] = { 0 }; 524*662d0dc6SMichael Baum 525*662d0dc6SMichael Baum if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0) 526*662d0dc6SMichael Baum return -rte_errno; 527*662d0dc6SMichael Baum /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */ 528*662d0dc6SMichael Baum if (mlx5_nl_roce_disable(pci_addr) != 0 && 529*662d0dc6SMichael Baum mlx5_sys_roce_disable(pci_addr) != 0) 530*662d0dc6SMichael Baum return -rte_errno; 531*662d0dc6SMichael Baum return 0; 532*662d0dc6SMichael Baum } 533*662d0dc6SMichael Baum 534*662d0dc6SMichael Baum static struct ibv_device * 535*662d0dc6SMichael Baum mlx5_os_get_ibv_dev(const struct rte_device *dev) 536*662d0dc6SMichael Baum { 537*662d0dc6SMichael Baum struct ibv_device *ibv; 538*662d0dc6SMichael Baum 539*662d0dc6SMichael Baum if (mlx5_dev_is_pci(dev)) 540*662d0dc6SMichael Baum ibv = mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr); 541*662d0dc6SMichael Baum else 542*662d0dc6SMichael Baum ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev)); 543*662d0dc6SMichael Baum if (ibv == NULL) { 544*662d0dc6SMichael Baum rte_errno = ENODEV; 545*662d0dc6SMichael Baum DRV_LOG(ERR, "Verbs device not found: %s", dev->name); 546*662d0dc6SMichael Baum } 547*662d0dc6SMichael Baum return ibv; 548*662d0dc6SMichael Baum } 549*662d0dc6SMichael Baum 550*662d0dc6SMichael Baum static struct ibv_device * 551*662d0dc6SMichael Baum mlx5_vdpa_get_ibv_dev(const struct rte_device *dev) 552*662d0dc6SMichael Baum { 553*662d0dc6SMichael Baum struct ibv_device *ibv; 554*662d0dc6SMichael Baum int retry; 555*662d0dc6SMichael Baum 556*662d0dc6SMichael Baum if (mlx5_roce_disable(dev) != 0) { 557*662d0dc6SMichael Baum DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".", 558*662d0dc6SMichael Baum dev->name); 559*662d0dc6SMichael Baum return NULL; 560*662d0dc6SMichael Baum } 561*662d0dc6SMichael Baum /* Wait for the IB device to appear again after reload. */ 562*662d0dc6SMichael Baum for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) { 563*662d0dc6SMichael Baum ibv = mlx5_os_get_ibv_dev(dev); 564*662d0dc6SMichael Baum if (ibv != NULL) 565*662d0dc6SMichael Baum return ibv; 566*662d0dc6SMichael Baum usleep(MLX5_VDPA_USEC); 567*662d0dc6SMichael Baum } 568*662d0dc6SMichael Baum DRV_LOG(ERR, 569*662d0dc6SMichael Baum "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.", 570*662d0dc6SMichael Baum dev->name, MLX5_VDPA_MAX_RETRIES); 571*662d0dc6SMichael Baum rte_errno = EAGAIN; 572*662d0dc6SMichael Baum return NULL; 573*662d0dc6SMichael Baum } 574*662d0dc6SMichael Baum 575887183efSMichael Baum static int 576887183efSMichael Baum mlx5_config_doorbell_mapping_env(int dbnc) 577887183efSMichael Baum { 578887183efSMichael Baum char *env; 579887183efSMichael Baum int value; 580887183efSMichael Baum 581887183efSMichael Baum MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 582887183efSMichael Baum /* Get environment variable to store. */ 583887183efSMichael Baum env = getenv(MLX5_SHUT_UP_BF); 584887183efSMichael Baum value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET; 585887183efSMichael Baum if (dbnc == MLX5_ARG_UNSET) 586887183efSMichael Baum setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1); 587887183efSMichael Baum else 588887183efSMichael Baum setenv(MLX5_SHUT_UP_BF, 589887183efSMichael Baum dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1); 590887183efSMichael Baum return value; 591887183efSMichael Baum } 592887183efSMichael Baum 593887183efSMichael Baum static void 594887183efSMichael Baum mlx5_restore_doorbell_mapping_env(int value) 595887183efSMichael Baum { 596887183efSMichael Baum MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 597887183efSMichael Baum /* Restore the original environment variable state. */ 598887183efSMichael Baum if (value == MLX5_ARG_UNSET) 599887183efSMichael Baum unsetenv(MLX5_SHUT_UP_BF); 600887183efSMichael Baum else 601887183efSMichael Baum setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1); 602887183efSMichael Baum } 603887183efSMichael Baum 604887183efSMichael Baum /** 605887183efSMichael Baum * Function API to open IB device. 606887183efSMichael Baum * 607887183efSMichael Baum * 608887183efSMichael Baum * @param cdev 609887183efSMichael Baum * Pointer to the mlx5 device. 610ca1418ceSMichael Baum * @param classes 611ca1418ceSMichael Baum * Chosen classes come from device arguments. 612887183efSMichael Baum * 613887183efSMichael Baum * @return 614887183efSMichael Baum * 0 on success, a negative errno value otherwise and rte_errno is set. 615887183efSMichael Baum */ 616887183efSMichael Baum int 617ca1418ceSMichael Baum mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes) 618887183efSMichael Baum { 619887183efSMichael Baum struct ibv_device *ibv; 620887183efSMichael Baum struct ibv_context *ctx = NULL; 621887183efSMichael Baum int dbmap_env; 622887183efSMichael Baum 623*662d0dc6SMichael Baum if (classes & MLX5_CLASS_VDPA) 624*662d0dc6SMichael Baum ibv = mlx5_vdpa_get_ibv_dev(cdev->dev); 625*662d0dc6SMichael Baum else 626887183efSMichael Baum ibv = mlx5_os_get_ibv_dev(cdev->dev); 627887183efSMichael Baum if (!ibv) 628887183efSMichael Baum return -rte_errno; 629887183efSMichael Baum DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name); 630887183efSMichael Baum /* 631887183efSMichael Baum * Configure environment variable "MLX5_BF_SHUT_UP" before the device 632887183efSMichael Baum * creation. The rdma_core library checks the variable at device 633887183efSMichael Baum * creation and stores the result internally. 634887183efSMichael Baum */ 635887183efSMichael Baum dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc); 636887183efSMichael Baum /* Try to open IB device with DV first, then usual Verbs. */ 637887183efSMichael Baum errno = 0; 638887183efSMichael Baum ctx = mlx5_glue->dv_open_device(ibv); 639887183efSMichael Baum if (ctx) { 640887183efSMichael Baum cdev->config.devx = 1; 641887183efSMichael Baum DRV_LOG(DEBUG, "DevX is supported."); 642ca1418ceSMichael Baum } else if (classes == MLX5_CLASS_ETH) { 643887183efSMichael Baum /* The environment variable is still configured. */ 644887183efSMichael Baum ctx = mlx5_glue->open_device(ibv); 645887183efSMichael Baum if (ctx == NULL) 646887183efSMichael Baum goto error; 647887183efSMichael Baum DRV_LOG(DEBUG, "DevX is NOT supported."); 648ca1418ceSMichael Baum } else { 649ca1418ceSMichael Baum goto error; 650887183efSMichael Baum } 651887183efSMichael Baum /* The device is created, no need for environment. */ 652887183efSMichael Baum mlx5_restore_doorbell_mapping_env(dbmap_env); 653887183efSMichael Baum /* Hint libmlx5 to use PMD allocator for data plane resources */ 654887183efSMichael Baum mlx5_set_context_attr(cdev->dev, ctx); 655ca1418ceSMichael Baum cdev->ctx = ctx; 656887183efSMichael Baum return 0; 657887183efSMichael Baum error: 658887183efSMichael Baum rte_errno = errno ? errno : ENODEV; 659887183efSMichael Baum /* The device creation is failed, no need for environment. */ 660887183efSMichael Baum mlx5_restore_doorbell_mapping_env(dbmap_env); 661887183efSMichael Baum DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name); 662887183efSMichael Baum return -rte_errno; 663887183efSMichael Baum } 664