179aa4307SOphir Munk /* SPDX-License-Identifier: BSD-3-Clause 279aa4307SOphir Munk * Copyright 2020 Mellanox Technologies, Ltd 379aa4307SOphir Munk */ 479aa4307SOphir Munk 54c74ad3eSRongwei Liu #include <sys/types.h> 679aa4307SOphir Munk #include <unistd.h> 779aa4307SOphir Munk #include <string.h> 879aa4307SOphir Munk #include <stdio.h> 979aa4307SOphir Munk #ifdef RTE_IBVERBS_LINK_DLOPEN 1079aa4307SOphir Munk #include <dlfcn.h> 1179aa4307SOphir Munk #endif 12aec086c9SMatan Azrad #include <dirent.h> 13aec086c9SMatan Azrad #include <net/if.h> 1472d7efe4SSpike Du #include <fcntl.h> 1579aa4307SOphir Munk 1679aa4307SOphir Munk #include <rte_errno.h> 17aec086c9SMatan Azrad #include <rte_string_fns.h> 181f37cb2bSDavid Marchand #include <bus_pci_driver.h> 19b3f89090SDavid Marchand #include <bus_auxiliary_driver.h> 2079aa4307SOphir Munk 2179aa4307SOphir Munk #include "mlx5_common.h" 22662d0dc6SMichael Baum #include "mlx5_nl.h" 2325245d5dSShiri Kuzin #include "mlx5_common_log.h" 24662d0dc6SMichael Baum #include "mlx5_common_private.h" 25887183efSMichael Baum #include "mlx5_common_defs.h" 26c31f3f7fSShiri Kuzin #include "mlx5_common_os.h" 2779aa4307SOphir Munk #include "mlx5_glue.h" 2879aa4307SOphir Munk 2979aa4307SOphir Munk #ifdef MLX5_GLUE 3079aa4307SOphir Munk const struct mlx5_glue *mlx5_glue; 3179aa4307SOphir Munk #endif 3279aa4307SOphir Munk 3379aa4307SOphir Munk int 344d567938SThomas Monjalon mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr) 3579aa4307SOphir Munk { 3679aa4307SOphir Munk FILE *file; 3779aa4307SOphir Munk char line[32]; 38482a1d34SViacheslav Ovsiienko int rc = -ENOENT; 3979aa4307SOphir Munk MKSTR(path, "%s/device/uevent", dev_path); 4079aa4307SOphir Munk 4179aa4307SOphir Munk file = fopen(path, "rb"); 4279aa4307SOphir Munk if (file == NULL) { 4379aa4307SOphir Munk rte_errno = errno; 4479aa4307SOphir Munk return -rte_errno; 4579aa4307SOphir Munk } 4679aa4307SOphir Munk while (fgets(line, sizeof(line), file) == line) { 4779aa4307SOphir Munk size_t len = strlen(line); 4879aa4307SOphir Munk 4979aa4307SOphir Munk /* Truncate long lines. */ 50482a1d34SViacheslav Ovsiienko if (len == (sizeof(line) - 1)) { 5179aa4307SOphir Munk while (line[(len - 1)] != '\n') { 52482a1d34SViacheslav Ovsiienko int ret = fgetc(file); 53482a1d34SViacheslav Ovsiienko 5479aa4307SOphir Munk if (ret == EOF) 55482a1d34SViacheslav Ovsiienko goto exit; 5679aa4307SOphir Munk line[(len - 1)] = ret; 5779aa4307SOphir Munk } 58482a1d34SViacheslav Ovsiienko /* No match for long lines. */ 59482a1d34SViacheslav Ovsiienko continue; 60482a1d34SViacheslav Ovsiienko } 6179aa4307SOphir Munk /* Extract information. */ 6279aa4307SOphir Munk if (sscanf(line, 6379aa4307SOphir Munk "PCI_SLOT_NAME=" 6479aa4307SOphir Munk "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n", 6579aa4307SOphir Munk &pci_addr->domain, 6679aa4307SOphir Munk &pci_addr->bus, 6779aa4307SOphir Munk &pci_addr->devid, 6879aa4307SOphir Munk &pci_addr->function) == 4) { 69482a1d34SViacheslav Ovsiienko rc = 0; 7079aa4307SOphir Munk break; 7179aa4307SOphir Munk } 7279aa4307SOphir Munk } 73482a1d34SViacheslav Ovsiienko exit: 7479aa4307SOphir Munk fclose(file); 75482a1d34SViacheslav Ovsiienko if (rc) 76482a1d34SViacheslav Ovsiienko rte_errno = -rc; 77482a1d34SViacheslav Ovsiienko return rc; 7879aa4307SOphir Munk } 7979aa4307SOphir Munk 8079aa4307SOphir Munk /** 8179aa4307SOphir Munk * Extract port name, as a number, from sysfs or netlink information. 8279aa4307SOphir Munk * 8379aa4307SOphir Munk * @param[in] port_name_in 8479aa4307SOphir Munk * String representing the port name. 8579aa4307SOphir Munk * @param[out] port_info_out 8679aa4307SOphir Munk * Port information, including port name as a number and port name 8779aa4307SOphir Munk * type if recognized 8879aa4307SOphir Munk * 8979aa4307SOphir Munk * @return 9079aa4307SOphir Munk * port_name field set according to recognized name format. 9179aa4307SOphir Munk */ 9279aa4307SOphir Munk void 9379aa4307SOphir Munk mlx5_translate_port_name(const char *port_name_in, 9479aa4307SOphir Munk struct mlx5_switch_info *port_info_out) 9579aa4307SOphir Munk { 9659df97f1SXueming Li char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol; 9779aa4307SOphir Munk char *end; 9879aa4307SOphir Munk int sc_items; 9979aa4307SOphir Munk 10059df97f1SXueming Li sc_items = sscanf(port_name_in, "%c%d", 10159df97f1SXueming Li &ctrl, &port_info_out->ctrl_num); 10259df97f1SXueming Li if (sc_items == 2 && ctrl == 'c') { 10359df97f1SXueming Li port_name_in++; /* 'c' */ 10459df97f1SXueming Li port_name_in += snprintf(NULL, 0, "%d", 10559df97f1SXueming Li port_info_out->ctrl_num); 10659df97f1SXueming Li } 10759df97f1SXueming Li /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */ 1083590881bSViacheslav Ovsiienko sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c", 10979aa4307SOphir Munk &pf_c1, &pf_c2, &port_info_out->pf_num, 1103590881bSViacheslav Ovsiienko &vf_c1, &vf_c2, &port_info_out->port_name, &eol); 11159df97f1SXueming Li if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') { 11259df97f1SXueming Li if (vf_c1 == 'v' && vf_c2 == 'f') { 11359df97f1SXueming Li /* Kernel ver >= 5.0 or OFED ver >= 4.6 */ 11459df97f1SXueming Li port_info_out->name_type = 11559df97f1SXueming Li MLX5_PHYS_PORT_NAME_TYPE_PFVF; 11679aa4307SOphir Munk return; 11779aa4307SOphir Munk } 11859df97f1SXueming Li if (vf_c1 == 's' && vf_c2 == 'f') { 11959df97f1SXueming Li /* Kernel ver >= 5.11 or OFED ver >= 5.1 */ 12059df97f1SXueming Li port_info_out->name_type = 12159df97f1SXueming Li MLX5_PHYS_PORT_NAME_TYPE_PFSF; 12259df97f1SXueming Li return; 12359df97f1SXueming Li } 12459df97f1SXueming Li } 12579aa4307SOphir Munk /* 12679aa4307SOphir Munk * Check for port-name as a string of the form p0 12779aa4307SOphir Munk * (support kernel ver >= 5.0, or OFED ver >= 4.6). 12879aa4307SOphir Munk */ 1293590881bSViacheslav Ovsiienko sc_items = sscanf(port_name_in, "%c%d%c", 1303590881bSViacheslav Ovsiienko &pf_c1, &port_info_out->port_name, &eol); 13179aa4307SOphir Munk if (sc_items == 2 && pf_c1 == 'p') { 13279aa4307SOphir Munk port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; 13379aa4307SOphir Munk return; 13479aa4307SOphir Munk } 135420bbdaeSViacheslav Ovsiienko /* 136420bbdaeSViacheslav Ovsiienko * Check for port-name as a string of the form pf0 137420bbdaeSViacheslav Ovsiienko * (support kernel ver >= 5.7 for HPF representor on BF). 138420bbdaeSViacheslav Ovsiienko */ 1393590881bSViacheslav Ovsiienko sc_items = sscanf(port_name_in, "%c%c%d%c", 1403590881bSViacheslav Ovsiienko &pf_c1, &pf_c2, &port_info_out->pf_num, &eol); 141420bbdaeSViacheslav Ovsiienko if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') { 142420bbdaeSViacheslav Ovsiienko port_info_out->port_name = -1; 143420bbdaeSViacheslav Ovsiienko port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF; 144420bbdaeSViacheslav Ovsiienko return; 145420bbdaeSViacheslav Ovsiienko } 14679aa4307SOphir Munk /* Check for port-name as a number (support kernel ver < 5.0 */ 14779aa4307SOphir Munk errno = 0; 14879aa4307SOphir Munk port_info_out->port_name = strtol(port_name_in, &end, 0); 14979aa4307SOphir Munk if (!errno && 15079aa4307SOphir Munk (size_t)(end - port_name_in) == strlen(port_name_in)) { 15179aa4307SOphir Munk port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; 15279aa4307SOphir Munk return; 15379aa4307SOphir Munk } 15479aa4307SOphir Munk port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; 15579aa4307SOphir Munk } 15679aa4307SOphir Munk 157aec086c9SMatan Azrad int 158aec086c9SMatan Azrad mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname) 159aec086c9SMatan Azrad { 160aec086c9SMatan Azrad DIR *dir; 161aec086c9SMatan Azrad struct dirent *dent; 162aec086c9SMatan Azrad unsigned int dev_type = 0; 163aec086c9SMatan Azrad unsigned int dev_port_prev = ~0u; 164aec086c9SMatan Azrad char match[IF_NAMESIZE] = ""; 165aec086c9SMatan Azrad 166aec086c9SMatan Azrad MLX5_ASSERT(ibdev_path); 167aec086c9SMatan Azrad { 168aec086c9SMatan Azrad MKSTR(path, "%s/device/net", ibdev_path); 169aec086c9SMatan Azrad 170aec086c9SMatan Azrad dir = opendir(path); 171aec086c9SMatan Azrad if (dir == NULL) { 172aec086c9SMatan Azrad rte_errno = errno; 173aec086c9SMatan Azrad return -rte_errno; 174aec086c9SMatan Azrad } 175aec086c9SMatan Azrad } 176aec086c9SMatan Azrad while ((dent = readdir(dir)) != NULL) { 177aec086c9SMatan Azrad char *name = dent->d_name; 178aec086c9SMatan Azrad FILE *file; 179aec086c9SMatan Azrad unsigned int dev_port; 180aec086c9SMatan Azrad int r; 181aec086c9SMatan Azrad 182aec086c9SMatan Azrad if ((name[0] == '.') && 183aec086c9SMatan Azrad ((name[1] == '\0') || 184aec086c9SMatan Azrad ((name[1] == '.') && (name[2] == '\0')))) 185aec086c9SMatan Azrad continue; 186aec086c9SMatan Azrad 187aec086c9SMatan Azrad MKSTR(path, "%s/device/net/%s/%s", 188aec086c9SMatan Azrad ibdev_path, name, 189aec086c9SMatan Azrad (dev_type ? "dev_id" : "dev_port")); 190aec086c9SMatan Azrad 191aec086c9SMatan Azrad file = fopen(path, "rb"); 192aec086c9SMatan Azrad if (file == NULL) { 193aec086c9SMatan Azrad if (errno != ENOENT) 194aec086c9SMatan Azrad continue; 195aec086c9SMatan Azrad /* 196aec086c9SMatan Azrad * Switch to dev_id when dev_port does not exist as 197aec086c9SMatan Azrad * is the case with Linux kernel versions < 3.15. 198aec086c9SMatan Azrad */ 199aec086c9SMatan Azrad try_dev_id: 200aec086c9SMatan Azrad match[0] = '\0'; 201aec086c9SMatan Azrad if (dev_type) 202aec086c9SMatan Azrad break; 203aec086c9SMatan Azrad dev_type = 1; 204aec086c9SMatan Azrad dev_port_prev = ~0u; 205aec086c9SMatan Azrad rewinddir(dir); 206aec086c9SMatan Azrad continue; 207aec086c9SMatan Azrad } 208aec086c9SMatan Azrad r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port); 209aec086c9SMatan Azrad fclose(file); 210aec086c9SMatan Azrad if (r != 1) 211aec086c9SMatan Azrad continue; 212aec086c9SMatan Azrad /* 213aec086c9SMatan Azrad * Switch to dev_id when dev_port returns the same value for 214aec086c9SMatan Azrad * all ports. May happen when using a MOFED release older than 215aec086c9SMatan Azrad * 3.0 with a Linux kernel >= 3.15. 216aec086c9SMatan Azrad */ 217aec086c9SMatan Azrad if (dev_port == dev_port_prev) 218aec086c9SMatan Azrad goto try_dev_id; 219aec086c9SMatan Azrad dev_port_prev = dev_port; 220aec086c9SMatan Azrad if (dev_port == 0) 221aec086c9SMatan Azrad strlcpy(match, name, IF_NAMESIZE); 222aec086c9SMatan Azrad } 223aec086c9SMatan Azrad closedir(dir); 224aec086c9SMatan Azrad if (match[0] == '\0') { 225aec086c9SMatan Azrad rte_errno = ENOENT; 226aec086c9SMatan Azrad return -rte_errno; 227aec086c9SMatan Azrad } 228aec086c9SMatan Azrad strncpy(ifname, match, IF_NAMESIZE); 229aec086c9SMatan Azrad return 0; 230aec086c9SMatan Azrad } 231aec086c9SMatan Azrad 23279aa4307SOphir Munk #ifdef MLX5_GLUE 23379aa4307SOphir Munk 23479aa4307SOphir Munk /** 23579aa4307SOphir Munk * Suffix RTE_EAL_PMD_PATH with "-glue". 23679aa4307SOphir Munk * 23779aa4307SOphir Munk * This function performs a sanity check on RTE_EAL_PMD_PATH before 23879aa4307SOphir Munk * suffixing its last component. 23979aa4307SOphir Munk * 24079aa4307SOphir Munk * @param buf[out] 24179aa4307SOphir Munk * Output buffer, should be large enough otherwise NULL is returned. 24279aa4307SOphir Munk * @param size 24379aa4307SOphir Munk * Size of @p out. 24479aa4307SOphir Munk * 24579aa4307SOphir Munk * @return 24679aa4307SOphir Munk * Pointer to @p buf or @p NULL in case suffix cannot be appended. 24779aa4307SOphir Munk */ 24879aa4307SOphir Munk static char * 24979aa4307SOphir Munk mlx5_glue_path(char *buf, size_t size) 25079aa4307SOphir Munk { 25179aa4307SOphir Munk static const char *const bad[] = { "/", ".", "..", NULL }; 25279aa4307SOphir Munk const char *path = RTE_EAL_PMD_PATH; 25379aa4307SOphir Munk size_t len = strlen(path); 25479aa4307SOphir Munk size_t off; 25579aa4307SOphir Munk int i; 25679aa4307SOphir Munk 25779aa4307SOphir Munk while (len && path[len - 1] == '/') 25879aa4307SOphir Munk --len; 25979aa4307SOphir Munk for (off = len; off && path[off - 1] != '/'; --off) 26079aa4307SOphir Munk ; 26179aa4307SOphir Munk for (i = 0; bad[i]; ++i) 26279aa4307SOphir Munk if (!strncmp(path + off, bad[i], (int)(len - off))) 26379aa4307SOphir Munk goto error; 26479aa4307SOphir Munk i = snprintf(buf, size, "%.*s-glue", (int)len, path); 26579aa4307SOphir Munk if (i == -1 || (size_t)i >= size) 26679aa4307SOphir Munk goto error; 26779aa4307SOphir Munk return buf; 26879aa4307SOphir Munk error: 26979aa4307SOphir Munk RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of" 27079aa4307SOphir Munk " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please" 27179aa4307SOphir Munk " re-configure DPDK"); 27279aa4307SOphir Munk return NULL; 27379aa4307SOphir Munk } 27479aa4307SOphir Munk 27579aa4307SOphir Munk static int 27679aa4307SOphir Munk mlx5_glue_dlopen(void) 27779aa4307SOphir Munk { 27879aa4307SOphir Munk char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")]; 27979aa4307SOphir Munk void *handle = NULL; 28079aa4307SOphir Munk 28179aa4307SOphir Munk char const *path[] = { 28279aa4307SOphir Munk /* 28379aa4307SOphir Munk * A basic security check is necessary before trusting 28479aa4307SOphir Munk * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH. 28579aa4307SOphir Munk */ 28679aa4307SOphir Munk (geteuid() == getuid() && getegid() == getgid() ? 28779aa4307SOphir Munk getenv("MLX5_GLUE_PATH") : NULL), 28879aa4307SOphir Munk /* 28979aa4307SOphir Munk * When RTE_EAL_PMD_PATH is set, use its glue-suffixed 29079aa4307SOphir Munk * variant, otherwise let dlopen() look up libraries on its 29179aa4307SOphir Munk * own. 29279aa4307SOphir Munk */ 29379aa4307SOphir Munk (*RTE_EAL_PMD_PATH ? 29479aa4307SOphir Munk mlx5_glue_path(glue_path, sizeof(glue_path)) : ""), 29579aa4307SOphir Munk }; 29679aa4307SOphir Munk unsigned int i = 0; 29779aa4307SOphir Munk void **sym; 29879aa4307SOphir Munk const char *dlmsg; 29979aa4307SOphir Munk 30079aa4307SOphir Munk while (!handle && i != RTE_DIM(path)) { 30179aa4307SOphir Munk const char *end; 30279aa4307SOphir Munk size_t len; 30379aa4307SOphir Munk int ret; 30479aa4307SOphir Munk 30579aa4307SOphir Munk if (!path[i]) { 30679aa4307SOphir Munk ++i; 30779aa4307SOphir Munk continue; 30879aa4307SOphir Munk } 30979aa4307SOphir Munk end = strpbrk(path[i], ":;"); 31079aa4307SOphir Munk if (!end) 31179aa4307SOphir Munk end = path[i] + strlen(path[i]); 31279aa4307SOphir Munk len = end - path[i]; 31379aa4307SOphir Munk ret = 0; 31479aa4307SOphir Munk do { 31579aa4307SOphir Munk char name[ret + 1]; 31679aa4307SOphir Munk 31779aa4307SOphir Munk ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE, 31879aa4307SOphir Munk (int)len, path[i], 31979aa4307SOphir Munk (!len || *(end - 1) == '/') ? "" : "/"); 32079aa4307SOphir Munk if (ret == -1) 32179aa4307SOphir Munk break; 32279aa4307SOphir Munk if (sizeof(name) != (size_t)ret + 1) 32379aa4307SOphir Munk continue; 32479aa4307SOphir Munk DRV_LOG(DEBUG, "Looking for rdma-core glue as " 32579aa4307SOphir Munk "\"%s\"", name); 32679aa4307SOphir Munk handle = dlopen(name, RTLD_LAZY); 32779aa4307SOphir Munk break; 32879aa4307SOphir Munk } while (1); 32979aa4307SOphir Munk path[i] = end + 1; 33079aa4307SOphir Munk if (!*end) 33179aa4307SOphir Munk ++i; 33279aa4307SOphir Munk } 33379aa4307SOphir Munk if (!handle) { 33479aa4307SOphir Munk rte_errno = EINVAL; 33579aa4307SOphir Munk dlmsg = dlerror(); 33679aa4307SOphir Munk if (dlmsg) 33779aa4307SOphir Munk DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg); 33879aa4307SOphir Munk goto glue_error; 33979aa4307SOphir Munk } 34079aa4307SOphir Munk sym = dlsym(handle, "mlx5_glue"); 34179aa4307SOphir Munk if (!sym || !*sym) { 34279aa4307SOphir Munk rte_errno = EINVAL; 34379aa4307SOphir Munk dlmsg = dlerror(); 34479aa4307SOphir Munk if (dlmsg) 34579aa4307SOphir Munk DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg); 34679aa4307SOphir Munk goto glue_error; 34779aa4307SOphir Munk } 34879aa4307SOphir Munk mlx5_glue = *sym; 34979aa4307SOphir Munk return 0; 35079aa4307SOphir Munk 35179aa4307SOphir Munk glue_error: 35279aa4307SOphir Munk if (handle) 35379aa4307SOphir Munk dlclose(handle); 35479aa4307SOphir Munk return -1; 35579aa4307SOphir Munk } 35679aa4307SOphir Munk 35779aa4307SOphir Munk #endif 35879aa4307SOphir Munk 35979aa4307SOphir Munk /** 36079aa4307SOphir Munk * Initialization routine for run-time dependency on rdma-core. 36179aa4307SOphir Munk */ 36279aa4307SOphir Munk void 36379aa4307SOphir Munk mlx5_glue_constructor(void) 36479aa4307SOphir Munk { 36579aa4307SOphir Munk /* 36679aa4307SOphir Munk * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use 36779aa4307SOphir Munk * huge pages. Calling ibv_fork_init() during init allows 36879aa4307SOphir Munk * applications to use fork() safely for purposes other than 36979aa4307SOphir Munk * using this PMD, which is not supported in forked processes. 37079aa4307SOphir Munk */ 37179aa4307SOphir Munk setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); 37279aa4307SOphir Munk /* Match the size of Rx completion entry to the size of a cacheline. */ 37379aa4307SOphir Munk if (RTE_CACHE_LINE_SIZE == 128) 37479aa4307SOphir Munk setenv("MLX5_CQE_SIZE", "128", 0); 37579aa4307SOphir Munk /* 37679aa4307SOphir Munk * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to 37779aa4307SOphir Munk * cleanup all the Verbs resources even when the device was removed. 37879aa4307SOphir Munk */ 37979aa4307SOphir Munk setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1); 38079aa4307SOphir Munk 38179aa4307SOphir Munk #ifdef MLX5_GLUE 38279aa4307SOphir Munk if (mlx5_glue_dlopen() != 0) 38379aa4307SOphir Munk goto glue_error; 38479aa4307SOphir Munk #endif 38579aa4307SOphir Munk 38679aa4307SOphir Munk #ifdef RTE_LIBRTE_MLX5_DEBUG 38779aa4307SOphir Munk /* Glue structure must not contain any NULL pointers. */ 38879aa4307SOphir Munk { 38979aa4307SOphir Munk unsigned int i; 39079aa4307SOphir Munk 39179aa4307SOphir Munk for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i) 39279aa4307SOphir Munk MLX5_ASSERT(((const void *const *)mlx5_glue)[i]); 39379aa4307SOphir Munk } 39479aa4307SOphir Munk #endif 39579aa4307SOphir Munk if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) { 39679aa4307SOphir Munk rte_errno = EINVAL; 39779aa4307SOphir Munk DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is " 39879aa4307SOphir Munk "required", mlx5_glue->version, MLX5_GLUE_VERSION); 39979aa4307SOphir Munk goto glue_error; 40079aa4307SOphir Munk } 40179aa4307SOphir Munk mlx5_glue->fork_init(); 40279aa4307SOphir Munk return; 40379aa4307SOphir Munk 40479aa4307SOphir Munk glue_error: 40579aa4307SOphir Munk DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing" 40679aa4307SOphir Munk " run-time dependency on rdma-core libraries (libibverbs," 40779aa4307SOphir Munk " libmlx5)"); 40879aa4307SOphir Munk mlx5_glue = NULL; 40979aa4307SOphir Munk } 410262c7ad0SOri Kam 411e35ccf24SMichael Baum /** 4129d936f4fSMichael Baum * Validate user arguments for remote PD and CTX. 4139d936f4fSMichael Baum * 4149d936f4fSMichael Baum * @param config 4159d936f4fSMichael Baum * Pointer to device configuration structure. 4169d936f4fSMichael Baum * 4179d936f4fSMichael Baum * @return 4189d936f4fSMichael Baum * 0 on success, a negative errno value otherwise and rte_errno is set. 4199d936f4fSMichael Baum */ 4209d936f4fSMichael Baum int 4219d936f4fSMichael Baum mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config) 4229d936f4fSMichael Baum { 4239d936f4fSMichael Baum int device_fd = config->device_fd; 4249d936f4fSMichael Baum int pd_handle = config->pd_handle; 4259d936f4fSMichael Baum 4269d936f4fSMichael Baum #ifdef HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR 4279d936f4fSMichael Baum if (device_fd == MLX5_ARG_UNSET && pd_handle != MLX5_ARG_UNSET) { 4289d936f4fSMichael Baum DRV_LOG(ERR, "Remote PD without CTX is not supported."); 4299d936f4fSMichael Baum rte_errno = EINVAL; 4309d936f4fSMichael Baum return -rte_errno; 4319d936f4fSMichael Baum } 4329d936f4fSMichael Baum if (device_fd != MLX5_ARG_UNSET && pd_handle == MLX5_ARG_UNSET) { 4339d936f4fSMichael Baum DRV_LOG(ERR, "Remote CTX without PD is not supported."); 4349d936f4fSMichael Baum rte_errno = EINVAL; 4359d936f4fSMichael Baum return -rte_errno; 4369d936f4fSMichael Baum } 4379d936f4fSMichael Baum DRV_LOG(DEBUG, "Remote PD and CTX is supported: (cmd_fd=%d, " 4389d936f4fSMichael Baum "pd_handle=%d).", device_fd, pd_handle); 4399d936f4fSMichael Baum #else 4409d936f4fSMichael Baum if (pd_handle != MLX5_ARG_UNSET || device_fd != MLX5_ARG_UNSET) { 4419d936f4fSMichael Baum DRV_LOG(ERR, 4429d936f4fSMichael Baum "Remote PD and CTX is not supported - maybe old rdma-core version?"); 4439d936f4fSMichael Baum rte_errno = ENOTSUP; 4449d936f4fSMichael Baum return -rte_errno; 4459d936f4fSMichael Baum } 4469d936f4fSMichael Baum #endif 4479d936f4fSMichael Baum return 0; 4489d936f4fSMichael Baum } 4499d936f4fSMichael Baum 4509d936f4fSMichael Baum /** 4519d936f4fSMichael Baum * Release Protection Domain object. 4529d936f4fSMichael Baum * 4539d936f4fSMichael Baum * @param[out] cdev 4549d936f4fSMichael Baum * Pointer to the mlx5 device. 4559d936f4fSMichael Baum * 4569d936f4fSMichael Baum * @return 4579d936f4fSMichael Baum * 0 on success, a negative errno value otherwise. 4589d936f4fSMichael Baum */ 4599d936f4fSMichael Baum int 4609d936f4fSMichael Baum mlx5_os_pd_release(struct mlx5_common_device *cdev) 4619d936f4fSMichael Baum { 4629d936f4fSMichael Baum if (cdev->config.pd_handle == MLX5_ARG_UNSET) 4639d936f4fSMichael Baum return mlx5_glue->dealloc_pd(cdev->pd); 4649d936f4fSMichael Baum else 4659d936f4fSMichael Baum return mlx5_glue->unimport_pd(cdev->pd); 4669d936f4fSMichael Baum } 4679d936f4fSMichael Baum 4689d936f4fSMichael Baum /** 4699d936f4fSMichael Baum * Allocate Protection Domain object. 4709d936f4fSMichael Baum * 4719d936f4fSMichael Baum * @param[out] cdev 4729d936f4fSMichael Baum * Pointer to the mlx5 device. 4739d936f4fSMichael Baum * 4749d936f4fSMichael Baum * @return 4759d936f4fSMichael Baum * 0 on success, a negative errno value otherwise. 4769d936f4fSMichael Baum */ 4779d936f4fSMichael Baum static int 4789d936f4fSMichael Baum mlx5_os_pd_create(struct mlx5_common_device *cdev) 4799d936f4fSMichael Baum { 4809d936f4fSMichael Baum cdev->pd = mlx5_glue->alloc_pd(cdev->ctx); 4819d936f4fSMichael Baum if (cdev->pd == NULL) { 4829d936f4fSMichael Baum DRV_LOG(ERR, "Failed to allocate PD: %s", rte_strerror(errno)); 4839d936f4fSMichael Baum return errno ? -errno : -ENOMEM; 4849d936f4fSMichael Baum } 4859d936f4fSMichael Baum return 0; 4869d936f4fSMichael Baum } 4879d936f4fSMichael Baum 4889d936f4fSMichael Baum /** 4899d936f4fSMichael Baum * Import Protection Domain object according to given PD handle. 4909d936f4fSMichael Baum * 4919d936f4fSMichael Baum * @param[out] cdev 4929d936f4fSMichael Baum * Pointer to the mlx5 device. 4939d936f4fSMichael Baum * 4949d936f4fSMichael Baum * @return 4959d936f4fSMichael Baum * 0 on success, a negative errno value otherwise. 4969d936f4fSMichael Baum */ 4979d936f4fSMichael Baum static int 4989d936f4fSMichael Baum mlx5_os_pd_import(struct mlx5_common_device *cdev) 4999d936f4fSMichael Baum { 5009d936f4fSMichael Baum cdev->pd = mlx5_glue->import_pd(cdev->ctx, cdev->config.pd_handle); 5019d936f4fSMichael Baum if (cdev->pd == NULL) { 5029d936f4fSMichael Baum DRV_LOG(ERR, "Failed to import PD using handle=%d: %s", 5039d936f4fSMichael Baum cdev->config.pd_handle, rte_strerror(errno)); 5049d936f4fSMichael Baum return errno ? -errno : -ENOMEM; 5059d936f4fSMichael Baum } 5069d936f4fSMichael Baum return 0; 5079d936f4fSMichael Baum } 5089d936f4fSMichael Baum 5099d936f4fSMichael Baum /** 5109d936f4fSMichael Baum * Prepare Protection Domain object and extract its pdn using DV API. 511e35ccf24SMichael Baum * 512e35ccf24SMichael Baum * @param[out] cdev 513e35ccf24SMichael Baum * Pointer to the mlx5 device. 514e35ccf24SMichael Baum * 515e35ccf24SMichael Baum * @return 516e35ccf24SMichael Baum * 0 on success, a negative errno value otherwise and rte_errno is set. 517e35ccf24SMichael Baum */ 518e35ccf24SMichael Baum int 5199d936f4fSMichael Baum mlx5_os_pd_prepare(struct mlx5_common_device *cdev) 520e35ccf24SMichael Baum { 521e35ccf24SMichael Baum #ifdef HAVE_IBV_FLOW_DV_SUPPORT 522e35ccf24SMichael Baum struct mlx5dv_obj obj; 523e35ccf24SMichael Baum struct mlx5dv_pd pd_info; 524e35ccf24SMichael Baum #endif 5259d936f4fSMichael Baum int ret; 526e35ccf24SMichael Baum 5279d936f4fSMichael Baum if (cdev->config.pd_handle == MLX5_ARG_UNSET) 5289d936f4fSMichael Baum ret = mlx5_os_pd_create(cdev); 5299d936f4fSMichael Baum else 5309d936f4fSMichael Baum ret = mlx5_os_pd_import(cdev); 5319d936f4fSMichael Baum if (ret) { 5329d936f4fSMichael Baum rte_errno = -ret; 5339d936f4fSMichael Baum return ret; 534e35ccf24SMichael Baum } 535e35ccf24SMichael Baum if (cdev->config.devx == 0) 536e35ccf24SMichael Baum return 0; 537e35ccf24SMichael Baum #ifdef HAVE_IBV_FLOW_DV_SUPPORT 538e35ccf24SMichael Baum obj.pd.in = cdev->pd; 539e35ccf24SMichael Baum obj.pd.out = &pd_info; 540e35ccf24SMichael Baum ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD); 541e35ccf24SMichael Baum if (ret != 0) { 542e35ccf24SMichael Baum DRV_LOG(ERR, "Fail to get PD object info."); 5439d936f4fSMichael Baum rte_errno = errno; 5449d936f4fSMichael Baum claim_zero(mlx5_os_pd_release(cdev)); 545e35ccf24SMichael Baum cdev->pd = NULL; 5469d936f4fSMichael Baum return -rte_errno; 547e35ccf24SMichael Baum } 548e35ccf24SMichael Baum cdev->pdn = pd_info.pdn; 549e35ccf24SMichael Baum return 0; 550e35ccf24SMichael Baum #else 551e35ccf24SMichael Baum DRV_LOG(ERR, "Cannot get pdn - no DV support."); 5529d936f4fSMichael Baum rte_errno = ENOTSUP; 5539d936f4fSMichael Baum return -rte_errno; 554e35ccf24SMichael Baum #endif /* HAVE_IBV_FLOW_DV_SUPPORT */ 555e35ccf24SMichael Baum } 556e35ccf24SMichael Baum 557662d0dc6SMichael Baum static struct ibv_device * 558*37ca457dSBing Zhao mlx5_os_get_ibv_device(const struct rte_pci_device *pci_dev) 559c31f3f7fSShiri Kuzin { 560c31f3f7fSShiri Kuzin int n; 561c31f3f7fSShiri Kuzin struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n); 562c31f3f7fSShiri Kuzin struct ibv_device *ibv_match = NULL; 563f956d3d4SRongwei Liu uint8_t guid1[32] = {0}; 564f956d3d4SRongwei Liu uint8_t guid2[32] = {0}; 565f956d3d4SRongwei Liu int ret1, ret2 = -1; 566c31f3f7fSShiri Kuzin struct rte_pci_addr paddr; 567*37ca457dSBing Zhao const struct rte_pci_addr *addr = &pci_dev->addr; 568*37ca457dSBing Zhao bool is_vf_dev = mlx5_dev_is_vf_pci(pci_dev); 569c31f3f7fSShiri Kuzin 570f956d3d4SRongwei Liu if (ibv_list == NULL || !n) { 571f956d3d4SRongwei Liu rte_errno = ENOSYS; 572f956d3d4SRongwei Liu if (ibv_list) 573f956d3d4SRongwei Liu mlx5_glue->free_device_list(ibv_list); 574f956d3d4SRongwei Liu return NULL; 575f956d3d4SRongwei Liu } 576f956d3d4SRongwei Liu ret1 = mlx5_get_device_guid(addr, guid1, sizeof(guid1)); 577f956d3d4SRongwei Liu while (n-- > 0) { 578c31f3f7fSShiri Kuzin DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name); 5794d567938SThomas Monjalon if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0) 580c31f3f7fSShiri Kuzin continue; 581f956d3d4SRongwei Liu if (ret1 > 0) 582f956d3d4SRongwei Liu ret2 = mlx5_get_device_guid(&paddr, guid2, sizeof(guid2)); 583f956d3d4SRongwei Liu /* Bond device can bond secondary PCIe */ 584*37ca457dSBing Zhao if ((strstr(ibv_list[n]->name, "bond") && !is_vf_dev && 585f956d3d4SRongwei Liu ((ret1 > 0 && ret2 > 0 && !memcmp(guid1, guid2, sizeof(guid1))) || 586f956d3d4SRongwei Liu (addr->domain == paddr.domain && addr->bus == paddr.bus && 587f956d3d4SRongwei Liu addr->devid == paddr.devid))) || 588f956d3d4SRongwei Liu !rte_pci_addr_cmp(addr, &paddr)) { 589c31f3f7fSShiri Kuzin ibv_match = ibv_list[n]; 590c31f3f7fSShiri Kuzin break; 591c31f3f7fSShiri Kuzin } 592f956d3d4SRongwei Liu } 593ca1418ceSMichael Baum if (ibv_match == NULL) { 594ca1418ceSMichael Baum DRV_LOG(WARNING, 595ca1418ceSMichael Baum "No Verbs device matches PCI device " PCI_PRI_FMT "," 596ca1418ceSMichael Baum " are kernel drivers loaded?", 597ca1418ceSMichael Baum addr->domain, addr->bus, addr->devid, addr->function); 598c31f3f7fSShiri Kuzin rte_errno = ENOENT; 599ca1418ceSMichael Baum } 600c31f3f7fSShiri Kuzin mlx5_glue->free_device_list(ibv_list); 601c31f3f7fSShiri Kuzin return ibv_match; 602c31f3f7fSShiri Kuzin } 603887183efSMichael Baum 604662d0dc6SMichael Baum /* Try to disable ROCE by Netlink\Devlink. */ 605662d0dc6SMichael Baum static int 606662d0dc6SMichael Baum mlx5_nl_roce_disable(const char *addr) 607662d0dc6SMichael Baum { 608be66461cSDmitry Kozlyuk int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0); 609662d0dc6SMichael Baum int devlink_id; 610662d0dc6SMichael Baum int enable; 611662d0dc6SMichael Baum int ret; 612662d0dc6SMichael Baum 613662d0dc6SMichael Baum if (nlsk_fd < 0) 614662d0dc6SMichael Baum return nlsk_fd; 615662d0dc6SMichael Baum devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd); 616662d0dc6SMichael Baum if (devlink_id < 0) { 617662d0dc6SMichael Baum ret = devlink_id; 618662d0dc6SMichael Baum DRV_LOG(DEBUG, 619662d0dc6SMichael Baum "Failed to get devlink id for ROCE operations by Netlink."); 620662d0dc6SMichael Baum goto close; 621662d0dc6SMichael Baum } 622662d0dc6SMichael Baum ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable); 623662d0dc6SMichael Baum if (ret) { 624662d0dc6SMichael Baum DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.", 625662d0dc6SMichael Baum ret); 626662d0dc6SMichael Baum goto close; 627662d0dc6SMichael Baum } else if (!enable) { 628662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE has already disabled(Netlink)."); 629662d0dc6SMichael Baum goto close; 630662d0dc6SMichael Baum } 631662d0dc6SMichael Baum ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0); 632662d0dc6SMichael Baum if (ret) 633662d0dc6SMichael Baum DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret); 634662d0dc6SMichael Baum else 635662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE is disabled by Netlink successfully."); 636662d0dc6SMichael Baum close: 637662d0dc6SMichael Baum close(nlsk_fd); 638662d0dc6SMichael Baum return ret; 639662d0dc6SMichael Baum } 640662d0dc6SMichael Baum 641662d0dc6SMichael Baum /* Try to disable ROCE by sysfs. */ 642662d0dc6SMichael Baum static int 643662d0dc6SMichael Baum mlx5_sys_roce_disable(const char *addr) 644662d0dc6SMichael Baum { 645662d0dc6SMichael Baum FILE *file_o; 646662d0dc6SMichael Baum int enable; 647662d0dc6SMichael Baum int ret; 648662d0dc6SMichael Baum 649662d0dc6SMichael Baum MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr); 650662d0dc6SMichael Baum file_o = fopen(file_p, "rb"); 651662d0dc6SMichael Baum if (!file_o) { 652662d0dc6SMichael Baum rte_errno = ENOTSUP; 653662d0dc6SMichael Baum return -ENOTSUP; 654662d0dc6SMichael Baum } 655662d0dc6SMichael Baum ret = fscanf(file_o, "%d", &enable); 656662d0dc6SMichael Baum if (ret != 1) { 657662d0dc6SMichael Baum rte_errno = EINVAL; 658662d0dc6SMichael Baum ret = EINVAL; 659662d0dc6SMichael Baum goto close; 660662d0dc6SMichael Baum } else if (!enable) { 661662d0dc6SMichael Baum ret = 0; 662662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE has already disabled(sysfs)."); 663662d0dc6SMichael Baum goto close; 664662d0dc6SMichael Baum } 665662d0dc6SMichael Baum fclose(file_o); 666662d0dc6SMichael Baum file_o = fopen(file_p, "wb"); 667662d0dc6SMichael Baum if (!file_o) { 668662d0dc6SMichael Baum rte_errno = ENOTSUP; 669662d0dc6SMichael Baum return -ENOTSUP; 670662d0dc6SMichael Baum } 671662d0dc6SMichael Baum fprintf(file_o, "0\n"); 672662d0dc6SMichael Baum ret = 0; 673662d0dc6SMichael Baum close: 674662d0dc6SMichael Baum if (ret) 675662d0dc6SMichael Baum DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret); 676662d0dc6SMichael Baum else 677662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE is disabled by sysfs successfully."); 678662d0dc6SMichael Baum fclose(file_o); 679662d0dc6SMichael Baum return ret; 680662d0dc6SMichael Baum } 681662d0dc6SMichael Baum 682662d0dc6SMichael Baum static int 683662d0dc6SMichael Baum mlx5_roce_disable(const struct rte_device *dev) 684662d0dc6SMichael Baum { 685662d0dc6SMichael Baum char pci_addr[PCI_PRI_STR_SIZE] = { 0 }; 686662d0dc6SMichael Baum 687662d0dc6SMichael Baum if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0) 688662d0dc6SMichael Baum return -rte_errno; 689662d0dc6SMichael Baum /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */ 690662d0dc6SMichael Baum if (mlx5_nl_roce_disable(pci_addr) != 0 && 691662d0dc6SMichael Baum mlx5_sys_roce_disable(pci_addr) != 0) 692662d0dc6SMichael Baum return -rte_errno; 693662d0dc6SMichael Baum return 0; 694662d0dc6SMichael Baum } 695662d0dc6SMichael Baum 696662d0dc6SMichael Baum static struct ibv_device * 697662d0dc6SMichael Baum mlx5_os_get_ibv_dev(const struct rte_device *dev) 698662d0dc6SMichael Baum { 699662d0dc6SMichael Baum struct ibv_device *ibv; 700662d0dc6SMichael Baum 701662d0dc6SMichael Baum if (mlx5_dev_is_pci(dev)) 702*37ca457dSBing Zhao ibv = mlx5_os_get_ibv_device(RTE_DEV_TO_PCI_CONST(dev)); 703662d0dc6SMichael Baum else 704662d0dc6SMichael Baum ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev)); 705662d0dc6SMichael Baum if (ibv == NULL) { 706662d0dc6SMichael Baum rte_errno = ENODEV; 707662d0dc6SMichael Baum DRV_LOG(ERR, "Verbs device not found: %s", dev->name); 708662d0dc6SMichael Baum } 709662d0dc6SMichael Baum return ibv; 710662d0dc6SMichael Baum } 711662d0dc6SMichael Baum 712662d0dc6SMichael Baum static struct ibv_device * 713662d0dc6SMichael Baum mlx5_vdpa_get_ibv_dev(const struct rte_device *dev) 714662d0dc6SMichael Baum { 715662d0dc6SMichael Baum struct ibv_device *ibv; 716662d0dc6SMichael Baum int retry; 717662d0dc6SMichael Baum 718662d0dc6SMichael Baum if (mlx5_roce_disable(dev) != 0) { 719662d0dc6SMichael Baum DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".", 720662d0dc6SMichael Baum dev->name); 721662d0dc6SMichael Baum return NULL; 722662d0dc6SMichael Baum } 723662d0dc6SMichael Baum /* Wait for the IB device to appear again after reload. */ 724662d0dc6SMichael Baum for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) { 725662d0dc6SMichael Baum ibv = mlx5_os_get_ibv_dev(dev); 726662d0dc6SMichael Baum if (ibv != NULL) 727662d0dc6SMichael Baum return ibv; 728662d0dc6SMichael Baum usleep(MLX5_VDPA_USEC); 729662d0dc6SMichael Baum } 730662d0dc6SMichael Baum DRV_LOG(ERR, 731662d0dc6SMichael Baum "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.", 732662d0dc6SMichael Baum dev->name, MLX5_VDPA_MAX_RETRIES); 733662d0dc6SMichael Baum rte_errno = EAGAIN; 734662d0dc6SMichael Baum return NULL; 735662d0dc6SMichael Baum } 736662d0dc6SMichael Baum 737887183efSMichael Baum static int 738887183efSMichael Baum mlx5_config_doorbell_mapping_env(int dbnc) 739887183efSMichael Baum { 740887183efSMichael Baum char *env; 741887183efSMichael Baum int value; 742887183efSMichael Baum 743887183efSMichael Baum MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 744887183efSMichael Baum /* Get environment variable to store. */ 745887183efSMichael Baum env = getenv(MLX5_SHUT_UP_BF); 746887183efSMichael Baum value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET; 747887183efSMichael Baum if (dbnc == MLX5_ARG_UNSET) 748887183efSMichael Baum setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1); 749887183efSMichael Baum else 750887183efSMichael Baum setenv(MLX5_SHUT_UP_BF, 751a6b9d5a5SMichael Baum dbnc == MLX5_SQ_DB_NCACHED ? "1" : "0", 1); 752887183efSMichael Baum return value; 753887183efSMichael Baum } 754887183efSMichael Baum 755887183efSMichael Baum static void 756887183efSMichael Baum mlx5_restore_doorbell_mapping_env(int value) 757887183efSMichael Baum { 758887183efSMichael Baum MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY); 759887183efSMichael Baum /* Restore the original environment variable state. */ 760887183efSMichael Baum if (value == MLX5_ARG_UNSET) 761887183efSMichael Baum unsetenv(MLX5_SHUT_UP_BF); 762887183efSMichael Baum else 763887183efSMichael Baum setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1); 764887183efSMichael Baum } 765887183efSMichael Baum 766887183efSMichael Baum /** 767887183efSMichael Baum * Function API to open IB device. 768887183efSMichael Baum * 769887183efSMichael Baum * @param cdev 770887183efSMichael Baum * Pointer to the mlx5 device. 771ca1418ceSMichael Baum * @param classes 772ca1418ceSMichael Baum * Chosen classes come from device arguments. 773887183efSMichael Baum * 774887183efSMichael Baum * @return 7759d936f4fSMichael Baum * Pointer to ibv_context on success, NULL otherwise and rte_errno is set. 776887183efSMichael Baum */ 7779d936f4fSMichael Baum static struct ibv_context * 7789d936f4fSMichael Baum mlx5_open_device(struct mlx5_common_device *cdev, uint32_t classes) 779887183efSMichael Baum { 780887183efSMichael Baum struct ibv_device *ibv; 781887183efSMichael Baum struct ibv_context *ctx = NULL; 782887183efSMichael Baum int dbmap_env; 783887183efSMichael Baum 7849d936f4fSMichael Baum MLX5_ASSERT(cdev->config.device_fd == MLX5_ARG_UNSET); 785662d0dc6SMichael Baum if (classes & MLX5_CLASS_VDPA) 786662d0dc6SMichael Baum ibv = mlx5_vdpa_get_ibv_dev(cdev->dev); 787662d0dc6SMichael Baum else 788887183efSMichael Baum ibv = mlx5_os_get_ibv_dev(cdev->dev); 789887183efSMichael Baum if (!ibv) 7909d936f4fSMichael Baum return NULL; 791887183efSMichael Baum DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name); 792887183efSMichael Baum /* 793887183efSMichael Baum * Configure environment variable "MLX5_BF_SHUT_UP" before the device 794887183efSMichael Baum * creation. The rdma_core library checks the variable at device 795887183efSMichael Baum * creation and stores the result internally. 796887183efSMichael Baum */ 797887183efSMichael Baum dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc); 798887183efSMichael Baum /* Try to open IB device with DV first, then usual Verbs. */ 799887183efSMichael Baum errno = 0; 800887183efSMichael Baum ctx = mlx5_glue->dv_open_device(ibv); 801887183efSMichael Baum if (ctx) { 802887183efSMichael Baum cdev->config.devx = 1; 803ca1418ceSMichael Baum } else if (classes == MLX5_CLASS_ETH) { 804887183efSMichael Baum /* The environment variable is still configured. */ 805887183efSMichael Baum ctx = mlx5_glue->open_device(ibv); 806887183efSMichael Baum if (ctx == NULL) 807887183efSMichael Baum goto error; 808ca1418ceSMichael Baum } else { 809ca1418ceSMichael Baum goto error; 810887183efSMichael Baum } 811887183efSMichael Baum /* The device is created, no need for environment. */ 812887183efSMichael Baum mlx5_restore_doorbell_mapping_env(dbmap_env); 8139d936f4fSMichael Baum return ctx; 814887183efSMichael Baum error: 815887183efSMichael Baum rte_errno = errno ? errno : ENODEV; 816887183efSMichael Baum /* The device creation is failed, no need for environment. */ 817887183efSMichael Baum mlx5_restore_doorbell_mapping_env(dbmap_env); 818887183efSMichael Baum DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name); 8199d936f4fSMichael Baum return NULL; 820887183efSMichael Baum } 8219d936f4fSMichael Baum 8229d936f4fSMichael Baum /** 8239d936f4fSMichael Baum * Function API to import IB device. 8249d936f4fSMichael Baum * 8259d936f4fSMichael Baum * @param cdev 8269d936f4fSMichael Baum * Pointer to the mlx5 device. 8279d936f4fSMichael Baum * 8289d936f4fSMichael Baum * @return 8299d936f4fSMichael Baum * Pointer to ibv_context on success, NULL otherwise and rte_errno is set. 8309d936f4fSMichael Baum */ 8319d936f4fSMichael Baum static struct ibv_context * 8329d936f4fSMichael Baum mlx5_import_device(struct mlx5_common_device *cdev) 8339d936f4fSMichael Baum { 8349d936f4fSMichael Baum struct ibv_context *ctx = NULL; 8359d936f4fSMichael Baum 8369d936f4fSMichael Baum MLX5_ASSERT(cdev->config.device_fd != MLX5_ARG_UNSET); 8379d936f4fSMichael Baum ctx = mlx5_glue->import_device(cdev->config.device_fd); 8389d936f4fSMichael Baum if (!ctx) { 8399d936f4fSMichael Baum DRV_LOG(ERR, "Failed to import device for fd=%d: %s", 8409d936f4fSMichael Baum cdev->config.device_fd, rte_strerror(errno)); 8419d936f4fSMichael Baum rte_errno = errno; 8429d936f4fSMichael Baum } 8439d936f4fSMichael Baum return ctx; 8449d936f4fSMichael Baum } 8459d936f4fSMichael Baum 8469d936f4fSMichael Baum /** 8479d936f4fSMichael Baum * Function API to prepare IB device. 8489d936f4fSMichael Baum * 8499d936f4fSMichael Baum * @param cdev 8509d936f4fSMichael Baum * Pointer to the mlx5 device. 8519d936f4fSMichael Baum * @param classes 8529d936f4fSMichael Baum * Chosen classes come from device arguments. 8539d936f4fSMichael Baum * 8549d936f4fSMichael Baum * @return 8559d936f4fSMichael Baum * 0 on success, a negative errno value otherwise and rte_errno is set. 8569d936f4fSMichael Baum */ 8579d936f4fSMichael Baum int 8589d936f4fSMichael Baum mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes) 8599d936f4fSMichael Baum { 8609d936f4fSMichael Baum 8619d936f4fSMichael Baum struct ibv_context *ctx = NULL; 8629d936f4fSMichael Baum 8639d936f4fSMichael Baum if (cdev->config.device_fd == MLX5_ARG_UNSET) 8649d936f4fSMichael Baum ctx = mlx5_open_device(cdev, classes); 8659d936f4fSMichael Baum else 8669d936f4fSMichael Baum ctx = mlx5_import_device(cdev); 8679d936f4fSMichael Baum if (ctx == NULL) 8689d936f4fSMichael Baum return -rte_errno; 8699d936f4fSMichael Baum /* Hint libmlx5 to use PMD allocator for data plane resources */ 8709d936f4fSMichael Baum mlx5_set_context_attr(cdev->dev, ctx); 8719d936f4fSMichael Baum cdev->ctx = ctx; 8729d936f4fSMichael Baum return 0; 8739d936f4fSMichael Baum } 8749d936f4fSMichael Baum 8754c74ad3eSRongwei Liu int 8764c74ad3eSRongwei Liu mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len) 8774c74ad3eSRongwei Liu { 8784c74ad3eSRongwei Liu char tmp[512]; 8794c74ad3eSRongwei Liu char cur_ifname[IF_NAMESIZE + 1]; 8804c74ad3eSRongwei Liu FILE *id_file; 8814c74ad3eSRongwei Liu DIR *dir; 8824c74ad3eSRongwei Liu struct dirent *ptr; 8834c74ad3eSRongwei Liu int ret; 8844c74ad3eSRongwei Liu 8854c74ad3eSRongwei Liu if (guid == NULL || len < sizeof(u_int64_t) + 1) 8864c74ad3eSRongwei Liu return -1; 8874c74ad3eSRongwei Liu memset(guid, 0, len); 8884c74ad3eSRongwei Liu snprintf(tmp, sizeof(tmp), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/net", 8894c74ad3eSRongwei Liu dev->domain, dev->bus, dev->devid, dev->function); 8904c74ad3eSRongwei Liu dir = opendir(tmp); 8914c74ad3eSRongwei Liu if (dir == NULL) 8924c74ad3eSRongwei Liu return -1; 8934c74ad3eSRongwei Liu /* Traverse to identify PF interface */ 8944c74ad3eSRongwei Liu do { 8954c74ad3eSRongwei Liu ptr = readdir(dir); 8964c74ad3eSRongwei Liu if (ptr == NULL || ptr->d_type != DT_DIR) { 8974c74ad3eSRongwei Liu closedir(dir); 8984c74ad3eSRongwei Liu return -1; 8994c74ad3eSRongwei Liu } 9004c74ad3eSRongwei Liu } while (strchr(ptr->d_name, '.') || strchr(ptr->d_name, '_') || 9014c74ad3eSRongwei Liu strchr(ptr->d_name, 'v')); 9024c74ad3eSRongwei Liu snprintf(cur_ifname, sizeof(cur_ifname), "%s", ptr->d_name); 9034c74ad3eSRongwei Liu closedir(dir); 9044c74ad3eSRongwei Liu snprintf(tmp + strlen(tmp), sizeof(tmp) - strlen(tmp), 9054c74ad3eSRongwei Liu "/%s/phys_switch_id", cur_ifname); 9064c74ad3eSRongwei Liu /* Older OFED like 5.3 doesn't support read */ 9074c74ad3eSRongwei Liu id_file = fopen(tmp, "r"); 9084c74ad3eSRongwei Liu if (!id_file) 9094c74ad3eSRongwei Liu return 0; 9104c74ad3eSRongwei Liu ret = fscanf(id_file, "%16s", guid); 9114c74ad3eSRongwei Liu fclose(id_file); 9124c74ad3eSRongwei Liu return ret; 9134c74ad3eSRongwei Liu } 91476b5bdf8SMatan Azrad 91576b5bdf8SMatan Azrad /* 91676b5bdf8SMatan Azrad * Create direct mkey using the kernel ibv_reg_mr API and wrap it with a new 91776b5bdf8SMatan Azrad * indirect mkey created by the DevX API. 91876b5bdf8SMatan Azrad * This mkey should be used for DevX commands requesting mkey as a parameter. 91976b5bdf8SMatan Azrad */ 92076b5bdf8SMatan Azrad int 92176b5bdf8SMatan Azrad mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr, 92276b5bdf8SMatan Azrad size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr) 92376b5bdf8SMatan Azrad { 92476b5bdf8SMatan Azrad struct mlx5_klm klm = { 92576b5bdf8SMatan Azrad .byte_count = length, 92676b5bdf8SMatan Azrad .address = (uintptr_t)addr, 92776b5bdf8SMatan Azrad }; 92876b5bdf8SMatan Azrad struct mlx5_devx_mkey_attr mkey_attr = { 92976b5bdf8SMatan Azrad .pd = pdn, 93076b5bdf8SMatan Azrad .klm_array = &klm, 93176b5bdf8SMatan Azrad .klm_num = 1, 93276b5bdf8SMatan Azrad }; 93376b5bdf8SMatan Azrad struct mlx5_devx_obj *mkey; 93476b5bdf8SMatan Azrad struct ibv_mr *ibv_mr = mlx5_glue->reg_mr(pd, addr, length, 93576b5bdf8SMatan Azrad IBV_ACCESS_LOCAL_WRITE | 93676b5bdf8SMatan Azrad (haswell_broadwell_cpu ? 0 : 93776b5bdf8SMatan Azrad IBV_ACCESS_RELAXED_ORDERING)); 93876b5bdf8SMatan Azrad 93976b5bdf8SMatan Azrad if (!ibv_mr) { 94076b5bdf8SMatan Azrad rte_errno = errno; 94176b5bdf8SMatan Azrad return -rte_errno; 94276b5bdf8SMatan Azrad } 94376b5bdf8SMatan Azrad klm.mkey = ibv_mr->lkey; 94476b5bdf8SMatan Azrad mkey_attr.addr = (uintptr_t)addr; 94576b5bdf8SMatan Azrad mkey_attr.size = length; 94676b5bdf8SMatan Azrad mkey = mlx5_devx_cmd_mkey_create(ctx, &mkey_attr); 94776b5bdf8SMatan Azrad if (!mkey) { 94876b5bdf8SMatan Azrad claim_zero(mlx5_glue->dereg_mr(ibv_mr)); 94976b5bdf8SMatan Azrad return -rte_errno; 95076b5bdf8SMatan Azrad } 95176b5bdf8SMatan Azrad pmd_mr->addr = addr; 95276b5bdf8SMatan Azrad pmd_mr->len = length; 95376b5bdf8SMatan Azrad pmd_mr->obj = (void *)ibv_mr; 95476b5bdf8SMatan Azrad pmd_mr->imkey = mkey; 95576b5bdf8SMatan Azrad pmd_mr->lkey = mkey->id; 95676b5bdf8SMatan Azrad return 0; 95776b5bdf8SMatan Azrad } 95876b5bdf8SMatan Azrad 95976b5bdf8SMatan Azrad void 96076b5bdf8SMatan Azrad mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr) 96176b5bdf8SMatan Azrad { 96276b5bdf8SMatan Azrad if (!pmd_mr) 96376b5bdf8SMatan Azrad return; 96476b5bdf8SMatan Azrad if (pmd_mr->imkey) 96576b5bdf8SMatan Azrad claim_zero(mlx5_devx_cmd_destroy(pmd_mr->imkey)); 96676b5bdf8SMatan Azrad if (pmd_mr->obj) 96776b5bdf8SMatan Azrad claim_zero(mlx5_glue->dereg_mr(pmd_mr->obj)); 96876b5bdf8SMatan Azrad memset(pmd_mr, 0, sizeof(*pmd_mr)); 96976b5bdf8SMatan Azrad } 97072d7efe4SSpike Du 97172d7efe4SSpike Du /** 97272d7efe4SSpike Du * Rte_intr_handle create and init helper. 97372d7efe4SSpike Du * 97472d7efe4SSpike Du * @param[in] mode 97572d7efe4SSpike Du * interrupt instance can be shared between primary and secondary 97672d7efe4SSpike Du * processes or not. 97772d7efe4SSpike Du * @param[in] set_fd_nonblock 97872d7efe4SSpike Du * Whether to set fd to O_NONBLOCK. 97972d7efe4SSpike Du * @param[in] fd 98072d7efe4SSpike Du * Fd to set in created intr_handle. 98172d7efe4SSpike Du * @param[in] cb 98272d7efe4SSpike Du * Callback to register for intr_handle. 98372d7efe4SSpike Du * @param[in] cb_arg 98472d7efe4SSpike Du * Callback argument for cb. 98572d7efe4SSpike Du * 98672d7efe4SSpike Du * @return 98772d7efe4SSpike Du * - Interrupt handle on success. 98872d7efe4SSpike Du * - NULL on failure, with rte_errno set. 98972d7efe4SSpike Du */ 99072d7efe4SSpike Du struct rte_intr_handle * 99172d7efe4SSpike Du mlx5_os_interrupt_handler_create(int mode, bool set_fd_nonblock, int fd, 99272d7efe4SSpike Du rte_intr_callback_fn cb, void *cb_arg) 99372d7efe4SSpike Du { 99472d7efe4SSpike Du struct rte_intr_handle *tmp_intr_handle; 99572d7efe4SSpike Du int ret, flags; 99672d7efe4SSpike Du 99772d7efe4SSpike Du tmp_intr_handle = rte_intr_instance_alloc(mode); 99872d7efe4SSpike Du if (!tmp_intr_handle) { 99972d7efe4SSpike Du rte_errno = ENOMEM; 100072d7efe4SSpike Du goto err; 100172d7efe4SSpike Du } 100272d7efe4SSpike Du if (set_fd_nonblock) { 100372d7efe4SSpike Du flags = fcntl(fd, F_GETFL); 100472d7efe4SSpike Du ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK); 100572d7efe4SSpike Du if (ret) { 100672d7efe4SSpike Du rte_errno = errno; 100772d7efe4SSpike Du goto err; 100872d7efe4SSpike Du } 100972d7efe4SSpike Du } 101072d7efe4SSpike Du ret = rte_intr_fd_set(tmp_intr_handle, fd); 101172d7efe4SSpike Du if (ret) 101272d7efe4SSpike Du goto err; 101372d7efe4SSpike Du ret = rte_intr_type_set(tmp_intr_handle, RTE_INTR_HANDLE_EXT); 101472d7efe4SSpike Du if (ret) 101572d7efe4SSpike Du goto err; 101672d7efe4SSpike Du ret = rte_intr_callback_register(tmp_intr_handle, cb, cb_arg); 101772d7efe4SSpike Du if (ret) { 101872d7efe4SSpike Du rte_errno = -ret; 101972d7efe4SSpike Du goto err; 102072d7efe4SSpike Du } 102172d7efe4SSpike Du return tmp_intr_handle; 102272d7efe4SSpike Du err: 102372d7efe4SSpike Du rte_intr_instance_free(tmp_intr_handle); 102472d7efe4SSpike Du return NULL; 102572d7efe4SSpike Du } 102672d7efe4SSpike Du 102772d7efe4SSpike Du /* Safe unregistration for interrupt callback. */ 102872d7efe4SSpike Du static void 102972d7efe4SSpike Du mlx5_intr_callback_unregister(const struct rte_intr_handle *handle, 103072d7efe4SSpike Du rte_intr_callback_fn cb_fn, void *cb_arg) 103172d7efe4SSpike Du { 103272d7efe4SSpike Du uint64_t twait = 0; 103372d7efe4SSpike Du uint64_t start = 0; 103472d7efe4SSpike Du 103572d7efe4SSpike Du do { 103672d7efe4SSpike Du int ret; 103772d7efe4SSpike Du 103872d7efe4SSpike Du ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg); 103972d7efe4SSpike Du if (ret >= 0) 104072d7efe4SSpike Du return; 104172d7efe4SSpike Du if (ret != -EAGAIN) { 104272d7efe4SSpike Du DRV_LOG(INFO, "failed to unregister interrupt" 104372d7efe4SSpike Du " handler (error: %d)", ret); 104472d7efe4SSpike Du MLX5_ASSERT(false); 104572d7efe4SSpike Du return; 104672d7efe4SSpike Du } 104772d7efe4SSpike Du if (twait) { 104872d7efe4SSpike Du struct timespec onems; 104972d7efe4SSpike Du 105072d7efe4SSpike Du /* Wait one millisecond and try again. */ 105172d7efe4SSpike Du onems.tv_sec = 0; 105272d7efe4SSpike Du onems.tv_nsec = NS_PER_S / MS_PER_S; 105372d7efe4SSpike Du nanosleep(&onems, 0); 105472d7efe4SSpike Du /* Check whether one second elapsed. */ 105572d7efe4SSpike Du if ((rte_get_timer_cycles() - start) <= twait) 105672d7efe4SSpike Du continue; 105772d7efe4SSpike Du } else { 105872d7efe4SSpike Du /* 105972d7efe4SSpike Du * We get the amount of timer ticks for one second. 106072d7efe4SSpike Du * If this amount elapsed it means we spent one 106172d7efe4SSpike Du * second in waiting. This branch is executed once 106272d7efe4SSpike Du * on first iteration. 106372d7efe4SSpike Du */ 106472d7efe4SSpike Du twait = rte_get_timer_hz(); 106572d7efe4SSpike Du MLX5_ASSERT(twait); 106672d7efe4SSpike Du } 106772d7efe4SSpike Du /* 106872d7efe4SSpike Du * Timeout elapsed, show message (once a second) and retry. 106972d7efe4SSpike Du * We have no other acceptable option here, if we ignore 107072d7efe4SSpike Du * the unregistering return code the handler will not 107172d7efe4SSpike Du * be unregistered, fd will be closed and we may get the 107272d7efe4SSpike Du * crush. Hanging and messaging in the loop seems not to be 107372d7efe4SSpike Du * the worst choice. 107472d7efe4SSpike Du */ 107572d7efe4SSpike Du DRV_LOG(INFO, "Retrying to unregister interrupt handler"); 107672d7efe4SSpike Du start = rte_get_timer_cycles(); 107772d7efe4SSpike Du } while (true); 107872d7efe4SSpike Du } 107972d7efe4SSpike Du 108072d7efe4SSpike Du /** 108172d7efe4SSpike Du * Rte_intr_handle destroy helper. 108272d7efe4SSpike Du * 108372d7efe4SSpike Du * @param[in] intr_handle 108472d7efe4SSpike Du * Rte_intr_handle to destroy. 108572d7efe4SSpike Du * @param[in] cb 108672d7efe4SSpike Du * Callback which is registered to intr_handle. 108772d7efe4SSpike Du * @param[in] cb_arg 108872d7efe4SSpike Du * Callback argument for cb. 108972d7efe4SSpike Du * 109072d7efe4SSpike Du */ 109172d7efe4SSpike Du void 109272d7efe4SSpike Du mlx5_os_interrupt_handler_destroy(struct rte_intr_handle *intr_handle, 109372d7efe4SSpike Du rte_intr_callback_fn cb, void *cb_arg) 109472d7efe4SSpike Du { 109572d7efe4SSpike Du if (rte_intr_fd_get(intr_handle) >= 0) 109672d7efe4SSpike Du mlx5_intr_callback_unregister(intr_handle, cb, cb_arg); 109772d7efe4SSpike Du rte_intr_instance_free(intr_handle); 109872d7efe4SSpike Du } 1099