179aa4307SOphir Munk /* SPDX-License-Identifier: BSD-3-Clause
279aa4307SOphir Munk * Copyright 2020 Mellanox Technologies, Ltd
379aa4307SOphir Munk */
479aa4307SOphir Munk
54c74ad3eSRongwei Liu #include <sys/types.h>
679aa4307SOphir Munk #include <unistd.h>
779aa4307SOphir Munk #include <string.h>
879aa4307SOphir Munk #include <stdio.h>
979aa4307SOphir Munk #ifdef RTE_IBVERBS_LINK_DLOPEN
1079aa4307SOphir Munk #include <dlfcn.h>
1179aa4307SOphir Munk #endif
12aec086c9SMatan Azrad #include <dirent.h>
13aec086c9SMatan Azrad #include <net/if.h>
1472d7efe4SSpike Du #include <fcntl.h>
1579aa4307SOphir Munk
1679aa4307SOphir Munk #include <rte_errno.h>
17aec086c9SMatan Azrad #include <rte_string_fns.h>
181f37cb2bSDavid Marchand #include <bus_pci_driver.h>
19b3f89090SDavid Marchand #include <bus_auxiliary_driver.h>
2079aa4307SOphir Munk
2179aa4307SOphir Munk #include "mlx5_common.h"
22662d0dc6SMichael Baum #include "mlx5_nl.h"
2325245d5dSShiri Kuzin #include "mlx5_common_log.h"
24662d0dc6SMichael Baum #include "mlx5_common_private.h"
25887183efSMichael Baum #include "mlx5_common_defs.h"
26c31f3f7fSShiri Kuzin #include "mlx5_common_os.h"
2779aa4307SOphir Munk #include "mlx5_glue.h"
2879aa4307SOphir Munk
#ifdef MLX5_GLUE
/*
 * rdma-core glue table, defined here only for MLX5_GLUE builds.
 * Assigned by mlx5_glue_dlopen() from the "mlx5_glue" symbol of the loaded
 * library and reset to NULL by mlx5_glue_constructor() on failure.
 */
const struct mlx5_glue *mlx5_glue;
#endif
3279aa4307SOphir Munk
3379aa4307SOphir Munk int
mlx5_get_pci_addr(const char * dev_path,struct rte_pci_addr * pci_addr)344d567938SThomas Monjalon mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
3579aa4307SOphir Munk {
3679aa4307SOphir Munk FILE *file;
3779aa4307SOphir Munk char line[32];
38482a1d34SViacheslav Ovsiienko int rc = -ENOENT;
3979aa4307SOphir Munk MKSTR(path, "%s/device/uevent", dev_path);
4079aa4307SOphir Munk
4179aa4307SOphir Munk file = fopen(path, "rb");
4279aa4307SOphir Munk if (file == NULL) {
4379aa4307SOphir Munk rte_errno = errno;
4479aa4307SOphir Munk return -rte_errno;
4579aa4307SOphir Munk }
4679aa4307SOphir Munk while (fgets(line, sizeof(line), file) == line) {
4779aa4307SOphir Munk size_t len = strlen(line);
4879aa4307SOphir Munk
4979aa4307SOphir Munk /* Truncate long lines. */
50482a1d34SViacheslav Ovsiienko if (len == (sizeof(line) - 1)) {
5179aa4307SOphir Munk while (line[(len - 1)] != '\n') {
52482a1d34SViacheslav Ovsiienko int ret = fgetc(file);
53482a1d34SViacheslav Ovsiienko
5479aa4307SOphir Munk if (ret == EOF)
55482a1d34SViacheslav Ovsiienko goto exit;
5679aa4307SOphir Munk line[(len - 1)] = ret;
5779aa4307SOphir Munk }
58482a1d34SViacheslav Ovsiienko /* No match for long lines. */
59482a1d34SViacheslav Ovsiienko continue;
60482a1d34SViacheslav Ovsiienko }
6179aa4307SOphir Munk /* Extract information. */
6279aa4307SOphir Munk if (sscanf(line,
6379aa4307SOphir Munk "PCI_SLOT_NAME="
6479aa4307SOphir Munk "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
6579aa4307SOphir Munk &pci_addr->domain,
6679aa4307SOphir Munk &pci_addr->bus,
6779aa4307SOphir Munk &pci_addr->devid,
6879aa4307SOphir Munk &pci_addr->function) == 4) {
69482a1d34SViacheslav Ovsiienko rc = 0;
7079aa4307SOphir Munk break;
7179aa4307SOphir Munk }
7279aa4307SOphir Munk }
73482a1d34SViacheslav Ovsiienko exit:
7479aa4307SOphir Munk fclose(file);
75482a1d34SViacheslav Ovsiienko if (rc)
76482a1d34SViacheslav Ovsiienko rte_errno = -rc;
77482a1d34SViacheslav Ovsiienko return rc;
7879aa4307SOphir Munk }
7979aa4307SOphir Munk
8079aa4307SOphir Munk /**
8179aa4307SOphir Munk * Extract port name, as a number, from sysfs or netlink information.
8279aa4307SOphir Munk *
8379aa4307SOphir Munk * @param[in] port_name_in
8479aa4307SOphir Munk * String representing the port name.
8579aa4307SOphir Munk * @param[out] port_info_out
8679aa4307SOphir Munk * Port information, including port name as a number and port name
8779aa4307SOphir Munk * type if recognized
8879aa4307SOphir Munk *
8979aa4307SOphir Munk * @return
9079aa4307SOphir Munk * port_name field set according to recognized name format.
9179aa4307SOphir Munk */
9279aa4307SOphir Munk void
mlx5_translate_port_name(const char * port_name_in,struct mlx5_switch_info * port_info_out)9379aa4307SOphir Munk mlx5_translate_port_name(const char *port_name_in,
9479aa4307SOphir Munk struct mlx5_switch_info *port_info_out)
9579aa4307SOphir Munk {
9659df97f1SXueming Li char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol;
9779aa4307SOphir Munk char *end;
9879aa4307SOphir Munk int sc_items;
99*3cd5e500SDariusz Sosnowski int32_t ctrl_num = -1;
10079aa4307SOphir Munk
101*3cd5e500SDariusz Sosnowski sc_items = sscanf(port_name_in, "%c%d", &ctrl, &ctrl_num);
10259df97f1SXueming Li if (sc_items == 2 && ctrl == 'c') {
103*3cd5e500SDariusz Sosnowski port_info_out->ctrl_num = ctrl_num;
10459df97f1SXueming Li port_name_in++; /* 'c' */
10559df97f1SXueming Li port_name_in += snprintf(NULL, 0, "%d",
10659df97f1SXueming Li port_info_out->ctrl_num);
10759df97f1SXueming Li }
10859df97f1SXueming Li /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */
1093590881bSViacheslav Ovsiienko sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c",
11079aa4307SOphir Munk &pf_c1, &pf_c2, &port_info_out->pf_num,
1113590881bSViacheslav Ovsiienko &vf_c1, &vf_c2, &port_info_out->port_name, &eol);
11259df97f1SXueming Li if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') {
11359df97f1SXueming Li if (vf_c1 == 'v' && vf_c2 == 'f') {
11459df97f1SXueming Li /* Kernel ver >= 5.0 or OFED ver >= 4.6 */
11559df97f1SXueming Li port_info_out->name_type =
11659df97f1SXueming Li MLX5_PHYS_PORT_NAME_TYPE_PFVF;
11779aa4307SOphir Munk return;
11879aa4307SOphir Munk }
11959df97f1SXueming Li if (vf_c1 == 's' && vf_c2 == 'f') {
12059df97f1SXueming Li /* Kernel ver >= 5.11 or OFED ver >= 5.1 */
12159df97f1SXueming Li port_info_out->name_type =
12259df97f1SXueming Li MLX5_PHYS_PORT_NAME_TYPE_PFSF;
12359df97f1SXueming Li return;
12459df97f1SXueming Li }
12559df97f1SXueming Li }
12679aa4307SOphir Munk /*
12779aa4307SOphir Munk * Check for port-name as a string of the form p0
12879aa4307SOphir Munk * (support kernel ver >= 5.0, or OFED ver >= 4.6).
12979aa4307SOphir Munk */
1303590881bSViacheslav Ovsiienko sc_items = sscanf(port_name_in, "%c%d%c",
1313590881bSViacheslav Ovsiienko &pf_c1, &port_info_out->port_name, &eol);
13279aa4307SOphir Munk if (sc_items == 2 && pf_c1 == 'p') {
13379aa4307SOphir Munk port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
13479aa4307SOphir Munk return;
13579aa4307SOphir Munk }
136420bbdaeSViacheslav Ovsiienko /*
137420bbdaeSViacheslav Ovsiienko * Check for port-name as a string of the form pf0
138420bbdaeSViacheslav Ovsiienko * (support kernel ver >= 5.7 for HPF representor on BF).
139420bbdaeSViacheslav Ovsiienko */
1403590881bSViacheslav Ovsiienko sc_items = sscanf(port_name_in, "%c%c%d%c",
1413590881bSViacheslav Ovsiienko &pf_c1, &pf_c2, &port_info_out->pf_num, &eol);
142420bbdaeSViacheslav Ovsiienko if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') {
143420bbdaeSViacheslav Ovsiienko port_info_out->port_name = -1;
144420bbdaeSViacheslav Ovsiienko port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF;
145420bbdaeSViacheslav Ovsiienko return;
146420bbdaeSViacheslav Ovsiienko }
14779aa4307SOphir Munk /* Check for port-name as a number (support kernel ver < 5.0 */
14879aa4307SOphir Munk errno = 0;
14979aa4307SOphir Munk port_info_out->port_name = strtol(port_name_in, &end, 0);
15079aa4307SOphir Munk if (!errno &&
15179aa4307SOphir Munk (size_t)(end - port_name_in) == strlen(port_name_in)) {
15279aa4307SOphir Munk port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
15379aa4307SOphir Munk return;
15479aa4307SOphir Munk }
15579aa4307SOphir Munk port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
15679aa4307SOphir Munk }
15779aa4307SOphir Munk
158aec086c9SMatan Azrad int
mlx5_get_ifname_sysfs(const char * ibdev_path,char * ifname)159aec086c9SMatan Azrad mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
160aec086c9SMatan Azrad {
161aec086c9SMatan Azrad DIR *dir;
162aec086c9SMatan Azrad struct dirent *dent;
163aec086c9SMatan Azrad unsigned int dev_type = 0;
164aec086c9SMatan Azrad unsigned int dev_port_prev = ~0u;
165aec086c9SMatan Azrad char match[IF_NAMESIZE] = "";
166aec086c9SMatan Azrad
167aec086c9SMatan Azrad MLX5_ASSERT(ibdev_path);
168aec086c9SMatan Azrad {
169aec086c9SMatan Azrad MKSTR(path, "%s/device/net", ibdev_path);
170aec086c9SMatan Azrad
171aec086c9SMatan Azrad dir = opendir(path);
172aec086c9SMatan Azrad if (dir == NULL) {
173aec086c9SMatan Azrad rte_errno = errno;
174aec086c9SMatan Azrad return -rte_errno;
175aec086c9SMatan Azrad }
176aec086c9SMatan Azrad }
177aec086c9SMatan Azrad while ((dent = readdir(dir)) != NULL) {
178aec086c9SMatan Azrad char *name = dent->d_name;
179aec086c9SMatan Azrad FILE *file;
180aec086c9SMatan Azrad unsigned int dev_port;
181aec086c9SMatan Azrad int r;
182aec086c9SMatan Azrad
183aec086c9SMatan Azrad if ((name[0] == '.') &&
184aec086c9SMatan Azrad ((name[1] == '\0') ||
185aec086c9SMatan Azrad ((name[1] == '.') && (name[2] == '\0'))))
186aec086c9SMatan Azrad continue;
187aec086c9SMatan Azrad
188aec086c9SMatan Azrad MKSTR(path, "%s/device/net/%s/%s",
189aec086c9SMatan Azrad ibdev_path, name,
190aec086c9SMatan Azrad (dev_type ? "dev_id" : "dev_port"));
191aec086c9SMatan Azrad
192aec086c9SMatan Azrad file = fopen(path, "rb");
193aec086c9SMatan Azrad if (file == NULL) {
194aec086c9SMatan Azrad if (errno != ENOENT)
195aec086c9SMatan Azrad continue;
196aec086c9SMatan Azrad /*
197aec086c9SMatan Azrad * Switch to dev_id when dev_port does not exist as
198aec086c9SMatan Azrad * is the case with Linux kernel versions < 3.15.
199aec086c9SMatan Azrad */
200aec086c9SMatan Azrad try_dev_id:
201aec086c9SMatan Azrad match[0] = '\0';
202aec086c9SMatan Azrad if (dev_type)
203aec086c9SMatan Azrad break;
204aec086c9SMatan Azrad dev_type = 1;
205aec086c9SMatan Azrad dev_port_prev = ~0u;
206aec086c9SMatan Azrad rewinddir(dir);
207aec086c9SMatan Azrad continue;
208aec086c9SMatan Azrad }
209aec086c9SMatan Azrad r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
210aec086c9SMatan Azrad fclose(file);
211aec086c9SMatan Azrad if (r != 1)
212aec086c9SMatan Azrad continue;
213aec086c9SMatan Azrad /*
214aec086c9SMatan Azrad * Switch to dev_id when dev_port returns the same value for
215aec086c9SMatan Azrad * all ports. May happen when using a MOFED release older than
216aec086c9SMatan Azrad * 3.0 with a Linux kernel >= 3.15.
217aec086c9SMatan Azrad */
218aec086c9SMatan Azrad if (dev_port == dev_port_prev)
219aec086c9SMatan Azrad goto try_dev_id;
220aec086c9SMatan Azrad dev_port_prev = dev_port;
221aec086c9SMatan Azrad if (dev_port == 0)
222aec086c9SMatan Azrad strlcpy(match, name, IF_NAMESIZE);
223aec086c9SMatan Azrad }
224aec086c9SMatan Azrad closedir(dir);
225aec086c9SMatan Azrad if (match[0] == '\0') {
226aec086c9SMatan Azrad rte_errno = ENOENT;
227aec086c9SMatan Azrad return -rte_errno;
228aec086c9SMatan Azrad }
229aec086c9SMatan Azrad strncpy(ifname, match, IF_NAMESIZE);
230aec086c9SMatan Azrad return 0;
231aec086c9SMatan Azrad }
232aec086c9SMatan Azrad
23379aa4307SOphir Munk #ifdef MLX5_GLUE
23479aa4307SOphir Munk
23579aa4307SOphir Munk /**
23679aa4307SOphir Munk * Suffix RTE_EAL_PMD_PATH with "-glue".
23779aa4307SOphir Munk *
23879aa4307SOphir Munk * This function performs a sanity check on RTE_EAL_PMD_PATH before
23979aa4307SOphir Munk * suffixing its last component.
24079aa4307SOphir Munk *
24179aa4307SOphir Munk * @param buf[out]
24279aa4307SOphir Munk * Output buffer, should be large enough otherwise NULL is returned.
24379aa4307SOphir Munk * @param size
24479aa4307SOphir Munk * Size of @p out.
24579aa4307SOphir Munk *
24679aa4307SOphir Munk * @return
24779aa4307SOphir Munk * Pointer to @p buf or @p NULL in case suffix cannot be appended.
24879aa4307SOphir Munk */
24979aa4307SOphir Munk static char *
mlx5_glue_path(char * buf,size_t size)25079aa4307SOphir Munk mlx5_glue_path(char *buf, size_t size)
25179aa4307SOphir Munk {
25279aa4307SOphir Munk static const char *const bad[] = { "/", ".", "..", NULL };
25379aa4307SOphir Munk const char *path = RTE_EAL_PMD_PATH;
25479aa4307SOphir Munk size_t len = strlen(path);
25579aa4307SOphir Munk size_t off;
25679aa4307SOphir Munk int i;
25779aa4307SOphir Munk
25879aa4307SOphir Munk while (len && path[len - 1] == '/')
25979aa4307SOphir Munk --len;
26079aa4307SOphir Munk for (off = len; off && path[off - 1] != '/'; --off)
26179aa4307SOphir Munk ;
26279aa4307SOphir Munk for (i = 0; bad[i]; ++i)
26379aa4307SOphir Munk if (!strncmp(path + off, bad[i], (int)(len - off)))
26479aa4307SOphir Munk goto error;
26579aa4307SOphir Munk i = snprintf(buf, size, "%.*s-glue", (int)len, path);
26679aa4307SOphir Munk if (i == -1 || (size_t)i >= size)
26779aa4307SOphir Munk goto error;
26879aa4307SOphir Munk return buf;
26979aa4307SOphir Munk error:
2708c3a4688SStephen Hemminger DRV_LOG(ERR, "unable to append \"-glue\" to last component of"
27179aa4307SOphir Munk " RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please"
27279aa4307SOphir Munk " re-configure DPDK");
27379aa4307SOphir Munk return NULL;
27479aa4307SOphir Munk }
27579aa4307SOphir Munk
27679aa4307SOphir Munk static int
mlx5_glue_dlopen(void)27779aa4307SOphir Munk mlx5_glue_dlopen(void)
27879aa4307SOphir Munk {
27979aa4307SOphir Munk char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")];
28079aa4307SOphir Munk void *handle = NULL;
28179aa4307SOphir Munk
28279aa4307SOphir Munk char const *path[] = {
28379aa4307SOphir Munk /*
28479aa4307SOphir Munk * A basic security check is necessary before trusting
28579aa4307SOphir Munk * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
28679aa4307SOphir Munk */
28779aa4307SOphir Munk (geteuid() == getuid() && getegid() == getgid() ?
28879aa4307SOphir Munk getenv("MLX5_GLUE_PATH") : NULL),
28979aa4307SOphir Munk /*
29079aa4307SOphir Munk * When RTE_EAL_PMD_PATH is set, use its glue-suffixed
29179aa4307SOphir Munk * variant, otherwise let dlopen() look up libraries on its
29279aa4307SOphir Munk * own.
29379aa4307SOphir Munk */
29479aa4307SOphir Munk (*RTE_EAL_PMD_PATH ?
29579aa4307SOphir Munk mlx5_glue_path(glue_path, sizeof(glue_path)) : ""),
29679aa4307SOphir Munk };
29779aa4307SOphir Munk unsigned int i = 0;
29879aa4307SOphir Munk void **sym;
29979aa4307SOphir Munk const char *dlmsg;
30079aa4307SOphir Munk
30179aa4307SOphir Munk while (!handle && i != RTE_DIM(path)) {
30279aa4307SOphir Munk const char *end;
30379aa4307SOphir Munk size_t len;
30479aa4307SOphir Munk int ret;
30579aa4307SOphir Munk
30679aa4307SOphir Munk if (!path[i]) {
30779aa4307SOphir Munk ++i;
30879aa4307SOphir Munk continue;
30979aa4307SOphir Munk }
31079aa4307SOphir Munk end = strpbrk(path[i], ":;");
31179aa4307SOphir Munk if (!end)
31279aa4307SOphir Munk end = path[i] + strlen(path[i]);
31379aa4307SOphir Munk len = end - path[i];
31479aa4307SOphir Munk ret = 0;
31579aa4307SOphir Munk do {
31679aa4307SOphir Munk char name[ret + 1];
31779aa4307SOphir Munk
31879aa4307SOphir Munk ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE,
31979aa4307SOphir Munk (int)len, path[i],
32079aa4307SOphir Munk (!len || *(end - 1) == '/') ? "" : "/");
32179aa4307SOphir Munk if (ret == -1)
32279aa4307SOphir Munk break;
32379aa4307SOphir Munk if (sizeof(name) != (size_t)ret + 1)
32479aa4307SOphir Munk continue;
32579aa4307SOphir Munk DRV_LOG(DEBUG, "Looking for rdma-core glue as "
32679aa4307SOphir Munk "\"%s\"", name);
32779aa4307SOphir Munk handle = dlopen(name, RTLD_LAZY);
32879aa4307SOphir Munk break;
32979aa4307SOphir Munk } while (1);
33079aa4307SOphir Munk path[i] = end + 1;
33179aa4307SOphir Munk if (!*end)
33279aa4307SOphir Munk ++i;
33379aa4307SOphir Munk }
33479aa4307SOphir Munk if (!handle) {
33579aa4307SOphir Munk rte_errno = EINVAL;
33679aa4307SOphir Munk dlmsg = dlerror();
33779aa4307SOphir Munk if (dlmsg)
33879aa4307SOphir Munk DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg);
33979aa4307SOphir Munk goto glue_error;
34079aa4307SOphir Munk }
34179aa4307SOphir Munk sym = dlsym(handle, "mlx5_glue");
34279aa4307SOphir Munk if (!sym || !*sym) {
34379aa4307SOphir Munk rte_errno = EINVAL;
34479aa4307SOphir Munk dlmsg = dlerror();
34579aa4307SOphir Munk if (dlmsg)
34679aa4307SOphir Munk DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg);
34779aa4307SOphir Munk goto glue_error;
34879aa4307SOphir Munk }
34979aa4307SOphir Munk mlx5_glue = *sym;
35079aa4307SOphir Munk return 0;
35179aa4307SOphir Munk
35279aa4307SOphir Munk glue_error:
35379aa4307SOphir Munk if (handle)
35479aa4307SOphir Munk dlclose(handle);
35579aa4307SOphir Munk return -1;
35679aa4307SOphir Munk }
35779aa4307SOphir Munk
35879aa4307SOphir Munk #endif
35979aa4307SOphir Munk
36079aa4307SOphir Munk /**
36179aa4307SOphir Munk * Initialization routine for run-time dependency on rdma-core.
36279aa4307SOphir Munk */
36379aa4307SOphir Munk void
mlx5_glue_constructor(void)36479aa4307SOphir Munk mlx5_glue_constructor(void)
36579aa4307SOphir Munk {
36679aa4307SOphir Munk /*
36779aa4307SOphir Munk * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
36879aa4307SOphir Munk * huge pages. Calling ibv_fork_init() during init allows
36979aa4307SOphir Munk * applications to use fork() safely for purposes other than
37079aa4307SOphir Munk * using this PMD, which is not supported in forked processes.
37179aa4307SOphir Munk */
37279aa4307SOphir Munk setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
37379aa4307SOphir Munk /* Match the size of Rx completion entry to the size of a cacheline. */
37479aa4307SOphir Munk if (RTE_CACHE_LINE_SIZE == 128)
37579aa4307SOphir Munk setenv("MLX5_CQE_SIZE", "128", 0);
37679aa4307SOphir Munk /*
37779aa4307SOphir Munk * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to
37879aa4307SOphir Munk * cleanup all the Verbs resources even when the device was removed.
37979aa4307SOphir Munk */
38079aa4307SOphir Munk setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1);
38179aa4307SOphir Munk
38279aa4307SOphir Munk #ifdef MLX5_GLUE
38379aa4307SOphir Munk if (mlx5_glue_dlopen() != 0)
38479aa4307SOphir Munk goto glue_error;
38579aa4307SOphir Munk #endif
38679aa4307SOphir Munk
38779aa4307SOphir Munk #ifdef RTE_LIBRTE_MLX5_DEBUG
38879aa4307SOphir Munk /* Glue structure must not contain any NULL pointers. */
38979aa4307SOphir Munk {
39079aa4307SOphir Munk unsigned int i;
39179aa4307SOphir Munk
39279aa4307SOphir Munk for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
39379aa4307SOphir Munk MLX5_ASSERT(((const void *const *)mlx5_glue)[i]);
39479aa4307SOphir Munk }
39579aa4307SOphir Munk #endif
39679aa4307SOphir Munk if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) {
39779aa4307SOphir Munk rte_errno = EINVAL;
39879aa4307SOphir Munk DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is "
39979aa4307SOphir Munk "required", mlx5_glue->version, MLX5_GLUE_VERSION);
40079aa4307SOphir Munk goto glue_error;
40179aa4307SOphir Munk }
40279aa4307SOphir Munk mlx5_glue->fork_init();
40379aa4307SOphir Munk return;
40479aa4307SOphir Munk
40579aa4307SOphir Munk glue_error:
40679aa4307SOphir Munk DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing"
40779aa4307SOphir Munk " run-time dependency on rdma-core libraries (libibverbs,"
40879aa4307SOphir Munk " libmlx5)");
40979aa4307SOphir Munk mlx5_glue = NULL;
41079aa4307SOphir Munk }
411262c7ad0SOri Kam
412e35ccf24SMichael Baum /**
4139d936f4fSMichael Baum * Validate user arguments for remote PD and CTX.
4149d936f4fSMichael Baum *
4159d936f4fSMichael Baum * @param config
4169d936f4fSMichael Baum * Pointer to device configuration structure.
4179d936f4fSMichael Baum *
4189d936f4fSMichael Baum * @return
4199d936f4fSMichael Baum * 0 on success, a negative errno value otherwise and rte_errno is set.
4209d936f4fSMichael Baum */
4219d936f4fSMichael Baum int
mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config * config)4229d936f4fSMichael Baum mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config)
4239d936f4fSMichael Baum {
4249d936f4fSMichael Baum int device_fd = config->device_fd;
4259d936f4fSMichael Baum int pd_handle = config->pd_handle;
4269d936f4fSMichael Baum
4279d936f4fSMichael Baum #ifdef HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR
4289d936f4fSMichael Baum if (device_fd == MLX5_ARG_UNSET && pd_handle != MLX5_ARG_UNSET) {
4299d936f4fSMichael Baum DRV_LOG(ERR, "Remote PD without CTX is not supported.");
4309d936f4fSMichael Baum rte_errno = EINVAL;
4319d936f4fSMichael Baum return -rte_errno;
4329d936f4fSMichael Baum }
4339d936f4fSMichael Baum if (device_fd != MLX5_ARG_UNSET && pd_handle == MLX5_ARG_UNSET) {
4349d936f4fSMichael Baum DRV_LOG(ERR, "Remote CTX without PD is not supported.");
4359d936f4fSMichael Baum rte_errno = EINVAL;
4369d936f4fSMichael Baum return -rte_errno;
4379d936f4fSMichael Baum }
4389d936f4fSMichael Baum DRV_LOG(DEBUG, "Remote PD and CTX is supported: (cmd_fd=%d, "
4399d936f4fSMichael Baum "pd_handle=%d).", device_fd, pd_handle);
4409d936f4fSMichael Baum #else
4419d936f4fSMichael Baum if (pd_handle != MLX5_ARG_UNSET || device_fd != MLX5_ARG_UNSET) {
4429d936f4fSMichael Baum DRV_LOG(ERR,
4439d936f4fSMichael Baum "Remote PD and CTX is not supported - maybe old rdma-core version?");
4449d936f4fSMichael Baum rte_errno = ENOTSUP;
4459d936f4fSMichael Baum return -rte_errno;
4469d936f4fSMichael Baum }
4479d936f4fSMichael Baum #endif
4489d936f4fSMichael Baum return 0;
4499d936f4fSMichael Baum }
4509d936f4fSMichael Baum
4519d936f4fSMichael Baum /**
4529d936f4fSMichael Baum * Release Protection Domain object.
4539d936f4fSMichael Baum *
4549d936f4fSMichael Baum * @param[out] cdev
4559d936f4fSMichael Baum * Pointer to the mlx5 device.
4569d936f4fSMichael Baum *
4579d936f4fSMichael Baum * @return
4589d936f4fSMichael Baum * 0 on success, a negative errno value otherwise.
4599d936f4fSMichael Baum */
4609d936f4fSMichael Baum int
mlx5_os_pd_release(struct mlx5_common_device * cdev)4619d936f4fSMichael Baum mlx5_os_pd_release(struct mlx5_common_device *cdev)
4629d936f4fSMichael Baum {
4639d936f4fSMichael Baum if (cdev->config.pd_handle == MLX5_ARG_UNSET)
4649d936f4fSMichael Baum return mlx5_glue->dealloc_pd(cdev->pd);
4659d936f4fSMichael Baum else
4669d936f4fSMichael Baum return mlx5_glue->unimport_pd(cdev->pd);
4679d936f4fSMichael Baum }
4689d936f4fSMichael Baum
4699d936f4fSMichael Baum /**
4709d936f4fSMichael Baum * Allocate Protection Domain object.
4719d936f4fSMichael Baum *
4729d936f4fSMichael Baum * @param[out] cdev
4739d936f4fSMichael Baum * Pointer to the mlx5 device.
4749d936f4fSMichael Baum *
4759d936f4fSMichael Baum * @return
4769d936f4fSMichael Baum * 0 on success, a negative errno value otherwise.
4779d936f4fSMichael Baum */
4789d936f4fSMichael Baum static int
mlx5_os_pd_create(struct mlx5_common_device * cdev)4799d936f4fSMichael Baum mlx5_os_pd_create(struct mlx5_common_device *cdev)
4809d936f4fSMichael Baum {
4819d936f4fSMichael Baum cdev->pd = mlx5_glue->alloc_pd(cdev->ctx);
4829d936f4fSMichael Baum if (cdev->pd == NULL) {
4839d936f4fSMichael Baum DRV_LOG(ERR, "Failed to allocate PD: %s", rte_strerror(errno));
4849d936f4fSMichael Baum return errno ? -errno : -ENOMEM;
4859d936f4fSMichael Baum }
4869d936f4fSMichael Baum return 0;
4879d936f4fSMichael Baum }
4889d936f4fSMichael Baum
4899d936f4fSMichael Baum /**
4909d936f4fSMichael Baum * Import Protection Domain object according to given PD handle.
4919d936f4fSMichael Baum *
4929d936f4fSMichael Baum * @param[out] cdev
4939d936f4fSMichael Baum * Pointer to the mlx5 device.
4949d936f4fSMichael Baum *
4959d936f4fSMichael Baum * @return
4969d936f4fSMichael Baum * 0 on success, a negative errno value otherwise.
4979d936f4fSMichael Baum */
4989d936f4fSMichael Baum static int
mlx5_os_pd_import(struct mlx5_common_device * cdev)4999d936f4fSMichael Baum mlx5_os_pd_import(struct mlx5_common_device *cdev)
5009d936f4fSMichael Baum {
5019d936f4fSMichael Baum cdev->pd = mlx5_glue->import_pd(cdev->ctx, cdev->config.pd_handle);
5029d936f4fSMichael Baum if (cdev->pd == NULL) {
5039d936f4fSMichael Baum DRV_LOG(ERR, "Failed to import PD using handle=%d: %s",
5049d936f4fSMichael Baum cdev->config.pd_handle, rte_strerror(errno));
5059d936f4fSMichael Baum return errno ? -errno : -ENOMEM;
5069d936f4fSMichael Baum }
5079d936f4fSMichael Baum return 0;
5089d936f4fSMichael Baum }
5099d936f4fSMichael Baum
5109d936f4fSMichael Baum /**
5119d936f4fSMichael Baum * Prepare Protection Domain object and extract its pdn using DV API.
512e35ccf24SMichael Baum *
513e35ccf24SMichael Baum * @param[out] cdev
514e35ccf24SMichael Baum * Pointer to the mlx5 device.
515e35ccf24SMichael Baum *
516e35ccf24SMichael Baum * @return
517e35ccf24SMichael Baum * 0 on success, a negative errno value otherwise and rte_errno is set.
518e35ccf24SMichael Baum */
519e35ccf24SMichael Baum int
mlx5_os_pd_prepare(struct mlx5_common_device * cdev)5209d936f4fSMichael Baum mlx5_os_pd_prepare(struct mlx5_common_device *cdev)
521e35ccf24SMichael Baum {
522e35ccf24SMichael Baum #ifdef HAVE_IBV_FLOW_DV_SUPPORT
523e35ccf24SMichael Baum struct mlx5dv_obj obj;
524e35ccf24SMichael Baum struct mlx5dv_pd pd_info;
525e35ccf24SMichael Baum #endif
5269d936f4fSMichael Baum int ret;
527e35ccf24SMichael Baum
5289d936f4fSMichael Baum if (cdev->config.pd_handle == MLX5_ARG_UNSET)
5299d936f4fSMichael Baum ret = mlx5_os_pd_create(cdev);
5309d936f4fSMichael Baum else
5319d936f4fSMichael Baum ret = mlx5_os_pd_import(cdev);
5329d936f4fSMichael Baum if (ret) {
5339d936f4fSMichael Baum rte_errno = -ret;
5349d936f4fSMichael Baum return ret;
535e35ccf24SMichael Baum }
536e35ccf24SMichael Baum if (cdev->config.devx == 0)
537e35ccf24SMichael Baum return 0;
538e35ccf24SMichael Baum #ifdef HAVE_IBV_FLOW_DV_SUPPORT
539e35ccf24SMichael Baum obj.pd.in = cdev->pd;
540e35ccf24SMichael Baum obj.pd.out = &pd_info;
541e35ccf24SMichael Baum ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
542e35ccf24SMichael Baum if (ret != 0) {
543e35ccf24SMichael Baum DRV_LOG(ERR, "Fail to get PD object info.");
5449d936f4fSMichael Baum rte_errno = errno;
5459d936f4fSMichael Baum claim_zero(mlx5_os_pd_release(cdev));
546e35ccf24SMichael Baum cdev->pd = NULL;
5479d936f4fSMichael Baum return -rte_errno;
548e35ccf24SMichael Baum }
549e35ccf24SMichael Baum cdev->pdn = pd_info.pdn;
550e35ccf24SMichael Baum return 0;
551e35ccf24SMichael Baum #else
552e35ccf24SMichael Baum DRV_LOG(ERR, "Cannot get pdn - no DV support.");
5539d936f4fSMichael Baum rte_errno = ENOTSUP;
5549d936f4fSMichael Baum return -rte_errno;
555e35ccf24SMichael Baum #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
556e35ccf24SMichael Baum }
557e35ccf24SMichael Baum
558662d0dc6SMichael Baum static struct ibv_device *
mlx5_os_get_ibv_device(const struct rte_pci_device * pci_dev)55937ca457dSBing Zhao mlx5_os_get_ibv_device(const struct rte_pci_device *pci_dev)
560c31f3f7fSShiri Kuzin {
561c31f3f7fSShiri Kuzin int n;
562c31f3f7fSShiri Kuzin struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
563c31f3f7fSShiri Kuzin struct ibv_device *ibv_match = NULL;
564f956d3d4SRongwei Liu uint8_t guid1[32] = {0};
565f956d3d4SRongwei Liu uint8_t guid2[32] = {0};
566f956d3d4SRongwei Liu int ret1, ret2 = -1;
567c31f3f7fSShiri Kuzin struct rte_pci_addr paddr;
56837ca457dSBing Zhao const struct rte_pci_addr *addr = &pci_dev->addr;
56937ca457dSBing Zhao bool is_vf_dev = mlx5_dev_is_vf_pci(pci_dev);
570c31f3f7fSShiri Kuzin
571f956d3d4SRongwei Liu if (ibv_list == NULL || !n) {
572f956d3d4SRongwei Liu rte_errno = ENOSYS;
573f956d3d4SRongwei Liu if (ibv_list)
574f956d3d4SRongwei Liu mlx5_glue->free_device_list(ibv_list);
575f956d3d4SRongwei Liu return NULL;
576f956d3d4SRongwei Liu }
577f956d3d4SRongwei Liu ret1 = mlx5_get_device_guid(addr, guid1, sizeof(guid1));
578f956d3d4SRongwei Liu while (n-- > 0) {
579c31f3f7fSShiri Kuzin DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
5804d567938SThomas Monjalon if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
581c31f3f7fSShiri Kuzin continue;
582f956d3d4SRongwei Liu if (ret1 > 0)
583f956d3d4SRongwei Liu ret2 = mlx5_get_device_guid(&paddr, guid2, sizeof(guid2));
584f956d3d4SRongwei Liu /* Bond device can bond secondary PCIe */
58537ca457dSBing Zhao if ((strstr(ibv_list[n]->name, "bond") && !is_vf_dev &&
586f956d3d4SRongwei Liu ((ret1 > 0 && ret2 > 0 && !memcmp(guid1, guid2, sizeof(guid1))) ||
587f956d3d4SRongwei Liu (addr->domain == paddr.domain && addr->bus == paddr.bus &&
588f956d3d4SRongwei Liu addr->devid == paddr.devid))) ||
589f956d3d4SRongwei Liu !rte_pci_addr_cmp(addr, &paddr)) {
590c31f3f7fSShiri Kuzin ibv_match = ibv_list[n];
591c31f3f7fSShiri Kuzin break;
592c31f3f7fSShiri Kuzin }
593f956d3d4SRongwei Liu }
594ca1418ceSMichael Baum if (ibv_match == NULL) {
595ca1418ceSMichael Baum DRV_LOG(WARNING,
596ca1418ceSMichael Baum "No Verbs device matches PCI device " PCI_PRI_FMT ","
597ca1418ceSMichael Baum " are kernel drivers loaded?",
598ca1418ceSMichael Baum addr->domain, addr->bus, addr->devid, addr->function);
599c31f3f7fSShiri Kuzin rte_errno = ENOENT;
600ca1418ceSMichael Baum }
601c31f3f7fSShiri Kuzin mlx5_glue->free_device_list(ibv_list);
602c31f3f7fSShiri Kuzin return ibv_match;
603c31f3f7fSShiri Kuzin }
604887183efSMichael Baum
605662d0dc6SMichael Baum /* Try to disable ROCE by Netlink\Devlink. */
606662d0dc6SMichael Baum static int
mlx5_nl_roce_disable(const char * addr)607662d0dc6SMichael Baum mlx5_nl_roce_disable(const char *addr)
608662d0dc6SMichael Baum {
609be66461cSDmitry Kozlyuk int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0);
610662d0dc6SMichael Baum int devlink_id;
611662d0dc6SMichael Baum int enable;
612662d0dc6SMichael Baum int ret;
613662d0dc6SMichael Baum
614662d0dc6SMichael Baum if (nlsk_fd < 0)
615662d0dc6SMichael Baum return nlsk_fd;
616662d0dc6SMichael Baum devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
617662d0dc6SMichael Baum if (devlink_id < 0) {
618662d0dc6SMichael Baum ret = devlink_id;
619662d0dc6SMichael Baum DRV_LOG(DEBUG,
620662d0dc6SMichael Baum "Failed to get devlink id for ROCE operations by Netlink.");
621662d0dc6SMichael Baum goto close;
622662d0dc6SMichael Baum }
623662d0dc6SMichael Baum ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
624662d0dc6SMichael Baum if (ret) {
625662d0dc6SMichael Baum DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
626662d0dc6SMichael Baum ret);
627662d0dc6SMichael Baum goto close;
628662d0dc6SMichael Baum } else if (!enable) {
629662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
630662d0dc6SMichael Baum goto close;
631662d0dc6SMichael Baum }
632662d0dc6SMichael Baum ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
633662d0dc6SMichael Baum if (ret)
634662d0dc6SMichael Baum DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
635662d0dc6SMichael Baum else
636662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
637662d0dc6SMichael Baum close:
638662d0dc6SMichael Baum close(nlsk_fd);
639662d0dc6SMichael Baum return ret;
640662d0dc6SMichael Baum }
641662d0dc6SMichael Baum
642662d0dc6SMichael Baum /* Try to disable ROCE by sysfs. */
643662d0dc6SMichael Baum static int
mlx5_sys_roce_disable(const char * addr)644662d0dc6SMichael Baum mlx5_sys_roce_disable(const char *addr)
645662d0dc6SMichael Baum {
646662d0dc6SMichael Baum FILE *file_o;
647662d0dc6SMichael Baum int enable;
648662d0dc6SMichael Baum int ret;
649662d0dc6SMichael Baum
650662d0dc6SMichael Baum MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
651662d0dc6SMichael Baum file_o = fopen(file_p, "rb");
652662d0dc6SMichael Baum if (!file_o) {
653662d0dc6SMichael Baum rte_errno = ENOTSUP;
654662d0dc6SMichael Baum return -ENOTSUP;
655662d0dc6SMichael Baum }
656662d0dc6SMichael Baum ret = fscanf(file_o, "%d", &enable);
657662d0dc6SMichael Baum if (ret != 1) {
658662d0dc6SMichael Baum rte_errno = EINVAL;
659662d0dc6SMichael Baum ret = EINVAL;
660662d0dc6SMichael Baum goto close;
661662d0dc6SMichael Baum } else if (!enable) {
662662d0dc6SMichael Baum ret = 0;
663662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
664662d0dc6SMichael Baum goto close;
665662d0dc6SMichael Baum }
666662d0dc6SMichael Baum fclose(file_o);
667662d0dc6SMichael Baum file_o = fopen(file_p, "wb");
668662d0dc6SMichael Baum if (!file_o) {
669662d0dc6SMichael Baum rte_errno = ENOTSUP;
670662d0dc6SMichael Baum return -ENOTSUP;
671662d0dc6SMichael Baum }
672662d0dc6SMichael Baum fprintf(file_o, "0\n");
673662d0dc6SMichael Baum ret = 0;
674662d0dc6SMichael Baum close:
675662d0dc6SMichael Baum if (ret)
676662d0dc6SMichael Baum DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
677662d0dc6SMichael Baum else
678662d0dc6SMichael Baum DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
679662d0dc6SMichael Baum fclose(file_o);
680662d0dc6SMichael Baum return ret;
681662d0dc6SMichael Baum }
682662d0dc6SMichael Baum
683662d0dc6SMichael Baum static int
mlx5_roce_disable(const struct rte_device * dev)684662d0dc6SMichael Baum mlx5_roce_disable(const struct rte_device *dev)
685662d0dc6SMichael Baum {
686662d0dc6SMichael Baum char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
687662d0dc6SMichael Baum
688662d0dc6SMichael Baum if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0)
689662d0dc6SMichael Baum return -rte_errno;
690662d0dc6SMichael Baum /* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
691662d0dc6SMichael Baum if (mlx5_nl_roce_disable(pci_addr) != 0 &&
692662d0dc6SMichael Baum mlx5_sys_roce_disable(pci_addr) != 0)
693662d0dc6SMichael Baum return -rte_errno;
694662d0dc6SMichael Baum return 0;
695662d0dc6SMichael Baum }
696662d0dc6SMichael Baum
697662d0dc6SMichael Baum static struct ibv_device *
mlx5_os_get_ibv_dev(const struct rte_device * dev)698662d0dc6SMichael Baum mlx5_os_get_ibv_dev(const struct rte_device *dev)
699662d0dc6SMichael Baum {
700662d0dc6SMichael Baum struct ibv_device *ibv;
701662d0dc6SMichael Baum
702662d0dc6SMichael Baum if (mlx5_dev_is_pci(dev))
70337ca457dSBing Zhao ibv = mlx5_os_get_ibv_device(RTE_DEV_TO_PCI_CONST(dev));
704662d0dc6SMichael Baum else
705662d0dc6SMichael Baum ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev));
706662d0dc6SMichael Baum if (ibv == NULL) {
707662d0dc6SMichael Baum rte_errno = ENODEV;
708662d0dc6SMichael Baum DRV_LOG(ERR, "Verbs device not found: %s", dev->name);
709662d0dc6SMichael Baum }
710662d0dc6SMichael Baum return ibv;
711662d0dc6SMichael Baum }
712662d0dc6SMichael Baum
713662d0dc6SMichael Baum static struct ibv_device *
mlx5_vdpa_get_ibv_dev(const struct rte_device * dev)714662d0dc6SMichael Baum mlx5_vdpa_get_ibv_dev(const struct rte_device *dev)
715662d0dc6SMichael Baum {
716662d0dc6SMichael Baum struct ibv_device *ibv;
717662d0dc6SMichael Baum int retry;
718662d0dc6SMichael Baum
719662d0dc6SMichael Baum if (mlx5_roce_disable(dev) != 0) {
720662d0dc6SMichael Baum DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
721662d0dc6SMichael Baum dev->name);
722662d0dc6SMichael Baum return NULL;
723662d0dc6SMichael Baum }
724662d0dc6SMichael Baum /* Wait for the IB device to appear again after reload. */
725662d0dc6SMichael Baum for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
726662d0dc6SMichael Baum ibv = mlx5_os_get_ibv_dev(dev);
727662d0dc6SMichael Baum if (ibv != NULL)
728662d0dc6SMichael Baum return ibv;
729662d0dc6SMichael Baum usleep(MLX5_VDPA_USEC);
730662d0dc6SMichael Baum }
731662d0dc6SMichael Baum DRV_LOG(ERR,
732662d0dc6SMichael Baum "Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.",
733662d0dc6SMichael Baum dev->name, MLX5_VDPA_MAX_RETRIES);
734662d0dc6SMichael Baum rte_errno = EAGAIN;
735662d0dc6SMichael Baum return NULL;
736662d0dc6SMichael Baum }
737662d0dc6SMichael Baum
738887183efSMichael Baum static int
mlx5_config_doorbell_mapping_env(int dbnc)739887183efSMichael Baum mlx5_config_doorbell_mapping_env(int dbnc)
740887183efSMichael Baum {
741887183efSMichael Baum char *env;
742887183efSMichael Baum int value;
743887183efSMichael Baum
744887183efSMichael Baum MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
745887183efSMichael Baum /* Get environment variable to store. */
746887183efSMichael Baum env = getenv(MLX5_SHUT_UP_BF);
747887183efSMichael Baum value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET;
748887183efSMichael Baum if (dbnc == MLX5_ARG_UNSET)
749887183efSMichael Baum setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
750887183efSMichael Baum else
751887183efSMichael Baum setenv(MLX5_SHUT_UP_BF,
752a6b9d5a5SMichael Baum dbnc == MLX5_SQ_DB_NCACHED ? "1" : "0", 1);
753887183efSMichael Baum return value;
754887183efSMichael Baum }
755887183efSMichael Baum
756887183efSMichael Baum static void
mlx5_restore_doorbell_mapping_env(int value)757887183efSMichael Baum mlx5_restore_doorbell_mapping_env(int value)
758887183efSMichael Baum {
759887183efSMichael Baum MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
760887183efSMichael Baum /* Restore the original environment variable state. */
761887183efSMichael Baum if (value == MLX5_ARG_UNSET)
762887183efSMichael Baum unsetenv(MLX5_SHUT_UP_BF);
763887183efSMichael Baum else
764887183efSMichael Baum setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
765887183efSMichael Baum }
766887183efSMichael Baum
767887183efSMichael Baum /**
768887183efSMichael Baum * Function API to open IB device.
769887183efSMichael Baum *
770887183efSMichael Baum * @param cdev
771887183efSMichael Baum * Pointer to the mlx5 device.
772ca1418ceSMichael Baum * @param classes
773ca1418ceSMichael Baum * Chosen classes come from device arguments.
774887183efSMichael Baum *
775887183efSMichael Baum * @return
7769d936f4fSMichael Baum * Pointer to ibv_context on success, NULL otherwise and rte_errno is set.
777887183efSMichael Baum */
7789d936f4fSMichael Baum static struct ibv_context *
mlx5_open_device(struct mlx5_common_device * cdev,uint32_t classes)7799d936f4fSMichael Baum mlx5_open_device(struct mlx5_common_device *cdev, uint32_t classes)
780887183efSMichael Baum {
781887183efSMichael Baum struct ibv_device *ibv;
782887183efSMichael Baum struct ibv_context *ctx = NULL;
783887183efSMichael Baum int dbmap_env;
784887183efSMichael Baum
7859d936f4fSMichael Baum MLX5_ASSERT(cdev->config.device_fd == MLX5_ARG_UNSET);
786662d0dc6SMichael Baum if (classes & MLX5_CLASS_VDPA)
787662d0dc6SMichael Baum ibv = mlx5_vdpa_get_ibv_dev(cdev->dev);
788662d0dc6SMichael Baum else
789887183efSMichael Baum ibv = mlx5_os_get_ibv_dev(cdev->dev);
790887183efSMichael Baum if (!ibv)
7919d936f4fSMichael Baum return NULL;
792887183efSMichael Baum DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
793887183efSMichael Baum /*
794887183efSMichael Baum * Configure environment variable "MLX5_BF_SHUT_UP" before the device
795887183efSMichael Baum * creation. The rdma_core library checks the variable at device
796887183efSMichael Baum * creation and stores the result internally.
797887183efSMichael Baum */
798887183efSMichael Baum dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc);
799887183efSMichael Baum /* Try to open IB device with DV first, then usual Verbs. */
800887183efSMichael Baum errno = 0;
801887183efSMichael Baum ctx = mlx5_glue->dv_open_device(ibv);
802887183efSMichael Baum if (ctx) {
803887183efSMichael Baum cdev->config.devx = 1;
804ca1418ceSMichael Baum } else if (classes == MLX5_CLASS_ETH) {
805887183efSMichael Baum /* The environment variable is still configured. */
806887183efSMichael Baum ctx = mlx5_glue->open_device(ibv);
807887183efSMichael Baum if (ctx == NULL)
808887183efSMichael Baum goto error;
809ca1418ceSMichael Baum } else {
810ca1418ceSMichael Baum goto error;
811887183efSMichael Baum }
812887183efSMichael Baum /* The device is created, no need for environment. */
813887183efSMichael Baum mlx5_restore_doorbell_mapping_env(dbmap_env);
8149d936f4fSMichael Baum return ctx;
815887183efSMichael Baum error:
816887183efSMichael Baum rte_errno = errno ? errno : ENODEV;
817887183efSMichael Baum /* The device creation is failed, no need for environment. */
818887183efSMichael Baum mlx5_restore_doorbell_mapping_env(dbmap_env);
819887183efSMichael Baum DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
8209d936f4fSMichael Baum return NULL;
821887183efSMichael Baum }
8229d936f4fSMichael Baum
8239d936f4fSMichael Baum /**
8249d936f4fSMichael Baum * Function API to import IB device.
8259d936f4fSMichael Baum *
8269d936f4fSMichael Baum * @param cdev
8279d936f4fSMichael Baum * Pointer to the mlx5 device.
8289d936f4fSMichael Baum *
8299d936f4fSMichael Baum * @return
8309d936f4fSMichael Baum * Pointer to ibv_context on success, NULL otherwise and rte_errno is set.
8319d936f4fSMichael Baum */
8329d936f4fSMichael Baum static struct ibv_context *
mlx5_import_device(struct mlx5_common_device * cdev)8339d936f4fSMichael Baum mlx5_import_device(struct mlx5_common_device *cdev)
8349d936f4fSMichael Baum {
8359d936f4fSMichael Baum struct ibv_context *ctx = NULL;
8369d936f4fSMichael Baum
8379d936f4fSMichael Baum MLX5_ASSERT(cdev->config.device_fd != MLX5_ARG_UNSET);
8389d936f4fSMichael Baum ctx = mlx5_glue->import_device(cdev->config.device_fd);
8399d936f4fSMichael Baum if (!ctx) {
8409d936f4fSMichael Baum DRV_LOG(ERR, "Failed to import device for fd=%d: %s",
8419d936f4fSMichael Baum cdev->config.device_fd, rte_strerror(errno));
8429d936f4fSMichael Baum rte_errno = errno;
8439d936f4fSMichael Baum }
8449d936f4fSMichael Baum return ctx;
8459d936f4fSMichael Baum }
8469d936f4fSMichael Baum
8479d936f4fSMichael Baum /**
8489d936f4fSMichael Baum * Function API to prepare IB device.
8499d936f4fSMichael Baum *
8509d936f4fSMichael Baum * @param cdev
8519d936f4fSMichael Baum * Pointer to the mlx5 device.
8529d936f4fSMichael Baum * @param classes
8539d936f4fSMichael Baum * Chosen classes come from device arguments.
8549d936f4fSMichael Baum *
8559d936f4fSMichael Baum * @return
8569d936f4fSMichael Baum * 0 on success, a negative errno value otherwise and rte_errno is set.
8579d936f4fSMichael Baum */
8589d936f4fSMichael Baum int
mlx5_os_open_device(struct mlx5_common_device * cdev,uint32_t classes)8599d936f4fSMichael Baum mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
8609d936f4fSMichael Baum {
8619d936f4fSMichael Baum
8629d936f4fSMichael Baum struct ibv_context *ctx = NULL;
8639d936f4fSMichael Baum
8649d936f4fSMichael Baum if (cdev->config.device_fd == MLX5_ARG_UNSET)
8659d936f4fSMichael Baum ctx = mlx5_open_device(cdev, classes);
8669d936f4fSMichael Baum else
8679d936f4fSMichael Baum ctx = mlx5_import_device(cdev);
8689d936f4fSMichael Baum if (ctx == NULL)
8699d936f4fSMichael Baum return -rte_errno;
8709d936f4fSMichael Baum /* Hint libmlx5 to use PMD allocator for data plane resources */
8719d936f4fSMichael Baum mlx5_set_context_attr(cdev->dev, ctx);
8729d936f4fSMichael Baum cdev->ctx = ctx;
8739d936f4fSMichael Baum return 0;
8749d936f4fSMichael Baum }
8759d936f4fSMichael Baum
8764c74ad3eSRongwei Liu int
mlx5_get_device_guid(const struct rte_pci_addr * dev,uint8_t * guid,size_t len)8774c74ad3eSRongwei Liu mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len)
8784c74ad3eSRongwei Liu {
8794c74ad3eSRongwei Liu char tmp[512];
8804c74ad3eSRongwei Liu char cur_ifname[IF_NAMESIZE + 1];
8814c74ad3eSRongwei Liu FILE *id_file;
8824c74ad3eSRongwei Liu DIR *dir;
8834c74ad3eSRongwei Liu struct dirent *ptr;
8844c74ad3eSRongwei Liu int ret;
8854c74ad3eSRongwei Liu
8864c74ad3eSRongwei Liu if (guid == NULL || len < sizeof(u_int64_t) + 1)
8874c74ad3eSRongwei Liu return -1;
8884c74ad3eSRongwei Liu memset(guid, 0, len);
8894c74ad3eSRongwei Liu snprintf(tmp, sizeof(tmp), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/net",
8904c74ad3eSRongwei Liu dev->domain, dev->bus, dev->devid, dev->function);
8914c74ad3eSRongwei Liu dir = opendir(tmp);
8924c74ad3eSRongwei Liu if (dir == NULL)
8934c74ad3eSRongwei Liu return -1;
8944c74ad3eSRongwei Liu /* Traverse to identify PF interface */
8954c74ad3eSRongwei Liu do {
8964c74ad3eSRongwei Liu ptr = readdir(dir);
8974c74ad3eSRongwei Liu if (ptr == NULL || ptr->d_type != DT_DIR) {
8984c74ad3eSRongwei Liu closedir(dir);
8994c74ad3eSRongwei Liu return -1;
9004c74ad3eSRongwei Liu }
9014c74ad3eSRongwei Liu } while (strchr(ptr->d_name, '.') || strchr(ptr->d_name, '_') ||
9024c74ad3eSRongwei Liu strchr(ptr->d_name, 'v'));
9034c74ad3eSRongwei Liu snprintf(cur_ifname, sizeof(cur_ifname), "%s", ptr->d_name);
9044c74ad3eSRongwei Liu closedir(dir);
9054c74ad3eSRongwei Liu snprintf(tmp + strlen(tmp), sizeof(tmp) - strlen(tmp),
9064c74ad3eSRongwei Liu "/%s/phys_switch_id", cur_ifname);
9074c74ad3eSRongwei Liu /* Older OFED like 5.3 doesn't support read */
9084c74ad3eSRongwei Liu id_file = fopen(tmp, "r");
9094c74ad3eSRongwei Liu if (!id_file)
9104c74ad3eSRongwei Liu return 0;
9114c74ad3eSRongwei Liu ret = fscanf(id_file, "%16s", guid);
9124c74ad3eSRongwei Liu fclose(id_file);
9134c74ad3eSRongwei Liu return ret;
9144c74ad3eSRongwei Liu }
91576b5bdf8SMatan Azrad
91676b5bdf8SMatan Azrad /*
91776b5bdf8SMatan Azrad * Create direct mkey using the kernel ibv_reg_mr API and wrap it with a new
91876b5bdf8SMatan Azrad * indirect mkey created by the DevX API.
91976b5bdf8SMatan Azrad * This mkey should be used for DevX commands requesting mkey as a parameter.
92076b5bdf8SMatan Azrad */
92176b5bdf8SMatan Azrad int
mlx5_os_wrapped_mkey_create(void * ctx,void * pd,uint32_t pdn,void * addr,size_t length,struct mlx5_pmd_wrapped_mr * pmd_mr)92276b5bdf8SMatan Azrad mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr,
92376b5bdf8SMatan Azrad size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr)
92476b5bdf8SMatan Azrad {
92576b5bdf8SMatan Azrad struct mlx5_klm klm = {
92676b5bdf8SMatan Azrad .byte_count = length,
92776b5bdf8SMatan Azrad .address = (uintptr_t)addr,
92876b5bdf8SMatan Azrad };
92976b5bdf8SMatan Azrad struct mlx5_devx_mkey_attr mkey_attr = {
93076b5bdf8SMatan Azrad .pd = pdn,
93176b5bdf8SMatan Azrad .klm_array = &klm,
93276b5bdf8SMatan Azrad .klm_num = 1,
93376b5bdf8SMatan Azrad };
93476b5bdf8SMatan Azrad struct mlx5_devx_obj *mkey;
93576b5bdf8SMatan Azrad struct ibv_mr *ibv_mr = mlx5_glue->reg_mr(pd, addr, length,
93676b5bdf8SMatan Azrad IBV_ACCESS_LOCAL_WRITE |
93776b5bdf8SMatan Azrad (haswell_broadwell_cpu ? 0 :
93876b5bdf8SMatan Azrad IBV_ACCESS_RELAXED_ORDERING));
93976b5bdf8SMatan Azrad
94076b5bdf8SMatan Azrad if (!ibv_mr) {
94176b5bdf8SMatan Azrad rte_errno = errno;
94276b5bdf8SMatan Azrad return -rte_errno;
94376b5bdf8SMatan Azrad }
94476b5bdf8SMatan Azrad klm.mkey = ibv_mr->lkey;
94576b5bdf8SMatan Azrad mkey_attr.addr = (uintptr_t)addr;
94676b5bdf8SMatan Azrad mkey_attr.size = length;
94776b5bdf8SMatan Azrad mkey = mlx5_devx_cmd_mkey_create(ctx, &mkey_attr);
94876b5bdf8SMatan Azrad if (!mkey) {
94976b5bdf8SMatan Azrad claim_zero(mlx5_glue->dereg_mr(ibv_mr));
95076b5bdf8SMatan Azrad return -rte_errno;
95176b5bdf8SMatan Azrad }
95276b5bdf8SMatan Azrad pmd_mr->addr = addr;
95376b5bdf8SMatan Azrad pmd_mr->len = length;
95476b5bdf8SMatan Azrad pmd_mr->obj = (void *)ibv_mr;
95576b5bdf8SMatan Azrad pmd_mr->imkey = mkey;
95676b5bdf8SMatan Azrad pmd_mr->lkey = mkey->id;
95776b5bdf8SMatan Azrad return 0;
95876b5bdf8SMatan Azrad }
95976b5bdf8SMatan Azrad
96076b5bdf8SMatan Azrad void
mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr * pmd_mr)96176b5bdf8SMatan Azrad mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr)
96276b5bdf8SMatan Azrad {
96376b5bdf8SMatan Azrad if (!pmd_mr)
96476b5bdf8SMatan Azrad return;
96576b5bdf8SMatan Azrad if (pmd_mr->imkey)
96676b5bdf8SMatan Azrad claim_zero(mlx5_devx_cmd_destroy(pmd_mr->imkey));
96776b5bdf8SMatan Azrad if (pmd_mr->obj)
96876b5bdf8SMatan Azrad claim_zero(mlx5_glue->dereg_mr(pmd_mr->obj));
96976b5bdf8SMatan Azrad memset(pmd_mr, 0, sizeof(*pmd_mr));
97076b5bdf8SMatan Azrad }
97172d7efe4SSpike Du
97272d7efe4SSpike Du /**
97372d7efe4SSpike Du * Rte_intr_handle create and init helper.
97472d7efe4SSpike Du *
97572d7efe4SSpike Du * @param[in] mode
97672d7efe4SSpike Du * interrupt instance can be shared between primary and secondary
97772d7efe4SSpike Du * processes or not.
97872d7efe4SSpike Du * @param[in] set_fd_nonblock
97972d7efe4SSpike Du * Whether to set fd to O_NONBLOCK.
98072d7efe4SSpike Du * @param[in] fd
98172d7efe4SSpike Du * Fd to set in created intr_handle.
98272d7efe4SSpike Du * @param[in] cb
98372d7efe4SSpike Du * Callback to register for intr_handle.
98472d7efe4SSpike Du * @param[in] cb_arg
98572d7efe4SSpike Du * Callback argument for cb.
98672d7efe4SSpike Du *
98772d7efe4SSpike Du * @return
98872d7efe4SSpike Du * - Interrupt handle on success.
98972d7efe4SSpike Du * - NULL on failure, with rte_errno set.
99072d7efe4SSpike Du */
99172d7efe4SSpike Du struct rte_intr_handle *
mlx5_os_interrupt_handler_create(int mode,bool set_fd_nonblock,int fd,rte_intr_callback_fn cb,void * cb_arg)99272d7efe4SSpike Du mlx5_os_interrupt_handler_create(int mode, bool set_fd_nonblock, int fd,
99372d7efe4SSpike Du rte_intr_callback_fn cb, void *cb_arg)
99472d7efe4SSpike Du {
99572d7efe4SSpike Du struct rte_intr_handle *tmp_intr_handle;
99672d7efe4SSpike Du int ret, flags;
99772d7efe4SSpike Du
99872d7efe4SSpike Du tmp_intr_handle = rte_intr_instance_alloc(mode);
99972d7efe4SSpike Du if (!tmp_intr_handle) {
100072d7efe4SSpike Du rte_errno = ENOMEM;
100172d7efe4SSpike Du goto err;
100272d7efe4SSpike Du }
100372d7efe4SSpike Du if (set_fd_nonblock) {
100472d7efe4SSpike Du flags = fcntl(fd, F_GETFL);
100572d7efe4SSpike Du ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
100672d7efe4SSpike Du if (ret) {
100772d7efe4SSpike Du rte_errno = errno;
100872d7efe4SSpike Du goto err;
100972d7efe4SSpike Du }
101072d7efe4SSpike Du }
101172d7efe4SSpike Du ret = rte_intr_fd_set(tmp_intr_handle, fd);
101272d7efe4SSpike Du if (ret)
101372d7efe4SSpike Du goto err;
101472d7efe4SSpike Du ret = rte_intr_type_set(tmp_intr_handle, RTE_INTR_HANDLE_EXT);
101572d7efe4SSpike Du if (ret)
101672d7efe4SSpike Du goto err;
101772d7efe4SSpike Du ret = rte_intr_callback_register(tmp_intr_handle, cb, cb_arg);
101872d7efe4SSpike Du if (ret) {
101972d7efe4SSpike Du rte_errno = -ret;
102072d7efe4SSpike Du goto err;
102172d7efe4SSpike Du }
102272d7efe4SSpike Du return tmp_intr_handle;
102372d7efe4SSpike Du err:
102472d7efe4SSpike Du rte_intr_instance_free(tmp_intr_handle);
102572d7efe4SSpike Du return NULL;
102672d7efe4SSpike Du }
102772d7efe4SSpike Du
102872d7efe4SSpike Du /* Safe unregistration for interrupt callback. */
102972d7efe4SSpike Du static void
mlx5_intr_callback_unregister(const struct rte_intr_handle * handle,rte_intr_callback_fn cb_fn,void * cb_arg)103072d7efe4SSpike Du mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,
103172d7efe4SSpike Du rte_intr_callback_fn cb_fn, void *cb_arg)
103272d7efe4SSpike Du {
103372d7efe4SSpike Du uint64_t twait = 0;
103472d7efe4SSpike Du uint64_t start = 0;
103572d7efe4SSpike Du
103672d7efe4SSpike Du do {
103772d7efe4SSpike Du int ret;
103872d7efe4SSpike Du
103972d7efe4SSpike Du ret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);
104072d7efe4SSpike Du if (ret >= 0)
104172d7efe4SSpike Du return;
104272d7efe4SSpike Du if (ret != -EAGAIN) {
104372d7efe4SSpike Du DRV_LOG(INFO, "failed to unregister interrupt"
104472d7efe4SSpike Du " handler (error: %d)", ret);
104572d7efe4SSpike Du MLX5_ASSERT(false);
104672d7efe4SSpike Du return;
104772d7efe4SSpike Du }
104872d7efe4SSpike Du if (twait) {
104972d7efe4SSpike Du struct timespec onems;
105072d7efe4SSpike Du
105172d7efe4SSpike Du /* Wait one millisecond and try again. */
105272d7efe4SSpike Du onems.tv_sec = 0;
105372d7efe4SSpike Du onems.tv_nsec = NS_PER_S / MS_PER_S;
105472d7efe4SSpike Du nanosleep(&onems, 0);
105572d7efe4SSpike Du /* Check whether one second elapsed. */
105672d7efe4SSpike Du if ((rte_get_timer_cycles() - start) <= twait)
105772d7efe4SSpike Du continue;
105872d7efe4SSpike Du } else {
105972d7efe4SSpike Du /*
106072d7efe4SSpike Du * We get the amount of timer ticks for one second.
106172d7efe4SSpike Du * If this amount elapsed it means we spent one
106272d7efe4SSpike Du * second in waiting. This branch is executed once
106372d7efe4SSpike Du * on first iteration.
106472d7efe4SSpike Du */
106572d7efe4SSpike Du twait = rte_get_timer_hz();
106672d7efe4SSpike Du MLX5_ASSERT(twait);
106772d7efe4SSpike Du }
106872d7efe4SSpike Du /*
106972d7efe4SSpike Du * Timeout elapsed, show message (once a second) and retry.
107072d7efe4SSpike Du * We have no other acceptable option here, if we ignore
107172d7efe4SSpike Du * the unregistering return code the handler will not
107272d7efe4SSpike Du * be unregistered, fd will be closed and we may get the
107372d7efe4SSpike Du * crush. Hanging and messaging in the loop seems not to be
107472d7efe4SSpike Du * the worst choice.
107572d7efe4SSpike Du */
107672d7efe4SSpike Du DRV_LOG(INFO, "Retrying to unregister interrupt handler");
107772d7efe4SSpike Du start = rte_get_timer_cycles();
107872d7efe4SSpike Du } while (true);
107972d7efe4SSpike Du }
108072d7efe4SSpike Du
108172d7efe4SSpike Du /**
108272d7efe4SSpike Du * Rte_intr_handle destroy helper.
108372d7efe4SSpike Du *
108472d7efe4SSpike Du * @param[in] intr_handle
108572d7efe4SSpike Du * Rte_intr_handle to destroy.
108672d7efe4SSpike Du * @param[in] cb
108772d7efe4SSpike Du * Callback which is registered to intr_handle.
108872d7efe4SSpike Du * @param[in] cb_arg
108972d7efe4SSpike Du * Callback argument for cb.
109072d7efe4SSpike Du *
109172d7efe4SSpike Du */
109272d7efe4SSpike Du void
mlx5_os_interrupt_handler_destroy(struct rte_intr_handle * intr_handle,rte_intr_callback_fn cb,void * cb_arg)109372d7efe4SSpike Du mlx5_os_interrupt_handler_destroy(struct rte_intr_handle *intr_handle,
109472d7efe4SSpike Du rte_intr_callback_fn cb, void *cb_arg)
109572d7efe4SSpike Du {
109672d7efe4SSpike Du if (rte_intr_fd_get(intr_handle) >= 0)
109772d7efe4SSpike Du mlx5_intr_callback_unregister(intr_handle, cb, cb_arg);
109872d7efe4SSpike Du rte_intr_instance_free(intr_handle);
109972d7efe4SSpike Du }
1100