xref: /dpdk/drivers/common/mlx5/linux/mlx5_common_os.c (revision f956d3d4c33cdfac5e352f457050029bd5c9b8a8)
179aa4307SOphir Munk /* SPDX-License-Identifier: BSD-3-Clause
279aa4307SOphir Munk  * Copyright 2020 Mellanox Technologies, Ltd
379aa4307SOphir Munk  */
479aa4307SOphir Munk 
54c74ad3eSRongwei Liu #include <sys/types.h>
679aa4307SOphir Munk #include <unistd.h>
779aa4307SOphir Munk #include <string.h>
879aa4307SOphir Munk #include <stdio.h>
979aa4307SOphir Munk #ifdef RTE_IBVERBS_LINK_DLOPEN
1079aa4307SOphir Munk #include <dlfcn.h>
1179aa4307SOphir Munk #endif
12aec086c9SMatan Azrad #include <dirent.h>
13aec086c9SMatan Azrad #include <net/if.h>
1479aa4307SOphir Munk 
1579aa4307SOphir Munk #include <rte_errno.h>
16aec086c9SMatan Azrad #include <rte_string_fns.h>
17662d0dc6SMichael Baum #include <rte_bus_pci.h>
18662d0dc6SMichael Baum #include <rte_bus_auxiliary.h>
1979aa4307SOphir Munk 
2079aa4307SOphir Munk #include "mlx5_common.h"
21662d0dc6SMichael Baum #include "mlx5_nl.h"
2225245d5dSShiri Kuzin #include "mlx5_common_log.h"
23662d0dc6SMichael Baum #include "mlx5_common_private.h"
24887183efSMichael Baum #include "mlx5_common_defs.h"
25c31f3f7fSShiri Kuzin #include "mlx5_common_os.h"
2679aa4307SOphir Munk #include "mlx5_glue.h"
2779aa4307SOphir Munk 
2879aa4307SOphir Munk #ifdef MLX5_GLUE
2979aa4307SOphir Munk const struct mlx5_glue *mlx5_glue;
3079aa4307SOphir Munk #endif
3179aa4307SOphir Munk 
3279aa4307SOphir Munk int
334d567938SThomas Monjalon mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
3479aa4307SOphir Munk {
3579aa4307SOphir Munk 	FILE *file;
3679aa4307SOphir Munk 	char line[32];
37482a1d34SViacheslav Ovsiienko 	int rc = -ENOENT;
3879aa4307SOphir Munk 	MKSTR(path, "%s/device/uevent", dev_path);
3979aa4307SOphir Munk 
4079aa4307SOphir Munk 	file = fopen(path, "rb");
4179aa4307SOphir Munk 	if (file == NULL) {
4279aa4307SOphir Munk 		rte_errno = errno;
4379aa4307SOphir Munk 		return -rte_errno;
4479aa4307SOphir Munk 	}
4579aa4307SOphir Munk 	while (fgets(line, sizeof(line), file) == line) {
4679aa4307SOphir Munk 		size_t len = strlen(line);
4779aa4307SOphir Munk 
4879aa4307SOphir Munk 		/* Truncate long lines. */
49482a1d34SViacheslav Ovsiienko 		if (len == (sizeof(line) - 1)) {
5079aa4307SOphir Munk 			while (line[(len - 1)] != '\n') {
51482a1d34SViacheslav Ovsiienko 				int ret = fgetc(file);
52482a1d34SViacheslav Ovsiienko 
5379aa4307SOphir Munk 				if (ret == EOF)
54482a1d34SViacheslav Ovsiienko 					goto exit;
5579aa4307SOphir Munk 				line[(len - 1)] = ret;
5679aa4307SOphir Munk 			}
57482a1d34SViacheslav Ovsiienko 			/* No match for long lines. */
58482a1d34SViacheslav Ovsiienko 			continue;
59482a1d34SViacheslav Ovsiienko 		}
6079aa4307SOphir Munk 		/* Extract information. */
6179aa4307SOphir Munk 		if (sscanf(line,
6279aa4307SOphir Munk 			   "PCI_SLOT_NAME="
6379aa4307SOphir Munk 			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
6479aa4307SOphir Munk 			   &pci_addr->domain,
6579aa4307SOphir Munk 			   &pci_addr->bus,
6679aa4307SOphir Munk 			   &pci_addr->devid,
6779aa4307SOphir Munk 			   &pci_addr->function) == 4) {
68482a1d34SViacheslav Ovsiienko 			rc = 0;
6979aa4307SOphir Munk 			break;
7079aa4307SOphir Munk 		}
7179aa4307SOphir Munk 	}
72482a1d34SViacheslav Ovsiienko exit:
7379aa4307SOphir Munk 	fclose(file);
74482a1d34SViacheslav Ovsiienko 	if (rc)
75482a1d34SViacheslav Ovsiienko 		rte_errno = -rc;
76482a1d34SViacheslav Ovsiienko 	return rc;
7779aa4307SOphir Munk }
7879aa4307SOphir Munk 
7979aa4307SOphir Munk /**
8079aa4307SOphir Munk  * Extract port name, as a number, from sysfs or netlink information.
8179aa4307SOphir Munk  *
8279aa4307SOphir Munk  * @param[in] port_name_in
8379aa4307SOphir Munk  *   String representing the port name.
8479aa4307SOphir Munk  * @param[out] port_info_out
8579aa4307SOphir Munk  *   Port information, including port name as a number and port name
8679aa4307SOphir Munk  *   type if recognized
8779aa4307SOphir Munk  *
8879aa4307SOphir Munk  * @return
8979aa4307SOphir Munk  *   port_name field set according to recognized name format.
9079aa4307SOphir Munk  */
9179aa4307SOphir Munk void
9279aa4307SOphir Munk mlx5_translate_port_name(const char *port_name_in,
9379aa4307SOphir Munk 			 struct mlx5_switch_info *port_info_out)
9479aa4307SOphir Munk {
9559df97f1SXueming Li 	char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol;
9679aa4307SOphir Munk 	char *end;
9779aa4307SOphir Munk 	int sc_items;
9879aa4307SOphir Munk 
9959df97f1SXueming Li 	sc_items = sscanf(port_name_in, "%c%d",
10059df97f1SXueming Li 			  &ctrl, &port_info_out->ctrl_num);
10159df97f1SXueming Li 	if (sc_items == 2 && ctrl == 'c') {
10259df97f1SXueming Li 		port_name_in++; /* 'c' */
10359df97f1SXueming Li 		port_name_in += snprintf(NULL, 0, "%d",
10459df97f1SXueming Li 					  port_info_out->ctrl_num);
10559df97f1SXueming Li 	}
10659df97f1SXueming Li 	/* Check for port-name as a string of the form pf0vf0 or pf0sf0 */
1073590881bSViacheslav Ovsiienko 	sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c",
10879aa4307SOphir Munk 			  &pf_c1, &pf_c2, &port_info_out->pf_num,
1093590881bSViacheslav Ovsiienko 			  &vf_c1, &vf_c2, &port_info_out->port_name, &eol);
11059df97f1SXueming Li 	if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') {
11159df97f1SXueming Li 		if (vf_c1 == 'v' && vf_c2 == 'f') {
11259df97f1SXueming Li 			/* Kernel ver >= 5.0 or OFED ver >= 4.6 */
11359df97f1SXueming Li 			port_info_out->name_type =
11459df97f1SXueming Li 					MLX5_PHYS_PORT_NAME_TYPE_PFVF;
11579aa4307SOphir Munk 			return;
11679aa4307SOphir Munk 		}
11759df97f1SXueming Li 		if (vf_c1 == 's' && vf_c2 == 'f') {
11859df97f1SXueming Li 			/* Kernel ver >= 5.11 or OFED ver >= 5.1 */
11959df97f1SXueming Li 			port_info_out->name_type =
12059df97f1SXueming Li 					MLX5_PHYS_PORT_NAME_TYPE_PFSF;
12159df97f1SXueming Li 			return;
12259df97f1SXueming Li 		}
12359df97f1SXueming Li 	}
12479aa4307SOphir Munk 	/*
12579aa4307SOphir Munk 	 * Check for port-name as a string of the form p0
12679aa4307SOphir Munk 	 * (support kernel ver >= 5.0, or OFED ver >= 4.6).
12779aa4307SOphir Munk 	 */
1283590881bSViacheslav Ovsiienko 	sc_items = sscanf(port_name_in, "%c%d%c",
1293590881bSViacheslav Ovsiienko 			  &pf_c1, &port_info_out->port_name, &eol);
13079aa4307SOphir Munk 	if (sc_items == 2 && pf_c1 == 'p') {
13179aa4307SOphir Munk 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
13279aa4307SOphir Munk 		return;
13379aa4307SOphir Munk 	}
134420bbdaeSViacheslav Ovsiienko 	/*
135420bbdaeSViacheslav Ovsiienko 	 * Check for port-name as a string of the form pf0
136420bbdaeSViacheslav Ovsiienko 	 * (support kernel ver >= 5.7 for HPF representor on BF).
137420bbdaeSViacheslav Ovsiienko 	 */
1383590881bSViacheslav Ovsiienko 	sc_items = sscanf(port_name_in, "%c%c%d%c",
1393590881bSViacheslav Ovsiienko 			  &pf_c1, &pf_c2, &port_info_out->pf_num, &eol);
140420bbdaeSViacheslav Ovsiienko 	if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') {
141420bbdaeSViacheslav Ovsiienko 		port_info_out->port_name = -1;
142420bbdaeSViacheslav Ovsiienko 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF;
143420bbdaeSViacheslav Ovsiienko 		return;
144420bbdaeSViacheslav Ovsiienko 	}
14579aa4307SOphir Munk 	/* Check for port-name as a number (support kernel ver < 5.0 */
14679aa4307SOphir Munk 	errno = 0;
14779aa4307SOphir Munk 	port_info_out->port_name = strtol(port_name_in, &end, 0);
14879aa4307SOphir Munk 	if (!errno &&
14979aa4307SOphir Munk 	    (size_t)(end - port_name_in) == strlen(port_name_in)) {
15079aa4307SOphir Munk 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
15179aa4307SOphir Munk 		return;
15279aa4307SOphir Munk 	}
15379aa4307SOphir Munk 	port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
15479aa4307SOphir Munk }
15579aa4307SOphir Munk 
156aec086c9SMatan Azrad int
157aec086c9SMatan Azrad mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
158aec086c9SMatan Azrad {
159aec086c9SMatan Azrad 	DIR *dir;
160aec086c9SMatan Azrad 	struct dirent *dent;
161aec086c9SMatan Azrad 	unsigned int dev_type = 0;
162aec086c9SMatan Azrad 	unsigned int dev_port_prev = ~0u;
163aec086c9SMatan Azrad 	char match[IF_NAMESIZE] = "";
164aec086c9SMatan Azrad 
165aec086c9SMatan Azrad 	MLX5_ASSERT(ibdev_path);
166aec086c9SMatan Azrad 	{
167aec086c9SMatan Azrad 		MKSTR(path, "%s/device/net", ibdev_path);
168aec086c9SMatan Azrad 
169aec086c9SMatan Azrad 		dir = opendir(path);
170aec086c9SMatan Azrad 		if (dir == NULL) {
171aec086c9SMatan Azrad 			rte_errno = errno;
172aec086c9SMatan Azrad 			return -rte_errno;
173aec086c9SMatan Azrad 		}
174aec086c9SMatan Azrad 	}
175aec086c9SMatan Azrad 	while ((dent = readdir(dir)) != NULL) {
176aec086c9SMatan Azrad 		char *name = dent->d_name;
177aec086c9SMatan Azrad 		FILE *file;
178aec086c9SMatan Azrad 		unsigned int dev_port;
179aec086c9SMatan Azrad 		int r;
180aec086c9SMatan Azrad 
181aec086c9SMatan Azrad 		if ((name[0] == '.') &&
182aec086c9SMatan Azrad 		    ((name[1] == '\0') ||
183aec086c9SMatan Azrad 		     ((name[1] == '.') && (name[2] == '\0'))))
184aec086c9SMatan Azrad 			continue;
185aec086c9SMatan Azrad 
186aec086c9SMatan Azrad 		MKSTR(path, "%s/device/net/%s/%s",
187aec086c9SMatan Azrad 		      ibdev_path, name,
188aec086c9SMatan Azrad 		      (dev_type ? "dev_id" : "dev_port"));
189aec086c9SMatan Azrad 
190aec086c9SMatan Azrad 		file = fopen(path, "rb");
191aec086c9SMatan Azrad 		if (file == NULL) {
192aec086c9SMatan Azrad 			if (errno != ENOENT)
193aec086c9SMatan Azrad 				continue;
194aec086c9SMatan Azrad 			/*
195aec086c9SMatan Azrad 			 * Switch to dev_id when dev_port does not exist as
196aec086c9SMatan Azrad 			 * is the case with Linux kernel versions < 3.15.
197aec086c9SMatan Azrad 			 */
198aec086c9SMatan Azrad try_dev_id:
199aec086c9SMatan Azrad 			match[0] = '\0';
200aec086c9SMatan Azrad 			if (dev_type)
201aec086c9SMatan Azrad 				break;
202aec086c9SMatan Azrad 			dev_type = 1;
203aec086c9SMatan Azrad 			dev_port_prev = ~0u;
204aec086c9SMatan Azrad 			rewinddir(dir);
205aec086c9SMatan Azrad 			continue;
206aec086c9SMatan Azrad 		}
207aec086c9SMatan Azrad 		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
208aec086c9SMatan Azrad 		fclose(file);
209aec086c9SMatan Azrad 		if (r != 1)
210aec086c9SMatan Azrad 			continue;
211aec086c9SMatan Azrad 		/*
212aec086c9SMatan Azrad 		 * Switch to dev_id when dev_port returns the same value for
213aec086c9SMatan Azrad 		 * all ports. May happen when using a MOFED release older than
214aec086c9SMatan Azrad 		 * 3.0 with a Linux kernel >= 3.15.
215aec086c9SMatan Azrad 		 */
216aec086c9SMatan Azrad 		if (dev_port == dev_port_prev)
217aec086c9SMatan Azrad 			goto try_dev_id;
218aec086c9SMatan Azrad 		dev_port_prev = dev_port;
219aec086c9SMatan Azrad 		if (dev_port == 0)
220aec086c9SMatan Azrad 			strlcpy(match, name, IF_NAMESIZE);
221aec086c9SMatan Azrad 	}
222aec086c9SMatan Azrad 	closedir(dir);
223aec086c9SMatan Azrad 	if (match[0] == '\0') {
224aec086c9SMatan Azrad 		rte_errno = ENOENT;
225aec086c9SMatan Azrad 		return -rte_errno;
226aec086c9SMatan Azrad 	}
227aec086c9SMatan Azrad 	strncpy(ifname, match, IF_NAMESIZE);
228aec086c9SMatan Azrad 	return 0;
229aec086c9SMatan Azrad }
230aec086c9SMatan Azrad 
23179aa4307SOphir Munk #ifdef MLX5_GLUE
23279aa4307SOphir Munk 
23379aa4307SOphir Munk /**
23479aa4307SOphir Munk  * Suffix RTE_EAL_PMD_PATH with "-glue".
23579aa4307SOphir Munk  *
23679aa4307SOphir Munk  * This function performs a sanity check on RTE_EAL_PMD_PATH before
23779aa4307SOphir Munk  * suffixing its last component.
23879aa4307SOphir Munk  *
23979aa4307SOphir Munk  * @param buf[out]
24079aa4307SOphir Munk  *   Output buffer, should be large enough otherwise NULL is returned.
24179aa4307SOphir Munk  * @param size
24279aa4307SOphir Munk  *   Size of @p out.
24379aa4307SOphir Munk  *
24479aa4307SOphir Munk  * @return
24579aa4307SOphir Munk  *   Pointer to @p buf or @p NULL in case suffix cannot be appended.
24679aa4307SOphir Munk  */
24779aa4307SOphir Munk static char *
24879aa4307SOphir Munk mlx5_glue_path(char *buf, size_t size)
24979aa4307SOphir Munk {
25079aa4307SOphir Munk 	static const char *const bad[] = { "/", ".", "..", NULL };
25179aa4307SOphir Munk 	const char *path = RTE_EAL_PMD_PATH;
25279aa4307SOphir Munk 	size_t len = strlen(path);
25379aa4307SOphir Munk 	size_t off;
25479aa4307SOphir Munk 	int i;
25579aa4307SOphir Munk 
25679aa4307SOphir Munk 	while (len && path[len - 1] == '/')
25779aa4307SOphir Munk 		--len;
25879aa4307SOphir Munk 	for (off = len; off && path[off - 1] != '/'; --off)
25979aa4307SOphir Munk 		;
26079aa4307SOphir Munk 	for (i = 0; bad[i]; ++i)
26179aa4307SOphir Munk 		if (!strncmp(path + off, bad[i], (int)(len - off)))
26279aa4307SOphir Munk 			goto error;
26379aa4307SOphir Munk 	i = snprintf(buf, size, "%.*s-glue", (int)len, path);
26479aa4307SOphir Munk 	if (i == -1 || (size_t)i >= size)
26579aa4307SOphir Munk 		goto error;
26679aa4307SOphir Munk 	return buf;
26779aa4307SOphir Munk error:
26879aa4307SOphir Munk 	RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of"
26979aa4307SOphir Munk 		" RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please"
27079aa4307SOphir Munk 		" re-configure DPDK");
27179aa4307SOphir Munk 	return NULL;
27279aa4307SOphir Munk }
27379aa4307SOphir Munk 
27479aa4307SOphir Munk static int
27579aa4307SOphir Munk mlx5_glue_dlopen(void)
27679aa4307SOphir Munk {
27779aa4307SOphir Munk 	char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")];
27879aa4307SOphir Munk 	void *handle = NULL;
27979aa4307SOphir Munk 
28079aa4307SOphir Munk 	char const *path[] = {
28179aa4307SOphir Munk 		/*
28279aa4307SOphir Munk 		 * A basic security check is necessary before trusting
28379aa4307SOphir Munk 		 * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
28479aa4307SOphir Munk 		 */
28579aa4307SOphir Munk 		(geteuid() == getuid() && getegid() == getgid() ?
28679aa4307SOphir Munk 		 getenv("MLX5_GLUE_PATH") : NULL),
28779aa4307SOphir Munk 		/*
28879aa4307SOphir Munk 		 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed
28979aa4307SOphir Munk 		 * variant, otherwise let dlopen() look up libraries on its
29079aa4307SOphir Munk 		 * own.
29179aa4307SOphir Munk 		 */
29279aa4307SOphir Munk 		(*RTE_EAL_PMD_PATH ?
29379aa4307SOphir Munk 		 mlx5_glue_path(glue_path, sizeof(glue_path)) : ""),
29479aa4307SOphir Munk 	};
29579aa4307SOphir Munk 	unsigned int i = 0;
29679aa4307SOphir Munk 	void **sym;
29779aa4307SOphir Munk 	const char *dlmsg;
29879aa4307SOphir Munk 
29979aa4307SOphir Munk 	while (!handle && i != RTE_DIM(path)) {
30079aa4307SOphir Munk 		const char *end;
30179aa4307SOphir Munk 		size_t len;
30279aa4307SOphir Munk 		int ret;
30379aa4307SOphir Munk 
30479aa4307SOphir Munk 		if (!path[i]) {
30579aa4307SOphir Munk 			++i;
30679aa4307SOphir Munk 			continue;
30779aa4307SOphir Munk 		}
30879aa4307SOphir Munk 		end = strpbrk(path[i], ":;");
30979aa4307SOphir Munk 		if (!end)
31079aa4307SOphir Munk 			end = path[i] + strlen(path[i]);
31179aa4307SOphir Munk 		len = end - path[i];
31279aa4307SOphir Munk 		ret = 0;
31379aa4307SOphir Munk 		do {
31479aa4307SOphir Munk 			char name[ret + 1];
31579aa4307SOphir Munk 
31679aa4307SOphir Munk 			ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE,
31779aa4307SOphir Munk 				       (int)len, path[i],
31879aa4307SOphir Munk 				       (!len || *(end - 1) == '/') ? "" : "/");
31979aa4307SOphir Munk 			if (ret == -1)
32079aa4307SOphir Munk 				break;
32179aa4307SOphir Munk 			if (sizeof(name) != (size_t)ret + 1)
32279aa4307SOphir Munk 				continue;
32379aa4307SOphir Munk 			DRV_LOG(DEBUG, "Looking for rdma-core glue as "
32479aa4307SOphir Munk 				"\"%s\"", name);
32579aa4307SOphir Munk 			handle = dlopen(name, RTLD_LAZY);
32679aa4307SOphir Munk 			break;
32779aa4307SOphir Munk 		} while (1);
32879aa4307SOphir Munk 		path[i] = end + 1;
32979aa4307SOphir Munk 		if (!*end)
33079aa4307SOphir Munk 			++i;
33179aa4307SOphir Munk 	}
33279aa4307SOphir Munk 	if (!handle) {
33379aa4307SOphir Munk 		rte_errno = EINVAL;
33479aa4307SOphir Munk 		dlmsg = dlerror();
33579aa4307SOphir Munk 		if (dlmsg)
33679aa4307SOphir Munk 			DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg);
33779aa4307SOphir Munk 		goto glue_error;
33879aa4307SOphir Munk 	}
33979aa4307SOphir Munk 	sym = dlsym(handle, "mlx5_glue");
34079aa4307SOphir Munk 	if (!sym || !*sym) {
34179aa4307SOphir Munk 		rte_errno = EINVAL;
34279aa4307SOphir Munk 		dlmsg = dlerror();
34379aa4307SOphir Munk 		if (dlmsg)
34479aa4307SOphir Munk 			DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg);
34579aa4307SOphir Munk 		goto glue_error;
34679aa4307SOphir Munk 	}
34779aa4307SOphir Munk 	mlx5_glue = *sym;
34879aa4307SOphir Munk 	return 0;
34979aa4307SOphir Munk 
35079aa4307SOphir Munk glue_error:
35179aa4307SOphir Munk 	if (handle)
35279aa4307SOphir Munk 		dlclose(handle);
35379aa4307SOphir Munk 	return -1;
35479aa4307SOphir Munk }
35579aa4307SOphir Munk 
35679aa4307SOphir Munk #endif
35779aa4307SOphir Munk 
35879aa4307SOphir Munk /**
35979aa4307SOphir Munk  * Initialization routine for run-time dependency on rdma-core.
36079aa4307SOphir Munk  */
36179aa4307SOphir Munk void
36279aa4307SOphir Munk mlx5_glue_constructor(void)
36379aa4307SOphir Munk {
36479aa4307SOphir Munk 	/*
36579aa4307SOphir Munk 	 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
36679aa4307SOphir Munk 	 * huge pages. Calling ibv_fork_init() during init allows
36779aa4307SOphir Munk 	 * applications to use fork() safely for purposes other than
36879aa4307SOphir Munk 	 * using this PMD, which is not supported in forked processes.
36979aa4307SOphir Munk 	 */
37079aa4307SOphir Munk 	setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
37179aa4307SOphir Munk 	/* Match the size of Rx completion entry to the size of a cacheline. */
37279aa4307SOphir Munk 	if (RTE_CACHE_LINE_SIZE == 128)
37379aa4307SOphir Munk 		setenv("MLX5_CQE_SIZE", "128", 0);
37479aa4307SOphir Munk 	/*
37579aa4307SOphir Munk 	 * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to
37679aa4307SOphir Munk 	 * cleanup all the Verbs resources even when the device was removed.
37779aa4307SOphir Munk 	 */
37879aa4307SOphir Munk 	setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1);
37979aa4307SOphir Munk 
38079aa4307SOphir Munk #ifdef MLX5_GLUE
38179aa4307SOphir Munk 	if (mlx5_glue_dlopen() != 0)
38279aa4307SOphir Munk 		goto glue_error;
38379aa4307SOphir Munk #endif
38479aa4307SOphir Munk 
38579aa4307SOphir Munk #ifdef RTE_LIBRTE_MLX5_DEBUG
38679aa4307SOphir Munk 	/* Glue structure must not contain any NULL pointers. */
38779aa4307SOphir Munk 	{
38879aa4307SOphir Munk 		unsigned int i;
38979aa4307SOphir Munk 
39079aa4307SOphir Munk 		for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
39179aa4307SOphir Munk 			MLX5_ASSERT(((const void *const *)mlx5_glue)[i]);
39279aa4307SOphir Munk 	}
39379aa4307SOphir Munk #endif
39479aa4307SOphir Munk 	if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) {
39579aa4307SOphir Munk 		rte_errno = EINVAL;
39679aa4307SOphir Munk 		DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is "
39779aa4307SOphir Munk 			"required", mlx5_glue->version, MLX5_GLUE_VERSION);
39879aa4307SOphir Munk 		goto glue_error;
39979aa4307SOphir Munk 	}
40079aa4307SOphir Munk 	mlx5_glue->fork_init();
40179aa4307SOphir Munk 	return;
40279aa4307SOphir Munk 
40379aa4307SOphir Munk glue_error:
40479aa4307SOphir Munk 	DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing"
40579aa4307SOphir Munk 		" run-time dependency on rdma-core libraries (libibverbs,"
40679aa4307SOphir Munk 		" libmlx5)");
40779aa4307SOphir Munk 	mlx5_glue = NULL;
40879aa4307SOphir Munk }
409262c7ad0SOri Kam 
410e35ccf24SMichael Baum /**
4119d936f4fSMichael Baum  * Validate user arguments for remote PD and CTX.
4129d936f4fSMichael Baum  *
4139d936f4fSMichael Baum  * @param config
4149d936f4fSMichael Baum  *   Pointer to device configuration structure.
4159d936f4fSMichael Baum  *
4169d936f4fSMichael Baum  * @return
4179d936f4fSMichael Baum  *   0 on success, a negative errno value otherwise and rte_errno is set.
4189d936f4fSMichael Baum  */
4199d936f4fSMichael Baum int
4209d936f4fSMichael Baum mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config)
4219d936f4fSMichael Baum {
4229d936f4fSMichael Baum 	int device_fd = config->device_fd;
4239d936f4fSMichael Baum 	int pd_handle = config->pd_handle;
4249d936f4fSMichael Baum 
4259d936f4fSMichael Baum #ifdef HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR
4269d936f4fSMichael Baum 	if (device_fd == MLX5_ARG_UNSET && pd_handle != MLX5_ARG_UNSET) {
4279d936f4fSMichael Baum 		DRV_LOG(ERR, "Remote PD without CTX is not supported.");
4289d936f4fSMichael Baum 		rte_errno = EINVAL;
4299d936f4fSMichael Baum 		return -rte_errno;
4309d936f4fSMichael Baum 	}
4319d936f4fSMichael Baum 	if (device_fd != MLX5_ARG_UNSET && pd_handle == MLX5_ARG_UNSET) {
4329d936f4fSMichael Baum 		DRV_LOG(ERR, "Remote CTX without PD is not supported.");
4339d936f4fSMichael Baum 		rte_errno = EINVAL;
4349d936f4fSMichael Baum 		return -rte_errno;
4359d936f4fSMichael Baum 	}
4369d936f4fSMichael Baum 	DRV_LOG(DEBUG, "Remote PD and CTX is supported: (cmd_fd=%d, "
4379d936f4fSMichael Baum 		"pd_handle=%d).", device_fd, pd_handle);
4389d936f4fSMichael Baum #else
4399d936f4fSMichael Baum 	if (pd_handle != MLX5_ARG_UNSET || device_fd != MLX5_ARG_UNSET) {
4409d936f4fSMichael Baum 		DRV_LOG(ERR,
4419d936f4fSMichael Baum 			"Remote PD and CTX is not supported - maybe old rdma-core version?");
4429d936f4fSMichael Baum 		rte_errno = ENOTSUP;
4439d936f4fSMichael Baum 		return -rte_errno;
4449d936f4fSMichael Baum 	}
4459d936f4fSMichael Baum #endif
4469d936f4fSMichael Baum 	return 0;
4479d936f4fSMichael Baum }
4489d936f4fSMichael Baum 
4499d936f4fSMichael Baum /**
4509d936f4fSMichael Baum  * Release Protection Domain object.
4519d936f4fSMichael Baum  *
4529d936f4fSMichael Baum  * @param[out] cdev
4539d936f4fSMichael Baum  *   Pointer to the mlx5 device.
4549d936f4fSMichael Baum  *
4559d936f4fSMichael Baum  * @return
4569d936f4fSMichael Baum  *   0 on success, a negative errno value otherwise.
4579d936f4fSMichael Baum  */
4589d936f4fSMichael Baum int
4599d936f4fSMichael Baum mlx5_os_pd_release(struct mlx5_common_device *cdev)
4609d936f4fSMichael Baum {
4619d936f4fSMichael Baum 	if (cdev->config.pd_handle == MLX5_ARG_UNSET)
4629d936f4fSMichael Baum 		return mlx5_glue->dealloc_pd(cdev->pd);
4639d936f4fSMichael Baum 	else
4649d936f4fSMichael Baum 		return mlx5_glue->unimport_pd(cdev->pd);
4659d936f4fSMichael Baum }
4669d936f4fSMichael Baum 
4679d936f4fSMichael Baum /**
4689d936f4fSMichael Baum  * Allocate Protection Domain object.
4699d936f4fSMichael Baum  *
4709d936f4fSMichael Baum  * @param[out] cdev
4719d936f4fSMichael Baum  *   Pointer to the mlx5 device.
4729d936f4fSMichael Baum  *
4739d936f4fSMichael Baum  * @return
4749d936f4fSMichael Baum  *   0 on success, a negative errno value otherwise.
4759d936f4fSMichael Baum  */
4769d936f4fSMichael Baum static int
4779d936f4fSMichael Baum mlx5_os_pd_create(struct mlx5_common_device *cdev)
4789d936f4fSMichael Baum {
4799d936f4fSMichael Baum 	cdev->pd = mlx5_glue->alloc_pd(cdev->ctx);
4809d936f4fSMichael Baum 	if (cdev->pd == NULL) {
4819d936f4fSMichael Baum 		DRV_LOG(ERR, "Failed to allocate PD: %s", rte_strerror(errno));
4829d936f4fSMichael Baum 		return errno ? -errno : -ENOMEM;
4839d936f4fSMichael Baum 	}
4849d936f4fSMichael Baum 	return 0;
4859d936f4fSMichael Baum }
4869d936f4fSMichael Baum 
4879d936f4fSMichael Baum /**
4889d936f4fSMichael Baum  * Import Protection Domain object according to given PD handle.
4899d936f4fSMichael Baum  *
4909d936f4fSMichael Baum  * @param[out] cdev
4919d936f4fSMichael Baum  *   Pointer to the mlx5 device.
4929d936f4fSMichael Baum  *
4939d936f4fSMichael Baum  * @return
4949d936f4fSMichael Baum  *   0 on success, a negative errno value otherwise.
4959d936f4fSMichael Baum  */
4969d936f4fSMichael Baum static int
4979d936f4fSMichael Baum mlx5_os_pd_import(struct mlx5_common_device *cdev)
4989d936f4fSMichael Baum {
4999d936f4fSMichael Baum 	cdev->pd = mlx5_glue->import_pd(cdev->ctx, cdev->config.pd_handle);
5009d936f4fSMichael Baum 	if (cdev->pd == NULL) {
5019d936f4fSMichael Baum 		DRV_LOG(ERR, "Failed to import PD using handle=%d: %s",
5029d936f4fSMichael Baum 			cdev->config.pd_handle, rte_strerror(errno));
5039d936f4fSMichael Baum 		return errno ? -errno : -ENOMEM;
5049d936f4fSMichael Baum 	}
5059d936f4fSMichael Baum 	return 0;
5069d936f4fSMichael Baum }
5079d936f4fSMichael Baum 
5089d936f4fSMichael Baum /**
5099d936f4fSMichael Baum  * Prepare Protection Domain object and extract its pdn using DV API.
510e35ccf24SMichael Baum  *
511e35ccf24SMichael Baum  * @param[out] cdev
512e35ccf24SMichael Baum  *   Pointer to the mlx5 device.
513e35ccf24SMichael Baum  *
514e35ccf24SMichael Baum  * @return
515e35ccf24SMichael Baum  *   0 on success, a negative errno value otherwise and rte_errno is set.
516e35ccf24SMichael Baum  */
517e35ccf24SMichael Baum int
5189d936f4fSMichael Baum mlx5_os_pd_prepare(struct mlx5_common_device *cdev)
519e35ccf24SMichael Baum {
520e35ccf24SMichael Baum #ifdef HAVE_IBV_FLOW_DV_SUPPORT
521e35ccf24SMichael Baum 	struct mlx5dv_obj obj;
522e35ccf24SMichael Baum 	struct mlx5dv_pd pd_info;
523e35ccf24SMichael Baum #endif
5249d936f4fSMichael Baum 	int ret;
525e35ccf24SMichael Baum 
5269d936f4fSMichael Baum 	if (cdev->config.pd_handle == MLX5_ARG_UNSET)
5279d936f4fSMichael Baum 		ret = mlx5_os_pd_create(cdev);
5289d936f4fSMichael Baum 	else
5299d936f4fSMichael Baum 		ret = mlx5_os_pd_import(cdev);
5309d936f4fSMichael Baum 	if (ret) {
5319d936f4fSMichael Baum 		rte_errno = -ret;
5329d936f4fSMichael Baum 		return ret;
533e35ccf24SMichael Baum 	}
534e35ccf24SMichael Baum 	if (cdev->config.devx == 0)
535e35ccf24SMichael Baum 		return 0;
536e35ccf24SMichael Baum #ifdef HAVE_IBV_FLOW_DV_SUPPORT
537e35ccf24SMichael Baum 	obj.pd.in = cdev->pd;
538e35ccf24SMichael Baum 	obj.pd.out = &pd_info;
539e35ccf24SMichael Baum 	ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
540e35ccf24SMichael Baum 	if (ret != 0) {
541e35ccf24SMichael Baum 		DRV_LOG(ERR, "Fail to get PD object info.");
5429d936f4fSMichael Baum 		rte_errno = errno;
5439d936f4fSMichael Baum 		claim_zero(mlx5_os_pd_release(cdev));
544e35ccf24SMichael Baum 		cdev->pd = NULL;
5459d936f4fSMichael Baum 		return -rte_errno;
546e35ccf24SMichael Baum 	}
547e35ccf24SMichael Baum 	cdev->pdn = pd_info.pdn;
548e35ccf24SMichael Baum 	return 0;
549e35ccf24SMichael Baum #else
550e35ccf24SMichael Baum 	DRV_LOG(ERR, "Cannot get pdn - no DV support.");
5519d936f4fSMichael Baum 	rte_errno = ENOTSUP;
5529d936f4fSMichael Baum 	return -rte_errno;
553e35ccf24SMichael Baum #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
554e35ccf24SMichael Baum }
555e35ccf24SMichael Baum 
556662d0dc6SMichael Baum static struct ibv_device *
557ad435d32SXueming Li mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
558c31f3f7fSShiri Kuzin {
559c31f3f7fSShiri Kuzin 	int n;
560c31f3f7fSShiri Kuzin 	struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
561c31f3f7fSShiri Kuzin 	struct ibv_device *ibv_match = NULL;
562*f956d3d4SRongwei Liu 	uint8_t guid1[32] = {0};
563*f956d3d4SRongwei Liu 	uint8_t guid2[32] = {0};
564*f956d3d4SRongwei Liu 	int ret1, ret2 = -1;
565c31f3f7fSShiri Kuzin 	struct rte_pci_addr paddr;
566c31f3f7fSShiri Kuzin 
567*f956d3d4SRongwei Liu 	if (ibv_list == NULL || !n) {
568*f956d3d4SRongwei Liu 		rte_errno = ENOSYS;
569*f956d3d4SRongwei Liu 		if (ibv_list)
570*f956d3d4SRongwei Liu 			mlx5_glue->free_device_list(ibv_list);
571*f956d3d4SRongwei Liu 		return NULL;
572*f956d3d4SRongwei Liu 	}
573*f956d3d4SRongwei Liu 	ret1 = mlx5_get_device_guid(addr, guid1, sizeof(guid1));
574*f956d3d4SRongwei Liu 	while (n-- > 0) {
575c31f3f7fSShiri Kuzin 		DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
5764d567938SThomas Monjalon 		if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
577c31f3f7fSShiri Kuzin 			continue;
578*f956d3d4SRongwei Liu 		if (ret1 > 0)
579*f956d3d4SRongwei Liu 			ret2 = mlx5_get_device_guid(&paddr, guid2, sizeof(guid2));
580*f956d3d4SRongwei Liu 		/* Bond device can bond secondary PCIe */
581*f956d3d4SRongwei Liu 		if ((strstr(ibv_list[n]->name, "bond") &&
582*f956d3d4SRongwei Liu 		    ((ret1 > 0 && ret2 > 0 && !memcmp(guid1, guid2, sizeof(guid1))) ||
583*f956d3d4SRongwei Liu 		    (addr->domain == paddr.domain && addr->bus == paddr.bus &&
584*f956d3d4SRongwei Liu 		     addr->devid == paddr.devid))) ||
585*f956d3d4SRongwei Liu 		     !rte_pci_addr_cmp(addr, &paddr)) {
586c31f3f7fSShiri Kuzin 			ibv_match = ibv_list[n];
587c31f3f7fSShiri Kuzin 			break;
588c31f3f7fSShiri Kuzin 		}
589*f956d3d4SRongwei Liu 	}
590ca1418ceSMichael Baum 	if (ibv_match == NULL) {
591ca1418ceSMichael Baum 		DRV_LOG(WARNING,
592ca1418ceSMichael Baum 			"No Verbs device matches PCI device " PCI_PRI_FMT ","
593ca1418ceSMichael Baum 			" are kernel drivers loaded?",
594ca1418ceSMichael Baum 			addr->domain, addr->bus, addr->devid, addr->function);
595c31f3f7fSShiri Kuzin 		rte_errno = ENOENT;
596ca1418ceSMichael Baum 	}
597c31f3f7fSShiri Kuzin 	mlx5_glue->free_device_list(ibv_list);
598c31f3f7fSShiri Kuzin 	return ibv_match;
599c31f3f7fSShiri Kuzin }
600887183efSMichael Baum 
601662d0dc6SMichael Baum /* Try to disable ROCE by Netlink\Devlink. */
602662d0dc6SMichael Baum static int
603662d0dc6SMichael Baum mlx5_nl_roce_disable(const char *addr)
604662d0dc6SMichael Baum {
605be66461cSDmitry Kozlyuk 	int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC, 0);
606662d0dc6SMichael Baum 	int devlink_id;
607662d0dc6SMichael Baum 	int enable;
608662d0dc6SMichael Baum 	int ret;
609662d0dc6SMichael Baum 
610662d0dc6SMichael Baum 	if (nlsk_fd < 0)
611662d0dc6SMichael Baum 		return nlsk_fd;
612662d0dc6SMichael Baum 	devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
613662d0dc6SMichael Baum 	if (devlink_id < 0) {
614662d0dc6SMichael Baum 		ret = devlink_id;
615662d0dc6SMichael Baum 		DRV_LOG(DEBUG,
616662d0dc6SMichael Baum 			"Failed to get devlink id for ROCE operations by Netlink.");
617662d0dc6SMichael Baum 		goto close;
618662d0dc6SMichael Baum 	}
619662d0dc6SMichael Baum 	ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
620662d0dc6SMichael Baum 	if (ret) {
621662d0dc6SMichael Baum 		DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
622662d0dc6SMichael Baum 			ret);
623662d0dc6SMichael Baum 		goto close;
624662d0dc6SMichael Baum 	} else if (!enable) {
625662d0dc6SMichael Baum 		DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
626662d0dc6SMichael Baum 		goto close;
627662d0dc6SMichael Baum 	}
628662d0dc6SMichael Baum 	ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
629662d0dc6SMichael Baum 	if (ret)
630662d0dc6SMichael Baum 		DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
631662d0dc6SMichael Baum 	else
632662d0dc6SMichael Baum 		DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
633662d0dc6SMichael Baum close:
634662d0dc6SMichael Baum 	close(nlsk_fd);
635662d0dc6SMichael Baum 	return ret;
636662d0dc6SMichael Baum }
637662d0dc6SMichael Baum 
638662d0dc6SMichael Baum /* Try to disable ROCE by sysfs. */
639662d0dc6SMichael Baum static int
640662d0dc6SMichael Baum mlx5_sys_roce_disable(const char *addr)
641662d0dc6SMichael Baum {
642662d0dc6SMichael Baum 	FILE *file_o;
643662d0dc6SMichael Baum 	int enable;
644662d0dc6SMichael Baum 	int ret;
645662d0dc6SMichael Baum 
646662d0dc6SMichael Baum 	MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
647662d0dc6SMichael Baum 	file_o = fopen(file_p, "rb");
648662d0dc6SMichael Baum 	if (!file_o) {
649662d0dc6SMichael Baum 		rte_errno = ENOTSUP;
650662d0dc6SMichael Baum 		return -ENOTSUP;
651662d0dc6SMichael Baum 	}
652662d0dc6SMichael Baum 	ret = fscanf(file_o, "%d", &enable);
653662d0dc6SMichael Baum 	if (ret != 1) {
654662d0dc6SMichael Baum 		rte_errno = EINVAL;
655662d0dc6SMichael Baum 		ret = EINVAL;
656662d0dc6SMichael Baum 		goto close;
657662d0dc6SMichael Baum 	} else if (!enable) {
658662d0dc6SMichael Baum 		ret = 0;
659662d0dc6SMichael Baum 		DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
660662d0dc6SMichael Baum 		goto close;
661662d0dc6SMichael Baum 	}
662662d0dc6SMichael Baum 	fclose(file_o);
663662d0dc6SMichael Baum 	file_o = fopen(file_p, "wb");
664662d0dc6SMichael Baum 	if (!file_o) {
665662d0dc6SMichael Baum 		rte_errno = ENOTSUP;
666662d0dc6SMichael Baum 		return -ENOTSUP;
667662d0dc6SMichael Baum 	}
668662d0dc6SMichael Baum 	fprintf(file_o, "0\n");
669662d0dc6SMichael Baum 	ret = 0;
670662d0dc6SMichael Baum close:
671662d0dc6SMichael Baum 	if (ret)
672662d0dc6SMichael Baum 		DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
673662d0dc6SMichael Baum 	else
674662d0dc6SMichael Baum 		DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
675662d0dc6SMichael Baum 	fclose(file_o);
676662d0dc6SMichael Baum 	return ret;
677662d0dc6SMichael Baum }
678662d0dc6SMichael Baum 
679662d0dc6SMichael Baum static int
680662d0dc6SMichael Baum mlx5_roce_disable(const struct rte_device *dev)
681662d0dc6SMichael Baum {
682662d0dc6SMichael Baum 	char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
683662d0dc6SMichael Baum 
684662d0dc6SMichael Baum 	if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0)
685662d0dc6SMichael Baum 		return -rte_errno;
686662d0dc6SMichael Baum 	/* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
687662d0dc6SMichael Baum 	if (mlx5_nl_roce_disable(pci_addr) != 0 &&
688662d0dc6SMichael Baum 	    mlx5_sys_roce_disable(pci_addr) != 0)
689662d0dc6SMichael Baum 		return -rte_errno;
690662d0dc6SMichael Baum 	return 0;
691662d0dc6SMichael Baum }
692662d0dc6SMichael Baum 
693662d0dc6SMichael Baum static struct ibv_device *
694662d0dc6SMichael Baum mlx5_os_get_ibv_dev(const struct rte_device *dev)
695662d0dc6SMichael Baum {
696662d0dc6SMichael Baum 	struct ibv_device *ibv;
697662d0dc6SMichael Baum 
698662d0dc6SMichael Baum 	if (mlx5_dev_is_pci(dev))
699662d0dc6SMichael Baum 		ibv = mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr);
700662d0dc6SMichael Baum 	else
701662d0dc6SMichael Baum 		ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev));
702662d0dc6SMichael Baum 	if (ibv == NULL) {
703662d0dc6SMichael Baum 		rte_errno = ENODEV;
704662d0dc6SMichael Baum 		DRV_LOG(ERR, "Verbs device not found: %s", dev->name);
705662d0dc6SMichael Baum 	}
706662d0dc6SMichael Baum 	return ibv;
707662d0dc6SMichael Baum }
708662d0dc6SMichael Baum 
709662d0dc6SMichael Baum static struct ibv_device *
710662d0dc6SMichael Baum mlx5_vdpa_get_ibv_dev(const struct rte_device *dev)
711662d0dc6SMichael Baum {
712662d0dc6SMichael Baum 	struct ibv_device *ibv;
713662d0dc6SMichael Baum 	int retry;
714662d0dc6SMichael Baum 
715662d0dc6SMichael Baum 	if (mlx5_roce_disable(dev) != 0) {
716662d0dc6SMichael Baum 		DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
717662d0dc6SMichael Baum 			dev->name);
718662d0dc6SMichael Baum 		return NULL;
719662d0dc6SMichael Baum 	}
720662d0dc6SMichael Baum 	/* Wait for the IB device to appear again after reload. */
721662d0dc6SMichael Baum 	for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
722662d0dc6SMichael Baum 		ibv = mlx5_os_get_ibv_dev(dev);
723662d0dc6SMichael Baum 		if (ibv != NULL)
724662d0dc6SMichael Baum 			return ibv;
725662d0dc6SMichael Baum 		usleep(MLX5_VDPA_USEC);
726662d0dc6SMichael Baum 	}
727662d0dc6SMichael Baum 	DRV_LOG(ERR,
728662d0dc6SMichael Baum 		"Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.",
729662d0dc6SMichael Baum 		dev->name, MLX5_VDPA_MAX_RETRIES);
730662d0dc6SMichael Baum 	rte_errno = EAGAIN;
731662d0dc6SMichael Baum 	return NULL;
732662d0dc6SMichael Baum }
733662d0dc6SMichael Baum 
734887183efSMichael Baum static int
735887183efSMichael Baum mlx5_config_doorbell_mapping_env(int dbnc)
736887183efSMichael Baum {
737887183efSMichael Baum 	char *env;
738887183efSMichael Baum 	int value;
739887183efSMichael Baum 
740887183efSMichael Baum 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
741887183efSMichael Baum 	/* Get environment variable to store. */
742887183efSMichael Baum 	env = getenv(MLX5_SHUT_UP_BF);
743887183efSMichael Baum 	value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET;
744887183efSMichael Baum 	if (dbnc == MLX5_ARG_UNSET)
745887183efSMichael Baum 		setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
746887183efSMichael Baum 	else
747887183efSMichael Baum 		setenv(MLX5_SHUT_UP_BF,
748a6b9d5a5SMichael Baum 		       dbnc == MLX5_SQ_DB_NCACHED ? "1" : "0", 1);
749887183efSMichael Baum 	return value;
750887183efSMichael Baum }
751887183efSMichael Baum 
752887183efSMichael Baum static void
753887183efSMichael Baum mlx5_restore_doorbell_mapping_env(int value)
754887183efSMichael Baum {
755887183efSMichael Baum 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
756887183efSMichael Baum 	/* Restore the original environment variable state. */
757887183efSMichael Baum 	if (value == MLX5_ARG_UNSET)
758887183efSMichael Baum 		unsetenv(MLX5_SHUT_UP_BF);
759887183efSMichael Baum 	else
760887183efSMichael Baum 		setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
761887183efSMichael Baum }
762887183efSMichael Baum 
763887183efSMichael Baum /**
764887183efSMichael Baum  * Function API to open IB device.
765887183efSMichael Baum  *
766887183efSMichael Baum  * @param cdev
767887183efSMichael Baum  *   Pointer to the mlx5 device.
768ca1418ceSMichael Baum  * @param classes
769ca1418ceSMichael Baum  *   Chosen classes come from device arguments.
770887183efSMichael Baum  *
771887183efSMichael Baum  * @return
7729d936f4fSMichael Baum  *   Pointer to ibv_context on success, NULL otherwise and rte_errno is set.
773887183efSMichael Baum  */
7749d936f4fSMichael Baum static struct ibv_context *
7759d936f4fSMichael Baum mlx5_open_device(struct mlx5_common_device *cdev, uint32_t classes)
776887183efSMichael Baum {
777887183efSMichael Baum 	struct ibv_device *ibv;
778887183efSMichael Baum 	struct ibv_context *ctx = NULL;
779887183efSMichael Baum 	int dbmap_env;
780887183efSMichael Baum 
7819d936f4fSMichael Baum 	MLX5_ASSERT(cdev->config.device_fd == MLX5_ARG_UNSET);
782662d0dc6SMichael Baum 	if (classes & MLX5_CLASS_VDPA)
783662d0dc6SMichael Baum 		ibv = mlx5_vdpa_get_ibv_dev(cdev->dev);
784662d0dc6SMichael Baum 	else
785887183efSMichael Baum 		ibv = mlx5_os_get_ibv_dev(cdev->dev);
786887183efSMichael Baum 	if (!ibv)
7879d936f4fSMichael Baum 		return NULL;
788887183efSMichael Baum 	DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
789887183efSMichael Baum 	/*
790887183efSMichael Baum 	 * Configure environment variable "MLX5_BF_SHUT_UP" before the device
791887183efSMichael Baum 	 * creation. The rdma_core library checks the variable at device
792887183efSMichael Baum 	 * creation and stores the result internally.
793887183efSMichael Baum 	 */
794887183efSMichael Baum 	dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc);
795887183efSMichael Baum 	/* Try to open IB device with DV first, then usual Verbs. */
796887183efSMichael Baum 	errno = 0;
797887183efSMichael Baum 	ctx = mlx5_glue->dv_open_device(ibv);
798887183efSMichael Baum 	if (ctx) {
799887183efSMichael Baum 		cdev->config.devx = 1;
800ca1418ceSMichael Baum 	} else if (classes == MLX5_CLASS_ETH) {
801887183efSMichael Baum 		/* The environment variable is still configured. */
802887183efSMichael Baum 		ctx = mlx5_glue->open_device(ibv);
803887183efSMichael Baum 		if (ctx == NULL)
804887183efSMichael Baum 			goto error;
805ca1418ceSMichael Baum 	} else {
806ca1418ceSMichael Baum 		goto error;
807887183efSMichael Baum 	}
808887183efSMichael Baum 	/* The device is created, no need for environment. */
809887183efSMichael Baum 	mlx5_restore_doorbell_mapping_env(dbmap_env);
8109d936f4fSMichael Baum 	return ctx;
811887183efSMichael Baum error:
812887183efSMichael Baum 	rte_errno = errno ? errno : ENODEV;
813887183efSMichael Baum 	/* The device creation is failed, no need for environment. */
814887183efSMichael Baum 	mlx5_restore_doorbell_mapping_env(dbmap_env);
815887183efSMichael Baum 	DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
8169d936f4fSMichael Baum 	return NULL;
817887183efSMichael Baum }
8189d936f4fSMichael Baum 
8199d936f4fSMichael Baum /**
8209d936f4fSMichael Baum  * Function API to import IB device.
8219d936f4fSMichael Baum  *
8229d936f4fSMichael Baum  * @param cdev
8239d936f4fSMichael Baum  *   Pointer to the mlx5 device.
8249d936f4fSMichael Baum  *
8259d936f4fSMichael Baum  * @return
8269d936f4fSMichael Baum  *   Pointer to ibv_context on success, NULL otherwise and rte_errno is set.
8279d936f4fSMichael Baum  */
8289d936f4fSMichael Baum static struct ibv_context *
8299d936f4fSMichael Baum mlx5_import_device(struct mlx5_common_device *cdev)
8309d936f4fSMichael Baum {
8319d936f4fSMichael Baum 	struct ibv_context *ctx = NULL;
8329d936f4fSMichael Baum 
8339d936f4fSMichael Baum 	MLX5_ASSERT(cdev->config.device_fd != MLX5_ARG_UNSET);
8349d936f4fSMichael Baum 	ctx = mlx5_glue->import_device(cdev->config.device_fd);
8359d936f4fSMichael Baum 	if (!ctx) {
8369d936f4fSMichael Baum 		DRV_LOG(ERR, "Failed to import device for fd=%d: %s",
8379d936f4fSMichael Baum 			cdev->config.device_fd, rte_strerror(errno));
8389d936f4fSMichael Baum 		rte_errno = errno;
8399d936f4fSMichael Baum 	}
8409d936f4fSMichael Baum 	return ctx;
8419d936f4fSMichael Baum }
8429d936f4fSMichael Baum 
8439d936f4fSMichael Baum /**
8449d936f4fSMichael Baum  * Function API to prepare IB device.
8459d936f4fSMichael Baum  *
8469d936f4fSMichael Baum  * @param cdev
8479d936f4fSMichael Baum  *   Pointer to the mlx5 device.
8489d936f4fSMichael Baum  * @param classes
8499d936f4fSMichael Baum  *   Chosen classes come from device arguments.
8509d936f4fSMichael Baum  *
8519d936f4fSMichael Baum  * @return
8529d936f4fSMichael Baum  *   0 on success, a negative errno value otherwise and rte_errno is set.
8539d936f4fSMichael Baum  */
8549d936f4fSMichael Baum int
8559d936f4fSMichael Baum mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
8569d936f4fSMichael Baum {
8579d936f4fSMichael Baum 
8589d936f4fSMichael Baum 	struct ibv_context *ctx = NULL;
8599d936f4fSMichael Baum 
8609d936f4fSMichael Baum 	if (cdev->config.device_fd == MLX5_ARG_UNSET)
8619d936f4fSMichael Baum 		ctx = mlx5_open_device(cdev, classes);
8629d936f4fSMichael Baum 	else
8639d936f4fSMichael Baum 		ctx = mlx5_import_device(cdev);
8649d936f4fSMichael Baum 	if (ctx == NULL)
8659d936f4fSMichael Baum 		return -rte_errno;
8669d936f4fSMichael Baum 	/* Hint libmlx5 to use PMD allocator for data plane resources */
8679d936f4fSMichael Baum 	mlx5_set_context_attr(cdev->dev, ctx);
8689d936f4fSMichael Baum 	cdev->ctx = ctx;
8699d936f4fSMichael Baum 	return 0;
8709d936f4fSMichael Baum }
8719d936f4fSMichael Baum 
8724c74ad3eSRongwei Liu int
8734c74ad3eSRongwei Liu mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len)
8744c74ad3eSRongwei Liu {
8754c74ad3eSRongwei Liu 	char tmp[512];
8764c74ad3eSRongwei Liu 	char cur_ifname[IF_NAMESIZE + 1];
8774c74ad3eSRongwei Liu 	FILE *id_file;
8784c74ad3eSRongwei Liu 	DIR *dir;
8794c74ad3eSRongwei Liu 	struct dirent *ptr;
8804c74ad3eSRongwei Liu 	int ret;
8814c74ad3eSRongwei Liu 
8824c74ad3eSRongwei Liu 	if (guid == NULL || len < sizeof(u_int64_t) + 1)
8834c74ad3eSRongwei Liu 		return -1;
8844c74ad3eSRongwei Liu 	memset(guid, 0, len);
8854c74ad3eSRongwei Liu 	snprintf(tmp, sizeof(tmp), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/net",
8864c74ad3eSRongwei Liu 			dev->domain, dev->bus, dev->devid, dev->function);
8874c74ad3eSRongwei Liu 	dir = opendir(tmp);
8884c74ad3eSRongwei Liu 	if (dir == NULL)
8894c74ad3eSRongwei Liu 		return -1;
8904c74ad3eSRongwei Liu 	/* Traverse to identify PF interface */
8914c74ad3eSRongwei Liu 	do {
8924c74ad3eSRongwei Liu 		ptr = readdir(dir);
8934c74ad3eSRongwei Liu 		if (ptr == NULL || ptr->d_type != DT_DIR) {
8944c74ad3eSRongwei Liu 			closedir(dir);
8954c74ad3eSRongwei Liu 			return -1;
8964c74ad3eSRongwei Liu 		}
8974c74ad3eSRongwei Liu 	} while (strchr(ptr->d_name, '.') || strchr(ptr->d_name, '_') ||
8984c74ad3eSRongwei Liu 		 strchr(ptr->d_name, 'v'));
8994c74ad3eSRongwei Liu 	snprintf(cur_ifname, sizeof(cur_ifname), "%s", ptr->d_name);
9004c74ad3eSRongwei Liu 	closedir(dir);
9014c74ad3eSRongwei Liu 	snprintf(tmp + strlen(tmp), sizeof(tmp) - strlen(tmp),
9024c74ad3eSRongwei Liu 			"/%s/phys_switch_id", cur_ifname);
9034c74ad3eSRongwei Liu 	/* Older OFED like 5.3 doesn't support read */
9044c74ad3eSRongwei Liu 	id_file = fopen(tmp, "r");
9054c74ad3eSRongwei Liu 	if (!id_file)
9064c74ad3eSRongwei Liu 		return 0;
9074c74ad3eSRongwei Liu 	ret = fscanf(id_file, "%16s", guid);
9084c74ad3eSRongwei Liu 	fclose(id_file);
9094c74ad3eSRongwei Liu 	return ret;
9104c74ad3eSRongwei Liu }
91176b5bdf8SMatan Azrad 
91276b5bdf8SMatan Azrad /*
91376b5bdf8SMatan Azrad  * Create direct mkey using the kernel ibv_reg_mr API and wrap it with a new
91476b5bdf8SMatan Azrad  * indirect mkey created by the DevX API.
91576b5bdf8SMatan Azrad  * This mkey should be used for DevX commands requesting mkey as a parameter.
91676b5bdf8SMatan Azrad  */
91776b5bdf8SMatan Azrad int
91876b5bdf8SMatan Azrad mlx5_os_wrapped_mkey_create(void *ctx, void *pd, uint32_t pdn, void *addr,
91976b5bdf8SMatan Azrad 			    size_t length, struct mlx5_pmd_wrapped_mr *pmd_mr)
92076b5bdf8SMatan Azrad {
92176b5bdf8SMatan Azrad 	struct mlx5_klm klm = {
92276b5bdf8SMatan Azrad 		.byte_count = length,
92376b5bdf8SMatan Azrad 		.address = (uintptr_t)addr,
92476b5bdf8SMatan Azrad 	};
92576b5bdf8SMatan Azrad 	struct mlx5_devx_mkey_attr mkey_attr = {
92676b5bdf8SMatan Azrad 		.pd = pdn,
92776b5bdf8SMatan Azrad 		.klm_array = &klm,
92876b5bdf8SMatan Azrad 		.klm_num = 1,
92976b5bdf8SMatan Azrad 	};
93076b5bdf8SMatan Azrad 	struct mlx5_devx_obj *mkey;
93176b5bdf8SMatan Azrad 	struct ibv_mr *ibv_mr = mlx5_glue->reg_mr(pd, addr, length,
93276b5bdf8SMatan Azrad 						  IBV_ACCESS_LOCAL_WRITE |
93376b5bdf8SMatan Azrad 						  (haswell_broadwell_cpu ? 0 :
93476b5bdf8SMatan Azrad 						  IBV_ACCESS_RELAXED_ORDERING));
93576b5bdf8SMatan Azrad 
93676b5bdf8SMatan Azrad 	if (!ibv_mr) {
93776b5bdf8SMatan Azrad 		rte_errno = errno;
93876b5bdf8SMatan Azrad 		return -rte_errno;
93976b5bdf8SMatan Azrad 	}
94076b5bdf8SMatan Azrad 	klm.mkey = ibv_mr->lkey;
94176b5bdf8SMatan Azrad 	mkey_attr.addr = (uintptr_t)addr;
94276b5bdf8SMatan Azrad 	mkey_attr.size = length;
94376b5bdf8SMatan Azrad 	mkey = mlx5_devx_cmd_mkey_create(ctx, &mkey_attr);
94476b5bdf8SMatan Azrad 	if (!mkey) {
94576b5bdf8SMatan Azrad 		claim_zero(mlx5_glue->dereg_mr(ibv_mr));
94676b5bdf8SMatan Azrad 		return -rte_errno;
94776b5bdf8SMatan Azrad 	}
94876b5bdf8SMatan Azrad 	pmd_mr->addr = addr;
94976b5bdf8SMatan Azrad 	pmd_mr->len = length;
95076b5bdf8SMatan Azrad 	pmd_mr->obj = (void *)ibv_mr;
95176b5bdf8SMatan Azrad 	pmd_mr->imkey = mkey;
95276b5bdf8SMatan Azrad 	pmd_mr->lkey = mkey->id;
95376b5bdf8SMatan Azrad 	return 0;
95476b5bdf8SMatan Azrad }
95576b5bdf8SMatan Azrad 
95676b5bdf8SMatan Azrad void
95776b5bdf8SMatan Azrad mlx5_os_wrapped_mkey_destroy(struct mlx5_pmd_wrapped_mr *pmd_mr)
95876b5bdf8SMatan Azrad {
95976b5bdf8SMatan Azrad 	if (!pmd_mr)
96076b5bdf8SMatan Azrad 		return;
96176b5bdf8SMatan Azrad 	if (pmd_mr->imkey)
96276b5bdf8SMatan Azrad 		claim_zero(mlx5_devx_cmd_destroy(pmd_mr->imkey));
96376b5bdf8SMatan Azrad 	if (pmd_mr->obj)
96476b5bdf8SMatan Azrad 		claim_zero(mlx5_glue->dereg_mr(pmd_mr->obj));
96576b5bdf8SMatan Azrad 	memset(pmd_mr, 0, sizeof(*pmd_mr));
96676b5bdf8SMatan Azrad }
967