xref: /dpdk/drivers/common/mlx5/linux/mlx5_common_os.c (revision 662d0dc67192f424cd85beb4bbcba6e6759c16b9)
179aa4307SOphir Munk /* SPDX-License-Identifier: BSD-3-Clause
279aa4307SOphir Munk  * Copyright 2020 Mellanox Technologies, Ltd
379aa4307SOphir Munk  */
479aa4307SOphir Munk 
579aa4307SOphir Munk #include <unistd.h>
679aa4307SOphir Munk #include <string.h>
779aa4307SOphir Munk #include <stdio.h>
879aa4307SOphir Munk #ifdef RTE_IBVERBS_LINK_DLOPEN
979aa4307SOphir Munk #include <dlfcn.h>
1079aa4307SOphir Munk #endif
11aec086c9SMatan Azrad #include <dirent.h>
12aec086c9SMatan Azrad #include <net/if.h>
1379aa4307SOphir Munk 
1479aa4307SOphir Munk #include <rte_errno.h>
15aec086c9SMatan Azrad #include <rte_string_fns.h>
16*662d0dc6SMichael Baum #include <rte_bus_pci.h>
17*662d0dc6SMichael Baum #include <rte_bus_auxiliary.h>
1879aa4307SOphir Munk 
1979aa4307SOphir Munk #include "mlx5_common.h"
20*662d0dc6SMichael Baum #include "mlx5_nl.h"
2125245d5dSShiri Kuzin #include "mlx5_common_log.h"
22*662d0dc6SMichael Baum #include "mlx5_common_private.h"
23887183efSMichael Baum #include "mlx5_common_defs.h"
24c31f3f7fSShiri Kuzin #include "mlx5_common_os.h"
2579aa4307SOphir Munk #include "mlx5_glue.h"
2679aa4307SOphir Munk 
2779aa4307SOphir Munk #ifdef MLX5_GLUE
/*
 * Pointer to the rdma-core glue table. It is assigned by mlx5_glue_dlopen()
 * from the "mlx5_glue" symbol of the dlopen()ed glue library and reset to
 * NULL by mlx5_glue_constructor() when initialization fails.
 */
2879aa4307SOphir Munk const struct mlx5_glue *mlx5_glue;
2979aa4307SOphir Munk #endif
3079aa4307SOphir Munk 
3179aa4307SOphir Munk int
324d567938SThomas Monjalon mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
3379aa4307SOphir Munk {
3479aa4307SOphir Munk 	FILE *file;
3579aa4307SOphir Munk 	char line[32];
36482a1d34SViacheslav Ovsiienko 	int rc = -ENOENT;
3779aa4307SOphir Munk 	MKSTR(path, "%s/device/uevent", dev_path);
3879aa4307SOphir Munk 
3979aa4307SOphir Munk 	file = fopen(path, "rb");
4079aa4307SOphir Munk 	if (file == NULL) {
4179aa4307SOphir Munk 		rte_errno = errno;
4279aa4307SOphir Munk 		return -rte_errno;
4379aa4307SOphir Munk 	}
4479aa4307SOphir Munk 	while (fgets(line, sizeof(line), file) == line) {
4579aa4307SOphir Munk 		size_t len = strlen(line);
4679aa4307SOphir Munk 
4779aa4307SOphir Munk 		/* Truncate long lines. */
48482a1d34SViacheslav Ovsiienko 		if (len == (sizeof(line) - 1)) {
4979aa4307SOphir Munk 			while (line[(len - 1)] != '\n') {
50482a1d34SViacheslav Ovsiienko 				int ret = fgetc(file);
51482a1d34SViacheslav Ovsiienko 
5279aa4307SOphir Munk 				if (ret == EOF)
53482a1d34SViacheslav Ovsiienko 					goto exit;
5479aa4307SOphir Munk 				line[(len - 1)] = ret;
5579aa4307SOphir Munk 			}
56482a1d34SViacheslav Ovsiienko 			/* No match for long lines. */
57482a1d34SViacheslav Ovsiienko 			continue;
58482a1d34SViacheslav Ovsiienko 		}
5979aa4307SOphir Munk 		/* Extract information. */
6079aa4307SOphir Munk 		if (sscanf(line,
6179aa4307SOphir Munk 			   "PCI_SLOT_NAME="
6279aa4307SOphir Munk 			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
6379aa4307SOphir Munk 			   &pci_addr->domain,
6479aa4307SOphir Munk 			   &pci_addr->bus,
6579aa4307SOphir Munk 			   &pci_addr->devid,
6679aa4307SOphir Munk 			   &pci_addr->function) == 4) {
67482a1d34SViacheslav Ovsiienko 			rc = 0;
6879aa4307SOphir Munk 			break;
6979aa4307SOphir Munk 		}
7079aa4307SOphir Munk 	}
71482a1d34SViacheslav Ovsiienko exit:
7279aa4307SOphir Munk 	fclose(file);
73482a1d34SViacheslav Ovsiienko 	if (rc)
74482a1d34SViacheslav Ovsiienko 		rte_errno = -rc;
75482a1d34SViacheslav Ovsiienko 	return rc;
7679aa4307SOphir Munk }
7779aa4307SOphir Munk 
7879aa4307SOphir Munk /**
7979aa4307SOphir Munk  * Extract port name, as a number, from sysfs or netlink information.
8079aa4307SOphir Munk  *
8179aa4307SOphir Munk  * @param[in] port_name_in
8279aa4307SOphir Munk  *   String representing the port name.
8379aa4307SOphir Munk  * @param[out] port_info_out
8479aa4307SOphir Munk  *   Port information, including port name as a number and port name
8579aa4307SOphir Munk  *   type if recognized
8679aa4307SOphir Munk  *
8779aa4307SOphir Munk  * @return
8879aa4307SOphir Munk  *   port_name field set according to recognized name format.
8979aa4307SOphir Munk  */
9079aa4307SOphir Munk void
9179aa4307SOphir Munk mlx5_translate_port_name(const char *port_name_in,
9279aa4307SOphir Munk 			 struct mlx5_switch_info *port_info_out)
9379aa4307SOphir Munk {
9459df97f1SXueming Li 	char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol;
9579aa4307SOphir Munk 	char *end;
9679aa4307SOphir Munk 	int sc_items;
9779aa4307SOphir Munk 
9859df97f1SXueming Li 	sc_items = sscanf(port_name_in, "%c%d",
9959df97f1SXueming Li 			  &ctrl, &port_info_out->ctrl_num);
10059df97f1SXueming Li 	if (sc_items == 2 && ctrl == 'c') {
10159df97f1SXueming Li 		port_name_in++; /* 'c' */
10259df97f1SXueming Li 		port_name_in += snprintf(NULL, 0, "%d",
10359df97f1SXueming Li 					  port_info_out->ctrl_num);
10459df97f1SXueming Li 	}
10559df97f1SXueming Li 	/* Check for port-name as a string of the form pf0vf0 or pf0sf0 */
1063590881bSViacheslav Ovsiienko 	sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c",
10779aa4307SOphir Munk 			  &pf_c1, &pf_c2, &port_info_out->pf_num,
1083590881bSViacheslav Ovsiienko 			  &vf_c1, &vf_c2, &port_info_out->port_name, &eol);
10959df97f1SXueming Li 	if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') {
11059df97f1SXueming Li 		if (vf_c1 == 'v' && vf_c2 == 'f') {
11159df97f1SXueming Li 			/* Kernel ver >= 5.0 or OFED ver >= 4.6 */
11259df97f1SXueming Li 			port_info_out->name_type =
11359df97f1SXueming Li 					MLX5_PHYS_PORT_NAME_TYPE_PFVF;
11479aa4307SOphir Munk 			return;
11579aa4307SOphir Munk 		}
11659df97f1SXueming Li 		if (vf_c1 == 's' && vf_c2 == 'f') {
11759df97f1SXueming Li 			/* Kernel ver >= 5.11 or OFED ver >= 5.1 */
11859df97f1SXueming Li 			port_info_out->name_type =
11959df97f1SXueming Li 					MLX5_PHYS_PORT_NAME_TYPE_PFSF;
12059df97f1SXueming Li 			return;
12159df97f1SXueming Li 		}
12259df97f1SXueming Li 	}
12379aa4307SOphir Munk 	/*
12479aa4307SOphir Munk 	 * Check for port-name as a string of the form p0
12579aa4307SOphir Munk 	 * (support kernel ver >= 5.0, or OFED ver >= 4.6).
12679aa4307SOphir Munk 	 */
1273590881bSViacheslav Ovsiienko 	sc_items = sscanf(port_name_in, "%c%d%c",
1283590881bSViacheslav Ovsiienko 			  &pf_c1, &port_info_out->port_name, &eol);
12979aa4307SOphir Munk 	if (sc_items == 2 && pf_c1 == 'p') {
13079aa4307SOphir Munk 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
13179aa4307SOphir Munk 		return;
13279aa4307SOphir Munk 	}
133420bbdaeSViacheslav Ovsiienko 	/*
134420bbdaeSViacheslav Ovsiienko 	 * Check for port-name as a string of the form pf0
135420bbdaeSViacheslav Ovsiienko 	 * (support kernel ver >= 5.7 for HPF representor on BF).
136420bbdaeSViacheslav Ovsiienko 	 */
1373590881bSViacheslav Ovsiienko 	sc_items = sscanf(port_name_in, "%c%c%d%c",
1383590881bSViacheslav Ovsiienko 			  &pf_c1, &pf_c2, &port_info_out->pf_num, &eol);
139420bbdaeSViacheslav Ovsiienko 	if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') {
140420bbdaeSViacheslav Ovsiienko 		port_info_out->port_name = -1;
141420bbdaeSViacheslav Ovsiienko 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF;
142420bbdaeSViacheslav Ovsiienko 		return;
143420bbdaeSViacheslav Ovsiienko 	}
14479aa4307SOphir Munk 	/* Check for port-name as a number (support kernel ver < 5.0 */
14579aa4307SOphir Munk 	errno = 0;
14679aa4307SOphir Munk 	port_info_out->port_name = strtol(port_name_in, &end, 0);
14779aa4307SOphir Munk 	if (!errno &&
14879aa4307SOphir Munk 	    (size_t)(end - port_name_in) == strlen(port_name_in)) {
14979aa4307SOphir Munk 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
15079aa4307SOphir Munk 		return;
15179aa4307SOphir Munk 	}
15279aa4307SOphir Munk 	port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
15379aa4307SOphir Munk }
15479aa4307SOphir Munk 
155aec086c9SMatan Azrad int
156aec086c9SMatan Azrad mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
157aec086c9SMatan Azrad {
158aec086c9SMatan Azrad 	DIR *dir;
159aec086c9SMatan Azrad 	struct dirent *dent;
160aec086c9SMatan Azrad 	unsigned int dev_type = 0;
161aec086c9SMatan Azrad 	unsigned int dev_port_prev = ~0u;
162aec086c9SMatan Azrad 	char match[IF_NAMESIZE] = "";
163aec086c9SMatan Azrad 
164aec086c9SMatan Azrad 	MLX5_ASSERT(ibdev_path);
165aec086c9SMatan Azrad 	{
166aec086c9SMatan Azrad 		MKSTR(path, "%s/device/net", ibdev_path);
167aec086c9SMatan Azrad 
168aec086c9SMatan Azrad 		dir = opendir(path);
169aec086c9SMatan Azrad 		if (dir == NULL) {
170aec086c9SMatan Azrad 			rte_errno = errno;
171aec086c9SMatan Azrad 			return -rte_errno;
172aec086c9SMatan Azrad 		}
173aec086c9SMatan Azrad 	}
174aec086c9SMatan Azrad 	while ((dent = readdir(dir)) != NULL) {
175aec086c9SMatan Azrad 		char *name = dent->d_name;
176aec086c9SMatan Azrad 		FILE *file;
177aec086c9SMatan Azrad 		unsigned int dev_port;
178aec086c9SMatan Azrad 		int r;
179aec086c9SMatan Azrad 
180aec086c9SMatan Azrad 		if ((name[0] == '.') &&
181aec086c9SMatan Azrad 		    ((name[1] == '\0') ||
182aec086c9SMatan Azrad 		     ((name[1] == '.') && (name[2] == '\0'))))
183aec086c9SMatan Azrad 			continue;
184aec086c9SMatan Azrad 
185aec086c9SMatan Azrad 		MKSTR(path, "%s/device/net/%s/%s",
186aec086c9SMatan Azrad 		      ibdev_path, name,
187aec086c9SMatan Azrad 		      (dev_type ? "dev_id" : "dev_port"));
188aec086c9SMatan Azrad 
189aec086c9SMatan Azrad 		file = fopen(path, "rb");
190aec086c9SMatan Azrad 		if (file == NULL) {
191aec086c9SMatan Azrad 			if (errno != ENOENT)
192aec086c9SMatan Azrad 				continue;
193aec086c9SMatan Azrad 			/*
194aec086c9SMatan Azrad 			 * Switch to dev_id when dev_port does not exist as
195aec086c9SMatan Azrad 			 * is the case with Linux kernel versions < 3.15.
196aec086c9SMatan Azrad 			 */
197aec086c9SMatan Azrad try_dev_id:
198aec086c9SMatan Azrad 			match[0] = '\0';
199aec086c9SMatan Azrad 			if (dev_type)
200aec086c9SMatan Azrad 				break;
201aec086c9SMatan Azrad 			dev_type = 1;
202aec086c9SMatan Azrad 			dev_port_prev = ~0u;
203aec086c9SMatan Azrad 			rewinddir(dir);
204aec086c9SMatan Azrad 			continue;
205aec086c9SMatan Azrad 		}
206aec086c9SMatan Azrad 		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
207aec086c9SMatan Azrad 		fclose(file);
208aec086c9SMatan Azrad 		if (r != 1)
209aec086c9SMatan Azrad 			continue;
210aec086c9SMatan Azrad 		/*
211aec086c9SMatan Azrad 		 * Switch to dev_id when dev_port returns the same value for
212aec086c9SMatan Azrad 		 * all ports. May happen when using a MOFED release older than
213aec086c9SMatan Azrad 		 * 3.0 with a Linux kernel >= 3.15.
214aec086c9SMatan Azrad 		 */
215aec086c9SMatan Azrad 		if (dev_port == dev_port_prev)
216aec086c9SMatan Azrad 			goto try_dev_id;
217aec086c9SMatan Azrad 		dev_port_prev = dev_port;
218aec086c9SMatan Azrad 		if (dev_port == 0)
219aec086c9SMatan Azrad 			strlcpy(match, name, IF_NAMESIZE);
220aec086c9SMatan Azrad 	}
221aec086c9SMatan Azrad 	closedir(dir);
222aec086c9SMatan Azrad 	if (match[0] == '\0') {
223aec086c9SMatan Azrad 		rte_errno = ENOENT;
224aec086c9SMatan Azrad 		return -rte_errno;
225aec086c9SMatan Azrad 	}
226aec086c9SMatan Azrad 	strncpy(ifname, match, IF_NAMESIZE);
227aec086c9SMatan Azrad 	return 0;
228aec086c9SMatan Azrad }
229aec086c9SMatan Azrad 
23079aa4307SOphir Munk #ifdef MLX5_GLUE
23179aa4307SOphir Munk 
23279aa4307SOphir Munk /**
23379aa4307SOphir Munk  * Suffix RTE_EAL_PMD_PATH with "-glue".
23479aa4307SOphir Munk  *
23579aa4307SOphir Munk  * This function performs a sanity check on RTE_EAL_PMD_PATH before
23679aa4307SOphir Munk  * suffixing its last component.
23779aa4307SOphir Munk  *
23879aa4307SOphir Munk  * @param buf[out]
23979aa4307SOphir Munk  *   Output buffer, should be large enough otherwise NULL is returned.
24079aa4307SOphir Munk  * @param size
24179aa4307SOphir Munk  *   Size of @p out.
24279aa4307SOphir Munk  *
24379aa4307SOphir Munk  * @return
24479aa4307SOphir Munk  *   Pointer to @p buf or @p NULL in case suffix cannot be appended.
24579aa4307SOphir Munk  */
24679aa4307SOphir Munk static char *
24779aa4307SOphir Munk mlx5_glue_path(char *buf, size_t size)
24879aa4307SOphir Munk {
24979aa4307SOphir Munk 	static const char *const bad[] = { "/", ".", "..", NULL };
25079aa4307SOphir Munk 	const char *path = RTE_EAL_PMD_PATH;
25179aa4307SOphir Munk 	size_t len = strlen(path);
25279aa4307SOphir Munk 	size_t off;
25379aa4307SOphir Munk 	int i;
25479aa4307SOphir Munk 
25579aa4307SOphir Munk 	while (len && path[len - 1] == '/')
25679aa4307SOphir Munk 		--len;
25779aa4307SOphir Munk 	for (off = len; off && path[off - 1] != '/'; --off)
25879aa4307SOphir Munk 		;
25979aa4307SOphir Munk 	for (i = 0; bad[i]; ++i)
26079aa4307SOphir Munk 		if (!strncmp(path + off, bad[i], (int)(len - off)))
26179aa4307SOphir Munk 			goto error;
26279aa4307SOphir Munk 	i = snprintf(buf, size, "%.*s-glue", (int)len, path);
26379aa4307SOphir Munk 	if (i == -1 || (size_t)i >= size)
26479aa4307SOphir Munk 		goto error;
26579aa4307SOphir Munk 	return buf;
26679aa4307SOphir Munk error:
26779aa4307SOphir Munk 	RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of"
26879aa4307SOphir Munk 		" RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please"
26979aa4307SOphir Munk 		" re-configure DPDK");
27079aa4307SOphir Munk 	return NULL;
27179aa4307SOphir Munk }
27279aa4307SOphir Munk 
27379aa4307SOphir Munk static int
27479aa4307SOphir Munk mlx5_glue_dlopen(void)
27579aa4307SOphir Munk {
27679aa4307SOphir Munk 	char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")];
27779aa4307SOphir Munk 	void *handle = NULL;
27879aa4307SOphir Munk 
27979aa4307SOphir Munk 	char const *path[] = {
28079aa4307SOphir Munk 		/*
28179aa4307SOphir Munk 		 * A basic security check is necessary before trusting
28279aa4307SOphir Munk 		 * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
28379aa4307SOphir Munk 		 */
28479aa4307SOphir Munk 		(geteuid() == getuid() && getegid() == getgid() ?
28579aa4307SOphir Munk 		 getenv("MLX5_GLUE_PATH") : NULL),
28679aa4307SOphir Munk 		/*
28779aa4307SOphir Munk 		 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed
28879aa4307SOphir Munk 		 * variant, otherwise let dlopen() look up libraries on its
28979aa4307SOphir Munk 		 * own.
29079aa4307SOphir Munk 		 */
29179aa4307SOphir Munk 		(*RTE_EAL_PMD_PATH ?
29279aa4307SOphir Munk 		 mlx5_glue_path(glue_path, sizeof(glue_path)) : ""),
29379aa4307SOphir Munk 	};
29479aa4307SOphir Munk 	unsigned int i = 0;
29579aa4307SOphir Munk 	void **sym;
29679aa4307SOphir Munk 	const char *dlmsg;
29779aa4307SOphir Munk 
29879aa4307SOphir Munk 	while (!handle && i != RTE_DIM(path)) {
29979aa4307SOphir Munk 		const char *end;
30079aa4307SOphir Munk 		size_t len;
30179aa4307SOphir Munk 		int ret;
30279aa4307SOphir Munk 
30379aa4307SOphir Munk 		if (!path[i]) {
30479aa4307SOphir Munk 			++i;
30579aa4307SOphir Munk 			continue;
30679aa4307SOphir Munk 		}
30779aa4307SOphir Munk 		end = strpbrk(path[i], ":;");
30879aa4307SOphir Munk 		if (!end)
30979aa4307SOphir Munk 			end = path[i] + strlen(path[i]);
31079aa4307SOphir Munk 		len = end - path[i];
31179aa4307SOphir Munk 		ret = 0;
31279aa4307SOphir Munk 		do {
31379aa4307SOphir Munk 			char name[ret + 1];
31479aa4307SOphir Munk 
31579aa4307SOphir Munk 			ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE,
31679aa4307SOphir Munk 				       (int)len, path[i],
31779aa4307SOphir Munk 				       (!len || *(end - 1) == '/') ? "" : "/");
31879aa4307SOphir Munk 			if (ret == -1)
31979aa4307SOphir Munk 				break;
32079aa4307SOphir Munk 			if (sizeof(name) != (size_t)ret + 1)
32179aa4307SOphir Munk 				continue;
32279aa4307SOphir Munk 			DRV_LOG(DEBUG, "Looking for rdma-core glue as "
32379aa4307SOphir Munk 				"\"%s\"", name);
32479aa4307SOphir Munk 			handle = dlopen(name, RTLD_LAZY);
32579aa4307SOphir Munk 			break;
32679aa4307SOphir Munk 		} while (1);
32779aa4307SOphir Munk 		path[i] = end + 1;
32879aa4307SOphir Munk 		if (!*end)
32979aa4307SOphir Munk 			++i;
33079aa4307SOphir Munk 	}
33179aa4307SOphir Munk 	if (!handle) {
33279aa4307SOphir Munk 		rte_errno = EINVAL;
33379aa4307SOphir Munk 		dlmsg = dlerror();
33479aa4307SOphir Munk 		if (dlmsg)
33579aa4307SOphir Munk 			DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg);
33679aa4307SOphir Munk 		goto glue_error;
33779aa4307SOphir Munk 	}
33879aa4307SOphir Munk 	sym = dlsym(handle, "mlx5_glue");
33979aa4307SOphir Munk 	if (!sym || !*sym) {
34079aa4307SOphir Munk 		rte_errno = EINVAL;
34179aa4307SOphir Munk 		dlmsg = dlerror();
34279aa4307SOphir Munk 		if (dlmsg)
34379aa4307SOphir Munk 			DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg);
34479aa4307SOphir Munk 		goto glue_error;
34579aa4307SOphir Munk 	}
34679aa4307SOphir Munk 	mlx5_glue = *sym;
34779aa4307SOphir Munk 	return 0;
34879aa4307SOphir Munk 
34979aa4307SOphir Munk glue_error:
35079aa4307SOphir Munk 	if (handle)
35179aa4307SOphir Munk 		dlclose(handle);
35279aa4307SOphir Munk 	return -1;
35379aa4307SOphir Munk }
35479aa4307SOphir Munk 
35579aa4307SOphir Munk #endif
35679aa4307SOphir Munk 
35779aa4307SOphir Munk /**
35879aa4307SOphir Munk  * Initialization routine for run-time dependency on rdma-core.
35979aa4307SOphir Munk  */
36079aa4307SOphir Munk void
36179aa4307SOphir Munk mlx5_glue_constructor(void)
36279aa4307SOphir Munk {
36379aa4307SOphir Munk 	/*
36479aa4307SOphir Munk 	 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
36579aa4307SOphir Munk 	 * huge pages. Calling ibv_fork_init() during init allows
36679aa4307SOphir Munk 	 * applications to use fork() safely for purposes other than
36779aa4307SOphir Munk 	 * using this PMD, which is not supported in forked processes.
36879aa4307SOphir Munk 	 */
36979aa4307SOphir Munk 	setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
37079aa4307SOphir Munk 	/* Match the size of Rx completion entry to the size of a cacheline. */
37179aa4307SOphir Munk 	if (RTE_CACHE_LINE_SIZE == 128)
37279aa4307SOphir Munk 		setenv("MLX5_CQE_SIZE", "128", 0);
37379aa4307SOphir Munk 	/*
37479aa4307SOphir Munk 	 * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to
37579aa4307SOphir Munk 	 * cleanup all the Verbs resources even when the device was removed.
37679aa4307SOphir Munk 	 */
37779aa4307SOphir Munk 	setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1);
37879aa4307SOphir Munk 
37979aa4307SOphir Munk #ifdef MLX5_GLUE
38079aa4307SOphir Munk 	if (mlx5_glue_dlopen() != 0)
38179aa4307SOphir Munk 		goto glue_error;
38279aa4307SOphir Munk #endif
38379aa4307SOphir Munk 
38479aa4307SOphir Munk #ifdef RTE_LIBRTE_MLX5_DEBUG
38579aa4307SOphir Munk 	/* Glue structure must not contain any NULL pointers. */
38679aa4307SOphir Munk 	{
38779aa4307SOphir Munk 		unsigned int i;
38879aa4307SOphir Munk 
38979aa4307SOphir Munk 		for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
39079aa4307SOphir Munk 			MLX5_ASSERT(((const void *const *)mlx5_glue)[i]);
39179aa4307SOphir Munk 	}
39279aa4307SOphir Munk #endif
39379aa4307SOphir Munk 	if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) {
39479aa4307SOphir Munk 		rte_errno = EINVAL;
39579aa4307SOphir Munk 		DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is "
39679aa4307SOphir Munk 			"required", mlx5_glue->version, MLX5_GLUE_VERSION);
39779aa4307SOphir Munk 		goto glue_error;
39879aa4307SOphir Munk 	}
39979aa4307SOphir Munk 	mlx5_glue->fork_init();
40079aa4307SOphir Munk 	return;
40179aa4307SOphir Munk 
40279aa4307SOphir Munk glue_error:
40379aa4307SOphir Munk 	DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing"
40479aa4307SOphir Munk 		" run-time dependency on rdma-core libraries (libibverbs,"
40579aa4307SOphir Munk 		" libmlx5)");
40679aa4307SOphir Munk 	mlx5_glue = NULL;
40779aa4307SOphir Munk }
408262c7ad0SOri Kam 
409*662d0dc6SMichael Baum static struct ibv_device *
410ad435d32SXueming Li mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
411c31f3f7fSShiri Kuzin {
412c31f3f7fSShiri Kuzin 	int n;
413c31f3f7fSShiri Kuzin 	struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
414c31f3f7fSShiri Kuzin 	struct ibv_device *ibv_match = NULL;
415c31f3f7fSShiri Kuzin 
416c31f3f7fSShiri Kuzin 	if (ibv_list == NULL) {
417c31f3f7fSShiri Kuzin 		rte_errno = ENOSYS;
418c31f3f7fSShiri Kuzin 		return NULL;
419c31f3f7fSShiri Kuzin 	}
420c31f3f7fSShiri Kuzin 	while (n-- > 0) {
421c31f3f7fSShiri Kuzin 		struct rte_pci_addr paddr;
422c31f3f7fSShiri Kuzin 
423c31f3f7fSShiri Kuzin 		DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
4244d567938SThomas Monjalon 		if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
425c31f3f7fSShiri Kuzin 			continue;
426c31f3f7fSShiri Kuzin 		if (rte_pci_addr_cmp(addr, &paddr) != 0)
427c31f3f7fSShiri Kuzin 			continue;
428c31f3f7fSShiri Kuzin 		ibv_match = ibv_list[n];
429c31f3f7fSShiri Kuzin 		break;
430c31f3f7fSShiri Kuzin 	}
431ca1418ceSMichael Baum 	if (ibv_match == NULL) {
432ca1418ceSMichael Baum 		DRV_LOG(WARNING,
433ca1418ceSMichael Baum 			"No Verbs device matches PCI device " PCI_PRI_FMT ","
434ca1418ceSMichael Baum 			" are kernel drivers loaded?",
435ca1418ceSMichael Baum 			addr->domain, addr->bus, addr->devid, addr->function);
436c31f3f7fSShiri Kuzin 		rte_errno = ENOENT;
437ca1418ceSMichael Baum 	}
438c31f3f7fSShiri Kuzin 	mlx5_glue->free_device_list(ibv_list);
439c31f3f7fSShiri Kuzin 	return ibv_match;
440c31f3f7fSShiri Kuzin }
441887183efSMichael Baum 
442*662d0dc6SMichael Baum /* Try to disable ROCE by Netlink\Devlink. */
443*662d0dc6SMichael Baum static int
444*662d0dc6SMichael Baum mlx5_nl_roce_disable(const char *addr)
445*662d0dc6SMichael Baum {
446*662d0dc6SMichael Baum 	int nlsk_fd = mlx5_nl_init(NETLINK_GENERIC);
447*662d0dc6SMichael Baum 	int devlink_id;
448*662d0dc6SMichael Baum 	int enable;
449*662d0dc6SMichael Baum 	int ret;
450*662d0dc6SMichael Baum 
451*662d0dc6SMichael Baum 	if (nlsk_fd < 0)
452*662d0dc6SMichael Baum 		return nlsk_fd;
453*662d0dc6SMichael Baum 	devlink_id = mlx5_nl_devlink_family_id_get(nlsk_fd);
454*662d0dc6SMichael Baum 	if (devlink_id < 0) {
455*662d0dc6SMichael Baum 		ret = devlink_id;
456*662d0dc6SMichael Baum 		DRV_LOG(DEBUG,
457*662d0dc6SMichael Baum 			"Failed to get devlink id for ROCE operations by Netlink.");
458*662d0dc6SMichael Baum 		goto close;
459*662d0dc6SMichael Baum 	}
460*662d0dc6SMichael Baum 	ret = mlx5_nl_enable_roce_get(nlsk_fd, devlink_id, addr, &enable);
461*662d0dc6SMichael Baum 	if (ret) {
462*662d0dc6SMichael Baum 		DRV_LOG(DEBUG, "Failed to get ROCE enable by Netlink: %d.",
463*662d0dc6SMichael Baum 			ret);
464*662d0dc6SMichael Baum 		goto close;
465*662d0dc6SMichael Baum 	} else if (!enable) {
466*662d0dc6SMichael Baum 		DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
467*662d0dc6SMichael Baum 		goto close;
468*662d0dc6SMichael Baum 	}
469*662d0dc6SMichael Baum 	ret = mlx5_nl_enable_roce_set(nlsk_fd, devlink_id, addr, 0);
470*662d0dc6SMichael Baum 	if (ret)
471*662d0dc6SMichael Baum 		DRV_LOG(DEBUG, "Failed to disable ROCE by Netlink: %d.", ret);
472*662d0dc6SMichael Baum 	else
473*662d0dc6SMichael Baum 		DRV_LOG(INFO, "ROCE is disabled by Netlink successfully.");
474*662d0dc6SMichael Baum close:
475*662d0dc6SMichael Baum 	close(nlsk_fd);
476*662d0dc6SMichael Baum 	return ret;
477*662d0dc6SMichael Baum }
478*662d0dc6SMichael Baum 
479*662d0dc6SMichael Baum /* Try to disable ROCE by sysfs. */
480*662d0dc6SMichael Baum static int
481*662d0dc6SMichael Baum mlx5_sys_roce_disable(const char *addr)
482*662d0dc6SMichael Baum {
483*662d0dc6SMichael Baum 	FILE *file_o;
484*662d0dc6SMichael Baum 	int enable;
485*662d0dc6SMichael Baum 	int ret;
486*662d0dc6SMichael Baum 
487*662d0dc6SMichael Baum 	MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
488*662d0dc6SMichael Baum 	file_o = fopen(file_p, "rb");
489*662d0dc6SMichael Baum 	if (!file_o) {
490*662d0dc6SMichael Baum 		rte_errno = ENOTSUP;
491*662d0dc6SMichael Baum 		return -ENOTSUP;
492*662d0dc6SMichael Baum 	}
493*662d0dc6SMichael Baum 	ret = fscanf(file_o, "%d", &enable);
494*662d0dc6SMichael Baum 	if (ret != 1) {
495*662d0dc6SMichael Baum 		rte_errno = EINVAL;
496*662d0dc6SMichael Baum 		ret = EINVAL;
497*662d0dc6SMichael Baum 		goto close;
498*662d0dc6SMichael Baum 	} else if (!enable) {
499*662d0dc6SMichael Baum 		ret = 0;
500*662d0dc6SMichael Baum 		DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
501*662d0dc6SMichael Baum 		goto close;
502*662d0dc6SMichael Baum 	}
503*662d0dc6SMichael Baum 	fclose(file_o);
504*662d0dc6SMichael Baum 	file_o = fopen(file_p, "wb");
505*662d0dc6SMichael Baum 	if (!file_o) {
506*662d0dc6SMichael Baum 		rte_errno = ENOTSUP;
507*662d0dc6SMichael Baum 		return -ENOTSUP;
508*662d0dc6SMichael Baum 	}
509*662d0dc6SMichael Baum 	fprintf(file_o, "0\n");
510*662d0dc6SMichael Baum 	ret = 0;
511*662d0dc6SMichael Baum close:
512*662d0dc6SMichael Baum 	if (ret)
513*662d0dc6SMichael Baum 		DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
514*662d0dc6SMichael Baum 	else
515*662d0dc6SMichael Baum 		DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
516*662d0dc6SMichael Baum 	fclose(file_o);
517*662d0dc6SMichael Baum 	return ret;
518*662d0dc6SMichael Baum }
519*662d0dc6SMichael Baum 
520*662d0dc6SMichael Baum static int
521*662d0dc6SMichael Baum mlx5_roce_disable(const struct rte_device *dev)
522*662d0dc6SMichael Baum {
523*662d0dc6SMichael Baum 	char pci_addr[PCI_PRI_STR_SIZE] = { 0 };
524*662d0dc6SMichael Baum 
525*662d0dc6SMichael Baum 	if (mlx5_dev_to_pci_str(dev, pci_addr, sizeof(pci_addr)) < 0)
526*662d0dc6SMichael Baum 		return -rte_errno;
527*662d0dc6SMichael Baum 	/* Firstly try to disable ROCE by Netlink and fallback to sysfs. */
528*662d0dc6SMichael Baum 	if (mlx5_nl_roce_disable(pci_addr) != 0 &&
529*662d0dc6SMichael Baum 	    mlx5_sys_roce_disable(pci_addr) != 0)
530*662d0dc6SMichael Baum 		return -rte_errno;
531*662d0dc6SMichael Baum 	return 0;
532*662d0dc6SMichael Baum }
533*662d0dc6SMichael Baum 
534*662d0dc6SMichael Baum static struct ibv_device *
535*662d0dc6SMichael Baum mlx5_os_get_ibv_dev(const struct rte_device *dev)
536*662d0dc6SMichael Baum {
537*662d0dc6SMichael Baum 	struct ibv_device *ibv;
538*662d0dc6SMichael Baum 
539*662d0dc6SMichael Baum 	if (mlx5_dev_is_pci(dev))
540*662d0dc6SMichael Baum 		ibv = mlx5_os_get_ibv_device(&RTE_DEV_TO_PCI_CONST(dev)->addr);
541*662d0dc6SMichael Baum 	else
542*662d0dc6SMichael Baum 		ibv = mlx5_get_aux_ibv_device(RTE_DEV_TO_AUXILIARY_CONST(dev));
543*662d0dc6SMichael Baum 	if (ibv == NULL) {
544*662d0dc6SMichael Baum 		rte_errno = ENODEV;
545*662d0dc6SMichael Baum 		DRV_LOG(ERR, "Verbs device not found: %s", dev->name);
546*662d0dc6SMichael Baum 	}
547*662d0dc6SMichael Baum 	return ibv;
548*662d0dc6SMichael Baum }
549*662d0dc6SMichael Baum 
550*662d0dc6SMichael Baum static struct ibv_device *
551*662d0dc6SMichael Baum mlx5_vdpa_get_ibv_dev(const struct rte_device *dev)
552*662d0dc6SMichael Baum {
553*662d0dc6SMichael Baum 	struct ibv_device *ibv;
554*662d0dc6SMichael Baum 	int retry;
555*662d0dc6SMichael Baum 
556*662d0dc6SMichael Baum 	if (mlx5_roce_disable(dev) != 0) {
557*662d0dc6SMichael Baum 		DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
558*662d0dc6SMichael Baum 			dev->name);
559*662d0dc6SMichael Baum 		return NULL;
560*662d0dc6SMichael Baum 	}
561*662d0dc6SMichael Baum 	/* Wait for the IB device to appear again after reload. */
562*662d0dc6SMichael Baum 	for (retry = MLX5_VDPA_MAX_RETRIES; retry > 0; --retry) {
563*662d0dc6SMichael Baum 		ibv = mlx5_os_get_ibv_dev(dev);
564*662d0dc6SMichael Baum 		if (ibv != NULL)
565*662d0dc6SMichael Baum 			return ibv;
566*662d0dc6SMichael Baum 		usleep(MLX5_VDPA_USEC);
567*662d0dc6SMichael Baum 	}
568*662d0dc6SMichael Baum 	DRV_LOG(ERR,
569*662d0dc6SMichael Baum 		"Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.",
570*662d0dc6SMichael Baum 		dev->name, MLX5_VDPA_MAX_RETRIES);
571*662d0dc6SMichael Baum 	rte_errno = EAGAIN;
572*662d0dc6SMichael Baum 	return NULL;
573*662d0dc6SMichael Baum }
574*662d0dc6SMichael Baum 
575887183efSMichael Baum static int
576887183efSMichael Baum mlx5_config_doorbell_mapping_env(int dbnc)
577887183efSMichael Baum {
578887183efSMichael Baum 	char *env;
579887183efSMichael Baum 	int value;
580887183efSMichael Baum 
581887183efSMichael Baum 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
582887183efSMichael Baum 	/* Get environment variable to store. */
583887183efSMichael Baum 	env = getenv(MLX5_SHUT_UP_BF);
584887183efSMichael Baum 	value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET;
585887183efSMichael Baum 	if (dbnc == MLX5_ARG_UNSET)
586887183efSMichael Baum 		setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
587887183efSMichael Baum 	else
588887183efSMichael Baum 		setenv(MLX5_SHUT_UP_BF,
589887183efSMichael Baum 		       dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1);
590887183efSMichael Baum 	return value;
591887183efSMichael Baum }
592887183efSMichael Baum 
593887183efSMichael Baum static void
594887183efSMichael Baum mlx5_restore_doorbell_mapping_env(int value)
595887183efSMichael Baum {
596887183efSMichael Baum 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
597887183efSMichael Baum 	/* Restore the original environment variable state. */
598887183efSMichael Baum 	if (value == MLX5_ARG_UNSET)
599887183efSMichael Baum 		unsetenv(MLX5_SHUT_UP_BF);
600887183efSMichael Baum 	else
601887183efSMichael Baum 		setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
602887183efSMichael Baum }
603887183efSMichael Baum 
604887183efSMichael Baum /**
605887183efSMichael Baum  * Function API to open IB device.
606887183efSMichael Baum  *
607887183efSMichael Baum  *
608887183efSMichael Baum  * @param cdev
609887183efSMichael Baum  *   Pointer to the mlx5 device.
610ca1418ceSMichael Baum  * @param classes
611ca1418ceSMichael Baum  *   Chosen classes come from device arguments.
612887183efSMichael Baum  *
613887183efSMichael Baum  * @return
614887183efSMichael Baum  *   0 on success, a negative errno value otherwise and rte_errno is set.
615887183efSMichael Baum  */
616887183efSMichael Baum int
617ca1418ceSMichael Baum mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
618887183efSMichael Baum {
619887183efSMichael Baum 	struct ibv_device *ibv;
620887183efSMichael Baum 	struct ibv_context *ctx = NULL;
621887183efSMichael Baum 	int dbmap_env;
622887183efSMichael Baum 
623*662d0dc6SMichael Baum 	if (classes & MLX5_CLASS_VDPA)
624*662d0dc6SMichael Baum 		ibv = mlx5_vdpa_get_ibv_dev(cdev->dev);
625*662d0dc6SMichael Baum 	else
626887183efSMichael Baum 		ibv = mlx5_os_get_ibv_dev(cdev->dev);
627887183efSMichael Baum 	if (!ibv)
628887183efSMichael Baum 		return -rte_errno;
629887183efSMichael Baum 	DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
630887183efSMichael Baum 	/*
631887183efSMichael Baum 	 * Configure environment variable "MLX5_BF_SHUT_UP" before the device
632887183efSMichael Baum 	 * creation. The rdma_core library checks the variable at device
633887183efSMichael Baum 	 * creation and stores the result internally.
634887183efSMichael Baum 	 */
635887183efSMichael Baum 	dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc);
636887183efSMichael Baum 	/* Try to open IB device with DV first, then usual Verbs. */
637887183efSMichael Baum 	errno = 0;
638887183efSMichael Baum 	ctx = mlx5_glue->dv_open_device(ibv);
639887183efSMichael Baum 	if (ctx) {
640887183efSMichael Baum 		cdev->config.devx = 1;
641887183efSMichael Baum 		DRV_LOG(DEBUG, "DevX is supported.");
642ca1418ceSMichael Baum 	} else if (classes == MLX5_CLASS_ETH) {
643887183efSMichael Baum 		/* The environment variable is still configured. */
644887183efSMichael Baum 		ctx = mlx5_glue->open_device(ibv);
645887183efSMichael Baum 		if (ctx == NULL)
646887183efSMichael Baum 			goto error;
647887183efSMichael Baum 		DRV_LOG(DEBUG, "DevX is NOT supported.");
648ca1418ceSMichael Baum 	} else {
649ca1418ceSMichael Baum 		goto error;
650887183efSMichael Baum 	}
651887183efSMichael Baum 	/* The device is created, no need for environment. */
652887183efSMichael Baum 	mlx5_restore_doorbell_mapping_env(dbmap_env);
653887183efSMichael Baum 	/* Hint libmlx5 to use PMD allocator for data plane resources */
654887183efSMichael Baum 	mlx5_set_context_attr(cdev->dev, ctx);
655ca1418ceSMichael Baum 	cdev->ctx = ctx;
656887183efSMichael Baum 	return 0;
657887183efSMichael Baum error:
658887183efSMichael Baum 	rte_errno = errno ? errno : ENODEV;
659887183efSMichael Baum 	/* The device creation is failed, no need for environment. */
660887183efSMichael Baum 	mlx5_restore_doorbell_mapping_env(dbmap_env);
661887183efSMichael Baum 	DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
662887183efSMichael Baum 	return -rte_errno;
663887183efSMichael Baum }
664