xref: /dpdk/drivers/common/mlx5/linux/mlx5_common_os.c (revision 887183effa2aeef70508b37fe304a3f7c526d334)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 
5 #include <unistd.h>
6 #include <string.h>
7 #include <stdio.h>
8 #ifdef RTE_IBVERBS_LINK_DLOPEN
9 #include <dlfcn.h>
10 #endif
11 #include <dirent.h>
12 #include <net/if.h>
13 
14 #include <rte_errno.h>
15 #include <rte_string_fns.h>
16 
17 #include "mlx5_common.h"
18 #include "mlx5_common_log.h"
19 #include "mlx5_common_defs.h"
20 #include "mlx5_common_os.h"
21 #include "mlx5_glue.h"
22 
#ifdef MLX5_GLUE
/*
 * Glue function table used to reach rdma-core. In this configuration the
 * pointer is defined here, filled by mlx5_glue_dlopen() from the
 * "mlx5_glue" symbol of the loaded library, and reset to NULL by
 * mlx5_glue_constructor() when initialization fails.
 */
const struct mlx5_glue *mlx5_glue;
#endif
26 
27 int
28 mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
29 {
30 	FILE *file;
31 	char line[32];
32 	int rc = -ENOENT;
33 	MKSTR(path, "%s/device/uevent", dev_path);
34 
35 	file = fopen(path, "rb");
36 	if (file == NULL) {
37 		rte_errno = errno;
38 		return -rte_errno;
39 	}
40 	while (fgets(line, sizeof(line), file) == line) {
41 		size_t len = strlen(line);
42 
43 		/* Truncate long lines. */
44 		if (len == (sizeof(line) - 1)) {
45 			while (line[(len - 1)] != '\n') {
46 				int ret = fgetc(file);
47 
48 				if (ret == EOF)
49 					goto exit;
50 				line[(len - 1)] = ret;
51 			}
52 			/* No match for long lines. */
53 			continue;
54 		}
55 		/* Extract information. */
56 		if (sscanf(line,
57 			   "PCI_SLOT_NAME="
58 			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
59 			   &pci_addr->domain,
60 			   &pci_addr->bus,
61 			   &pci_addr->devid,
62 			   &pci_addr->function) == 4) {
63 			rc = 0;
64 			break;
65 		}
66 	}
67 exit:
68 	fclose(file);
69 	if (rc)
70 		rte_errno = -rc;
71 	return rc;
72 }
73 
74 /**
75  * Extract port name, as a number, from sysfs or netlink information.
76  *
77  * @param[in] port_name_in
78  *   String representing the port name.
79  * @param[out] port_info_out
80  *   Port information, including port name as a number and port name
81  *   type if recognized
82  *
83  * @return
84  *   port_name field set according to recognized name format.
85  */
86 void
87 mlx5_translate_port_name(const char *port_name_in,
88 			 struct mlx5_switch_info *port_info_out)
89 {
90 	char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol;
91 	char *end;
92 	int sc_items;
93 
94 	sc_items = sscanf(port_name_in, "%c%d",
95 			  &ctrl, &port_info_out->ctrl_num);
96 	if (sc_items == 2 && ctrl == 'c') {
97 		port_name_in++; /* 'c' */
98 		port_name_in += snprintf(NULL, 0, "%d",
99 					  port_info_out->ctrl_num);
100 	}
101 	/* Check for port-name as a string of the form pf0vf0 or pf0sf0 */
102 	sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c",
103 			  &pf_c1, &pf_c2, &port_info_out->pf_num,
104 			  &vf_c1, &vf_c2, &port_info_out->port_name, &eol);
105 	if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') {
106 		if (vf_c1 == 'v' && vf_c2 == 'f') {
107 			/* Kernel ver >= 5.0 or OFED ver >= 4.6 */
108 			port_info_out->name_type =
109 					MLX5_PHYS_PORT_NAME_TYPE_PFVF;
110 			return;
111 		}
112 		if (vf_c1 == 's' && vf_c2 == 'f') {
113 			/* Kernel ver >= 5.11 or OFED ver >= 5.1 */
114 			port_info_out->name_type =
115 					MLX5_PHYS_PORT_NAME_TYPE_PFSF;
116 			return;
117 		}
118 	}
119 	/*
120 	 * Check for port-name as a string of the form p0
121 	 * (support kernel ver >= 5.0, or OFED ver >= 4.6).
122 	 */
123 	sc_items = sscanf(port_name_in, "%c%d%c",
124 			  &pf_c1, &port_info_out->port_name, &eol);
125 	if (sc_items == 2 && pf_c1 == 'p') {
126 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
127 		return;
128 	}
129 	/*
130 	 * Check for port-name as a string of the form pf0
131 	 * (support kernel ver >= 5.7 for HPF representor on BF).
132 	 */
133 	sc_items = sscanf(port_name_in, "%c%c%d%c",
134 			  &pf_c1, &pf_c2, &port_info_out->pf_num, &eol);
135 	if (sc_items == 3 && pf_c1 == 'p' && pf_c2 == 'f') {
136 		port_info_out->port_name = -1;
137 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFHPF;
138 		return;
139 	}
140 	/* Check for port-name as a number (support kernel ver < 5.0 */
141 	errno = 0;
142 	port_info_out->port_name = strtol(port_name_in, &end, 0);
143 	if (!errno &&
144 	    (size_t)(end - port_name_in) == strlen(port_name_in)) {
145 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
146 		return;
147 	}
148 	port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
149 }
150 
151 int
152 mlx5_get_ifname_sysfs(const char *ibdev_path, char *ifname)
153 {
154 	DIR *dir;
155 	struct dirent *dent;
156 	unsigned int dev_type = 0;
157 	unsigned int dev_port_prev = ~0u;
158 	char match[IF_NAMESIZE] = "";
159 
160 	MLX5_ASSERT(ibdev_path);
161 	{
162 		MKSTR(path, "%s/device/net", ibdev_path);
163 
164 		dir = opendir(path);
165 		if (dir == NULL) {
166 			rte_errno = errno;
167 			return -rte_errno;
168 		}
169 	}
170 	while ((dent = readdir(dir)) != NULL) {
171 		char *name = dent->d_name;
172 		FILE *file;
173 		unsigned int dev_port;
174 		int r;
175 
176 		if ((name[0] == '.') &&
177 		    ((name[1] == '\0') ||
178 		     ((name[1] == '.') && (name[2] == '\0'))))
179 			continue;
180 
181 		MKSTR(path, "%s/device/net/%s/%s",
182 		      ibdev_path, name,
183 		      (dev_type ? "dev_id" : "dev_port"));
184 
185 		file = fopen(path, "rb");
186 		if (file == NULL) {
187 			if (errno != ENOENT)
188 				continue;
189 			/*
190 			 * Switch to dev_id when dev_port does not exist as
191 			 * is the case with Linux kernel versions < 3.15.
192 			 */
193 try_dev_id:
194 			match[0] = '\0';
195 			if (dev_type)
196 				break;
197 			dev_type = 1;
198 			dev_port_prev = ~0u;
199 			rewinddir(dir);
200 			continue;
201 		}
202 		r = fscanf(file, (dev_type ? "%x" : "%u"), &dev_port);
203 		fclose(file);
204 		if (r != 1)
205 			continue;
206 		/*
207 		 * Switch to dev_id when dev_port returns the same value for
208 		 * all ports. May happen when using a MOFED release older than
209 		 * 3.0 with a Linux kernel >= 3.15.
210 		 */
211 		if (dev_port == dev_port_prev)
212 			goto try_dev_id;
213 		dev_port_prev = dev_port;
214 		if (dev_port == 0)
215 			strlcpy(match, name, IF_NAMESIZE);
216 	}
217 	closedir(dir);
218 	if (match[0] == '\0') {
219 		rte_errno = ENOENT;
220 		return -rte_errno;
221 	}
222 	strncpy(ifname, match, IF_NAMESIZE);
223 	return 0;
224 }
225 
226 #ifdef MLX5_GLUE
227 
228 /**
229  * Suffix RTE_EAL_PMD_PATH with "-glue".
230  *
231  * This function performs a sanity check on RTE_EAL_PMD_PATH before
232  * suffixing its last component.
233  *
234  * @param buf[out]
235  *   Output buffer, should be large enough otherwise NULL is returned.
236  * @param size
237  *   Size of @p out.
238  *
239  * @return
240  *   Pointer to @p buf or @p NULL in case suffix cannot be appended.
241  */
242 static char *
243 mlx5_glue_path(char *buf, size_t size)
244 {
245 	static const char *const bad[] = { "/", ".", "..", NULL };
246 	const char *path = RTE_EAL_PMD_PATH;
247 	size_t len = strlen(path);
248 	size_t off;
249 	int i;
250 
251 	while (len && path[len - 1] == '/')
252 		--len;
253 	for (off = len; off && path[off - 1] != '/'; --off)
254 		;
255 	for (i = 0; bad[i]; ++i)
256 		if (!strncmp(path + off, bad[i], (int)(len - off)))
257 			goto error;
258 	i = snprintf(buf, size, "%.*s-glue", (int)len, path);
259 	if (i == -1 || (size_t)i >= size)
260 		goto error;
261 	return buf;
262 error:
263 	RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of"
264 		" RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please"
265 		" re-configure DPDK");
266 	return NULL;
267 }
268 
269 static int
270 mlx5_glue_dlopen(void)
271 {
272 	char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")];
273 	void *handle = NULL;
274 
275 	char const *path[] = {
276 		/*
277 		 * A basic security check is necessary before trusting
278 		 * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
279 		 */
280 		(geteuid() == getuid() && getegid() == getgid() ?
281 		 getenv("MLX5_GLUE_PATH") : NULL),
282 		/*
283 		 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed
284 		 * variant, otherwise let dlopen() look up libraries on its
285 		 * own.
286 		 */
287 		(*RTE_EAL_PMD_PATH ?
288 		 mlx5_glue_path(glue_path, sizeof(glue_path)) : ""),
289 	};
290 	unsigned int i = 0;
291 	void **sym;
292 	const char *dlmsg;
293 
294 	while (!handle && i != RTE_DIM(path)) {
295 		const char *end;
296 		size_t len;
297 		int ret;
298 
299 		if (!path[i]) {
300 			++i;
301 			continue;
302 		}
303 		end = strpbrk(path[i], ":;");
304 		if (!end)
305 			end = path[i] + strlen(path[i]);
306 		len = end - path[i];
307 		ret = 0;
308 		do {
309 			char name[ret + 1];
310 
311 			ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE,
312 				       (int)len, path[i],
313 				       (!len || *(end - 1) == '/') ? "" : "/");
314 			if (ret == -1)
315 				break;
316 			if (sizeof(name) != (size_t)ret + 1)
317 				continue;
318 			DRV_LOG(DEBUG, "Looking for rdma-core glue as "
319 				"\"%s\"", name);
320 			handle = dlopen(name, RTLD_LAZY);
321 			break;
322 		} while (1);
323 		path[i] = end + 1;
324 		if (!*end)
325 			++i;
326 	}
327 	if (!handle) {
328 		rte_errno = EINVAL;
329 		dlmsg = dlerror();
330 		if (dlmsg)
331 			DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg);
332 		goto glue_error;
333 	}
334 	sym = dlsym(handle, "mlx5_glue");
335 	if (!sym || !*sym) {
336 		rte_errno = EINVAL;
337 		dlmsg = dlerror();
338 		if (dlmsg)
339 			DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg);
340 		goto glue_error;
341 	}
342 	mlx5_glue = *sym;
343 	return 0;
344 
345 glue_error:
346 	if (handle)
347 		dlclose(handle);
348 	return -1;
349 }
350 
351 #endif
352 
353 /**
354  * Initialization routine for run-time dependency on rdma-core.
355  */
356 void
357 mlx5_glue_constructor(void)
358 {
359 	/*
360 	 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
361 	 * huge pages. Calling ibv_fork_init() during init allows
362 	 * applications to use fork() safely for purposes other than
363 	 * using this PMD, which is not supported in forked processes.
364 	 */
365 	setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
366 	/* Match the size of Rx completion entry to the size of a cacheline. */
367 	if (RTE_CACHE_LINE_SIZE == 128)
368 		setenv("MLX5_CQE_SIZE", "128", 0);
369 	/*
370 	 * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to
371 	 * cleanup all the Verbs resources even when the device was removed.
372 	 */
373 	setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1);
374 
375 #ifdef MLX5_GLUE
376 	if (mlx5_glue_dlopen() != 0)
377 		goto glue_error;
378 #endif
379 
380 #ifdef RTE_LIBRTE_MLX5_DEBUG
381 	/* Glue structure must not contain any NULL pointers. */
382 	{
383 		unsigned int i;
384 
385 		for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
386 			MLX5_ASSERT(((const void *const *)mlx5_glue)[i]);
387 	}
388 #endif
389 	if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) {
390 		rte_errno = EINVAL;
391 		DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is "
392 			"required", mlx5_glue->version, MLX5_GLUE_VERSION);
393 		goto glue_error;
394 	}
395 	mlx5_glue->fork_init();
396 	return;
397 
398 glue_error:
399 	DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing"
400 		" run-time dependency on rdma-core libraries (libibverbs,"
401 		" libmlx5)");
402 	mlx5_glue = NULL;
403 }
404 
405 struct ibv_device *
406 mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
407 {
408 	int n;
409 	struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
410 	struct ibv_device *ibv_match = NULL;
411 
412 	if (ibv_list == NULL) {
413 		rte_errno = ENOSYS;
414 		return NULL;
415 	}
416 	while (n-- > 0) {
417 		struct rte_pci_addr paddr;
418 
419 		DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
420 		if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
421 			continue;
422 		if (rte_pci_addr_cmp(addr, &paddr) != 0)
423 			continue;
424 		ibv_match = ibv_list[n];
425 		break;
426 	}
427 	if (ibv_match == NULL)
428 		rte_errno = ENOENT;
429 	mlx5_glue->free_device_list(ibv_list);
430 	return ibv_match;
431 }
432 
433 static int
434 mlx5_config_doorbell_mapping_env(int dbnc)
435 {
436 	char *env;
437 	int value;
438 
439 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
440 	/* Get environment variable to store. */
441 	env = getenv(MLX5_SHUT_UP_BF);
442 	value = env ? !!strcmp(env, "0") : MLX5_ARG_UNSET;
443 	if (dbnc == MLX5_ARG_UNSET)
444 		setenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);
445 	else
446 		setenv(MLX5_SHUT_UP_BF,
447 		       dbnc == MLX5_TXDB_NCACHED ? "1" : "0", 1);
448 	return value;
449 }
450 
451 static void
452 mlx5_restore_doorbell_mapping_env(int value)
453 {
454 	MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
455 	/* Restore the original environment variable state. */
456 	if (value == MLX5_ARG_UNSET)
457 		unsetenv(MLX5_SHUT_UP_BF);
458 	else
459 		setenv(MLX5_SHUT_UP_BF, value ? "1" : "0", 1);
460 }
461 
462 /**
463  * Function API to open IB device.
464  *
465  *
466  * @param cdev
467  *   Pointer to the mlx5 device.
468  * @param ctx_ptr
469  *   Pointer to fill inside pointer to device context.
470  *
471  * @return
472  *   0 on success, a negative errno value otherwise and rte_errno is set.
473  */
474 int
475 mlx5_os_open_device(struct mlx5_common_device *cdev, void **ctx_ptr)
476 {
477 	struct ibv_device *ibv;
478 	struct ibv_context *ctx = NULL;
479 	int dbmap_env;
480 
481 	ibv = mlx5_os_get_ibv_dev(cdev->dev);
482 	if (!ibv)
483 		return -rte_errno;
484 	DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
485 	/*
486 	 * Configure environment variable "MLX5_BF_SHUT_UP" before the device
487 	 * creation. The rdma_core library checks the variable at device
488 	 * creation and stores the result internally.
489 	 */
490 	dbmap_env = mlx5_config_doorbell_mapping_env(cdev->config.dbnc);
491 	/* Try to open IB device with DV first, then usual Verbs. */
492 	errno = 0;
493 	ctx = mlx5_glue->dv_open_device(ibv);
494 	if (ctx) {
495 		cdev->config.devx = 1;
496 		DRV_LOG(DEBUG, "DevX is supported.");
497 	} else {
498 		/* The environment variable is still configured. */
499 		ctx = mlx5_glue->open_device(ibv);
500 		if (ctx == NULL)
501 			goto error;
502 		DRV_LOG(DEBUG, "DevX is NOT supported.");
503 	}
504 	/* The device is created, no need for environment. */
505 	mlx5_restore_doorbell_mapping_env(dbmap_env);
506 	/* Hint libmlx5 to use PMD allocator for data plane resources */
507 	mlx5_set_context_attr(cdev->dev, ctx);
508 	*ctx_ptr = (void *)ctx;
509 	return 0;
510 error:
511 	rte_errno = errno ? errno : ENODEV;
512 	/* The device creation is failed, no need for environment. */
513 	mlx5_restore_doorbell_mapping_env(dbmap_env);
514 	DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
515 	return -rte_errno;
516 }
517