xref: /dpdk/drivers/common/mlx5/mlx5_common.c (revision 6d24988751cb71993bf796855e5e53e4c2f0537b)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2019 Mellanox Technologies, Ltd
3  */
4 
5 #include <unistd.h>
6 #include <string.h>
7 #include <stdio.h>
8 #ifdef RTE_IBVERBS_LINK_DLOPEN
9 #include <dlfcn.h>
10 #endif
11 
12 #include <rte_errno.h>
13 
14 #include "mlx5_common.h"
15 #include "mlx5_common_utils.h"
16 #include "mlx5_glue.h"
17 
18 
/* Log type returned by rte_log_register("pmd.common.mlx5") in mlx5_log_init(). */
int mlx5_common_logtype;

#ifdef MLX5_GLUE
/*
 * Glue table resolved by mlx5_glue_dlopen() from the "mlx5_glue" symbol of
 * the dlopen()ed library; mlx5_glue_init() resets it to NULL on failure.
 */
const struct mlx5_glue *mlx5_glue;
#endif

/*
 * Written by mlx5_is_haswell_broadwell_cpu(): 1 when the CPU model matches
 * one of the listed Haswell/Broadwell models, 0 otherwise.
 */
uint8_t haswell_broadwell_cpu;
26 
27 /**
28  * Get PCI information by sysfs device path.
29  *
30  * @param dev_path
31  *   Pointer to device sysfs folder name.
32  * @param[out] pci_addr
33  *   PCI bus address output buffer.
34  *
35  * @return
36  *   0 on success, a negative errno value otherwise and rte_errno is set.
37  */
38 int
39 mlx5_dev_to_pci_addr(const char *dev_path,
40 		     struct rte_pci_addr *pci_addr)
41 {
42 	FILE *file;
43 	char line[32];
44 	MKSTR(path, "%s/device/uevent", dev_path);
45 
46 	file = fopen(path, "rb");
47 	if (file == NULL) {
48 		rte_errno = errno;
49 		return -rte_errno;
50 	}
51 	while (fgets(line, sizeof(line), file) == line) {
52 		size_t len = strlen(line);
53 		int ret;
54 
55 		/* Truncate long lines. */
56 		if (len == (sizeof(line) - 1))
57 			while (line[(len - 1)] != '\n') {
58 				ret = fgetc(file);
59 				if (ret == EOF)
60 					break;
61 				line[(len - 1)] = ret;
62 			}
63 		/* Extract information. */
64 		if (sscanf(line,
65 			   "PCI_SLOT_NAME="
66 			   "%" SCNx32 ":%" SCNx8 ":%" SCNx8 ".%" SCNx8 "\n",
67 			   &pci_addr->domain,
68 			   &pci_addr->bus,
69 			   &pci_addr->devid,
70 			   &pci_addr->function) == 4) {
71 			ret = 0;
72 			break;
73 		}
74 	}
75 	fclose(file);
76 	return 0;
77 }
78 
79 static int
80 mlx5_class_check_handler(__rte_unused const char *key, const char *value,
81 			 void *opaque)
82 {
83 	enum mlx5_class *ret = opaque;
84 
85 	if (strcmp(value, "vdpa") == 0) {
86 		*ret = MLX5_CLASS_VDPA;
87 	} else if (strcmp(value, "net") == 0) {
88 		*ret = MLX5_CLASS_NET;
89 	} else {
90 		DRV_LOG(ERR, "Invalid mlx5 class %s. Maybe typo in device"
91 			" class argument setting?", value);
92 		*ret = MLX5_CLASS_INVALID;
93 	}
94 	return 0;
95 }
96 
97 enum mlx5_class
98 mlx5_class_get(struct rte_devargs *devargs)
99 {
100 	struct rte_kvargs *kvlist;
101 	const char *key = MLX5_CLASS_ARG_NAME;
102 	enum mlx5_class ret = MLX5_CLASS_NET;
103 
104 	if (devargs == NULL)
105 		return ret;
106 	kvlist = rte_kvargs_parse(devargs->args, NULL);
107 	if (kvlist == NULL)
108 		return ret;
109 	if (rte_kvargs_count(kvlist, key))
110 		rte_kvargs_process(kvlist, key, mlx5_class_check_handler, &ret);
111 	rte_kvargs_free(kvlist);
112 	return ret;
113 }
114 
115 /**
116  * Extract port name, as a number, from sysfs or netlink information.
117  *
118  * @param[in] port_name_in
119  *   String representing the port name.
120  * @param[out] port_info_out
121  *   Port information, including port name as a number and port name
122  *   type if recognized
123  *
124  * @return
125  *   port_name field set according to recognized name format.
126  */
127 void
128 mlx5_translate_port_name(const char *port_name_in,
129 			 struct mlx5_switch_info *port_info_out)
130 {
131 	char pf_c1, pf_c2, vf_c1, vf_c2;
132 	char *end;
133 	int sc_items;
134 
135 	/*
136 	 * Check for port-name as a string of the form pf0vf0
137 	 * (support kernel ver >= 5.0 or OFED ver >= 4.6).
138 	 */
139 	sc_items = sscanf(port_name_in, "%c%c%d%c%c%d",
140 			  &pf_c1, &pf_c2, &port_info_out->pf_num,
141 			  &vf_c1, &vf_c2, &port_info_out->port_name);
142 	if (sc_items == 6 &&
143 	    pf_c1 == 'p' && pf_c2 == 'f' &&
144 	    vf_c1 == 'v' && vf_c2 == 'f') {
145 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF;
146 		return;
147 	}
148 	/*
149 	 * Check for port-name as a string of the form p0
150 	 * (support kernel ver >= 5.0, or OFED ver >= 4.6).
151 	 */
152 	sc_items = sscanf(port_name_in, "%c%d",
153 			  &pf_c1, &port_info_out->port_name);
154 	if (sc_items == 2 && pf_c1 == 'p') {
155 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
156 		return;
157 	}
158 	/* Check for port-name as a number (support kernel ver < 5.0 */
159 	errno = 0;
160 	port_info_out->port_name = strtol(port_name_in, &end, 0);
161 	if (!errno &&
162 	    (size_t)(end - port_name_in) == strlen(port_name_in)) {
163 		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
164 		return;
165 	}
166 	port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
167 	return;
168 }
169 
170 #ifdef MLX5_GLUE
171 
172 /**
173  * Suffix RTE_EAL_PMD_PATH with "-glue".
174  *
175  * This function performs a sanity check on RTE_EAL_PMD_PATH before
176  * suffixing its last component.
177  *
178  * @param buf[out]
179  *   Output buffer, should be large enough otherwise NULL is returned.
180  * @param size
181  *   Size of @p out.
182  *
183  * @return
184  *   Pointer to @p buf or @p NULL in case suffix cannot be appended.
185  */
186 static char *
187 mlx5_glue_path(char *buf, size_t size)
188 {
189 	static const char *const bad[] = { "/", ".", "..", NULL };
190 	const char *path = RTE_EAL_PMD_PATH;
191 	size_t len = strlen(path);
192 	size_t off;
193 	int i;
194 
195 	while (len && path[len - 1] == '/')
196 		--len;
197 	for (off = len; off && path[off - 1] != '/'; --off)
198 		;
199 	for (i = 0; bad[i]; ++i)
200 		if (!strncmp(path + off, bad[i], (int)(len - off)))
201 			goto error;
202 	i = snprintf(buf, size, "%.*s-glue", (int)len, path);
203 	if (i == -1 || (size_t)i >= size)
204 		goto error;
205 	return buf;
206 error:
207 	RTE_LOG(ERR, PMD, "unable to append \"-glue\" to last component of"
208 		" RTE_EAL_PMD_PATH (\"" RTE_EAL_PMD_PATH "\"), please"
209 		" re-configure DPDK");
210 	return NULL;
211 }
212 
213 static int
214 mlx5_glue_dlopen(void)
215 {
216 	char glue_path[sizeof(RTE_EAL_PMD_PATH) - 1 + sizeof("-glue")];
217 	void *handle = NULL;
218 
219 	const char *path[] = {
220 		/*
221 		 * A basic security check is necessary before trusting
222 		 * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH.
223 		 */
224 		(geteuid() == getuid() && getegid() == getgid() ?
225 		 getenv("MLX5_GLUE_PATH") : NULL),
226 		/*
227 		 * When RTE_EAL_PMD_PATH is set, use its glue-suffixed
228 		 * variant, otherwise let dlopen() look up libraries on its
229 		 * own.
230 		 */
231 		(*RTE_EAL_PMD_PATH ?
232 		 mlx5_glue_path(glue_path, sizeof(glue_path)) : ""),
233 	};
234 	unsigned int i = 0;
235 	void **sym;
236 	const char *dlmsg;
237 
238 	while (!handle && i != RTE_DIM(path)) {
239 		const char *end;
240 		size_t len;
241 		int ret;
242 
243 		if (!path[i]) {
244 			++i;
245 			continue;
246 		}
247 		end = strpbrk(path[i], ":;");
248 		if (!end)
249 			end = path[i] + strlen(path[i]);
250 		len = end - path[i];
251 		ret = 0;
252 		do {
253 			char name[ret + 1];
254 
255 			ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE,
256 				       (int)len, path[i],
257 				       (!len || *(end - 1) == '/') ? "" : "/");
258 			if (ret == -1)
259 				break;
260 			if (sizeof(name) != (size_t)ret + 1)
261 				continue;
262 			DRV_LOG(DEBUG, "Looking for rdma-core glue as "
263 				"\"%s\"", name);
264 			handle = dlopen(name, RTLD_LAZY);
265 			break;
266 		} while (1);
267 		path[i] = end + 1;
268 		if (!*end)
269 			++i;
270 	}
271 	if (!handle) {
272 		rte_errno = EINVAL;
273 		dlmsg = dlerror();
274 		if (dlmsg)
275 			DRV_LOG(WARNING, "Cannot load glue library: %s", dlmsg);
276 		goto glue_error;
277 	}
278 	sym = dlsym(handle, "mlx5_glue");
279 	if (!sym || !*sym) {
280 		rte_errno = EINVAL;
281 		dlmsg = dlerror();
282 		if (dlmsg)
283 			DRV_LOG(ERR, "Cannot resolve glue symbol: %s", dlmsg);
284 		goto glue_error;
285 	}
286 	mlx5_glue = *sym;
287 	return 0;
288 
289 glue_error:
290 	if (handle)
291 		dlclose(handle);
292 	return -1;
293 }
294 
295 #endif
296 
/*
 * On x86_64, CPUID is queried to identify the processor and help decide
 * whether relaxed ordering should be used.
 */
300 #ifdef RTE_ARCH_X86_64
/**
 * Execute the CPUID instruction and return the resulting register values.
 *
 * @param level
 *		The main category of information returned (loaded into EAX).
 * @param eax, ebx, ecx, edx
 *		Pointers to the variables that will hold the values left in
 *		the corresponding registers.
 */
static inline void mlx5_cpu_id(unsigned int level,
				unsigned int *eax, unsigned int *ebx,
				unsigned int *ecx, unsigned int *edx)
{
	unsigned int reg_a;
	unsigned int reg_b;
	unsigned int reg_c;
	unsigned int reg_d;

	__asm__("cpuid"
		: "=a" (reg_a), "=b" (reg_b), "=c" (reg_c), "=d" (reg_d)
		: "a" (level));
	*eax = reg_a;
	*ebx = reg_b;
	*ecx = reg_c;
	*edx = reg_d;
}
318 #endif
319 
320 RTE_INIT_PRIO(mlx5_log_init, LOG)
321 {
322 	mlx5_common_logtype = rte_log_register("pmd.common.mlx5");
323 	if (mlx5_common_logtype >= 0)
324 		rte_log_set_level(mlx5_common_logtype, RTE_LOG_NOTICE);
325 }
326 
327 /**
328  * Initialization routine for run-time dependency on rdma-core.
329  */
330 RTE_INIT_PRIO(mlx5_glue_init, CLASS)
331 {
332 	/*
333 	 * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use
334 	 * huge pages. Calling ibv_fork_init() during init allows
335 	 * applications to use fork() safely for purposes other than
336 	 * using this PMD, which is not supported in forked processes.
337 	 */
338 	setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
339 	/* Match the size of Rx completion entry to the size of a cacheline. */
340 	if (RTE_CACHE_LINE_SIZE == 128)
341 		setenv("MLX5_CQE_SIZE", "128", 0);
342 	/*
343 	 * MLX5_DEVICE_FATAL_CLEANUP tells ibv_destroy functions to
344 	 * cleanup all the Verbs resources even when the device was removed.
345 	 */
346 	setenv("MLX5_DEVICE_FATAL_CLEANUP", "1", 1);
347 
348 #ifdef MLX5_GLUE
349 	if (mlx5_glue_dlopen() != 0)
350 		goto glue_error;
351 #endif
352 
353 #ifdef RTE_LIBRTE_MLX5_DEBUG
354 	/* Glue structure must not contain any NULL pointers. */
355 	{
356 		unsigned int i;
357 
358 		for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i)
359 			MLX5_ASSERT(((const void *const *)mlx5_glue)[i]);
360 	}
361 #endif
362 	if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) {
363 		rte_errno = EINVAL;
364 		DRV_LOG(ERR, "rdma-core glue \"%s\" mismatch: \"%s\" is "
365 			"required", mlx5_glue->version, MLX5_GLUE_VERSION);
366 		goto glue_error;
367 	}
368 	mlx5_glue->fork_init();
369 	return;
370 
371 glue_error:
372 	DRV_LOG(WARNING, "Cannot initialize MLX5 common due to missing"
373 		" run-time dependency on rdma-core libraries (libibverbs,"
374 		" libmlx5)");
375 	mlx5_glue = NULL;
376 	return;
377 }
378 
379 /**
380  * This function is responsible of initializing the variable
381  *  haswell_broadwell_cpu by checking if the cpu is intel
382  *  and reading the data returned from mlx5_cpu_id().
383  *  since haswell and broadwell cpus don't have improved performance
384  *  when using relaxed ordering we want to check the cpu type before
385  *  before deciding whether to enable RO or not.
386  *  if the cpu is haswell or broadwell the variable will be set to 1
387  *  otherwise it will be 0.
388  */
389 RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
390 {
391 #ifdef RTE_ARCH_X86_64
392 	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
393 	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
394 	unsigned int i, model, family, brand_id, vendor;
395 	unsigned int signature_intel_ebx = 0x756e6547;
396 	unsigned int extended_model;
397 	unsigned int eax = 0;
398 	unsigned int ebx = 0;
399 	unsigned int ecx = 0;
400 	unsigned int edx = 0;
401 	int max_level;
402 
403 	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
404 	vendor = ebx;
405 	max_level = eax;
406 	if (max_level < 1) {
407 		haswell_broadwell_cpu = 0;
408 		return;
409 	}
410 	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
411 	model = (eax >> 4) & 0x0f;
412 	family = (eax >> 8) & 0x0f;
413 	brand_id = ebx & 0xff;
414 	extended_model = (eax >> 12) & 0xf0;
415 	/* Check if the processor is Haswell or Broadwell */
416 	if (vendor == signature_intel_ebx) {
417 		if (family == 0x06)
418 			model += extended_model;
419 		if (brand_id == 0 && family == 0x6) {
420 			for (i = 0; i < RTE_DIM(broadwell_models); i++)
421 				if (model == broadwell_models[i]) {
422 					haswell_broadwell_cpu = 1;
423 					return;
424 				}
425 			for (i = 0; i < RTE_DIM(haswell_models); i++)
426 				if (model == haswell_models[i]) {
427 					haswell_broadwell_cpu = 1;
428 					return;
429 				}
430 		}
431 	}
432 #endif
433 	haswell_broadwell_cpu = 0;
434 }
435