/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <string.h>
#include <stdio.h>

#include <rte_errno.h>
#include <rte_mempool.h>
#include <rte_malloc.h>

#include "mlx5_common.h"
#include "mlx5_common_os.h"
#include "mlx5_common_utils.h"
#include "mlx5_malloc.h"
#include "mlx5_common_pci.h"

int mlx5_common_logtype;

uint8_t haswell_broadwell_cpu;

/* On x86_64 Intel processors, check whether relaxed ordering
 * should be used.
 */
#ifdef RTE_ARCH_X86_64
/**
 * This function returns processor identification and feature information
 * into the registers.
 *
 * @param eax, ebx, ecx, edx
 *		Pointers to the registers that will hold cpu information.
 * @param level
 *		The main category of information returned.
 */
static inline void mlx5_cpu_id(unsigned int level,
				unsigned int *eax, unsigned int *ebx,
				unsigned int *ecx, unsigned int *edx)
{
	__asm__("cpuid\n\t"
		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
		: "0" (level));
}
#endif
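
/*
 * Illustrative sketch only (not part of the driver): with level 0,
 * mlx5_cpu_id() returns the maximum supported CPUID level in EAX and the
 * 12-byte vendor string in EBX:EDX:ECX. On Intel parts this string reads
 * "GenuineIntel", which is why the constructor below only compares EBX
 * against 0x756e6547 ("Genu" in little-endian byte order).
 *
 *	unsigned int eax, ebx, ecx, edx;
 *	char vendor[13];
 *
 *	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
 *	memcpy(vendor, &ebx, 4);
 *	memcpy(vendor + 4, &edx, 4);
 *	memcpy(vendor + 8, &ecx, 4);
 *	vendor[12] = '\0';	-> "GenuineIntel" on Intel CPUs
 */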

RTE_INIT_PRIO(mlx5_log_init, LOG)
{
	mlx5_common_logtype = rte_log_register("pmd.common.mlx5");
	if (mlx5_common_logtype >= 0)
		rte_log_set_level(mlx5_common_logtype, RTE_LOG_NOTICE);
}

static bool mlx5_common_initialized;

/**
 * One-time initialization routine for the run-time dependency on the glue
 * library, shared by multiple PMDs. Each mlx5 PMD that depends on the
 * mlx5_common module must invoke this function in its constructor.
 */
void
mlx5_common_init(void)
{
	if (mlx5_common_initialized)
		return;

	mlx5_glue_constructor();
	mlx5_common_pci_init();
	mlx5_common_initialized = true;
}

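/*
 * Illustrative sketch (hypothetical PMD constructor, not part of this
 * file): a dependent mlx5 PMD invokes mlx5_common_init() from its own
 * constructor before using any glue/DevX functionality.
 *
 *	RTE_INIT(mlx5_example_pmd_init)
 *	{
 *		mlx5_common_init();
 *	}
 */
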
/**
 * This function initializes the variable haswell_broadwell_cpu by checking
 * whether the CPU is an Intel one and decoding the data returned by
 * mlx5_cpu_id(). Since Haswell and Broadwell CPUs do not show improved
 * performance with relaxed ordering, the CPU type must be checked before
 * deciding whether to enable RO. The variable is set to 1 if the CPU is
 * Haswell or Broadwell, and to 0 otherwise.
 */
RTE_INIT_PRIO(mlx5_is_haswell_broadwell_cpu, LOG)
{
#ifdef RTE_ARCH_X86_64
	unsigned int broadwell_models[4] = {0x3d, 0x47, 0x4F, 0x56};
	unsigned int haswell_models[4] = {0x3c, 0x3f, 0x45, 0x46};
	unsigned int i, model, family, brand_id, vendor;
	unsigned int signature_intel_ebx = 0x756e6547;
	unsigned int extended_model;
	unsigned int eax = 0;
	unsigned int ebx = 0;
	unsigned int ecx = 0;
	unsigned int edx = 0;
	int max_level;

	mlx5_cpu_id(0, &eax, &ebx, &ecx, &edx);
	vendor = ebx;
	max_level = eax;
	if (max_level < 1) {
		haswell_broadwell_cpu = 0;
		return;
	}
	mlx5_cpu_id(1, &eax, &ebx, &ecx, &edx);
	model = (eax >> 4) & 0x0f;
	family = (eax >> 8) & 0x0f;
	brand_id = ebx & 0xff;
	extended_model = (eax >> 12) & 0xf0;
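	/*
	 * CPUID leaf 1 packs the stepping in EAX[3:0], the model in
	 * EAX[7:4], the family in EAX[11:8] and the extended model in
	 * EAX[19:16]. For family 6 the displayed model is
	 * (extended_model << 4) + model; e.g. EAX = 0x306d4 (a Broadwell-U
	 * part) decodes to family 0x6 and model 0x3d.
	 */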
	/* Check if the processor is Haswell or Broadwell */
	if (vendor == signature_intel_ebx) {
		if (family == 0x06)
			model += extended_model;
		if (brand_id == 0 && family == 0x6) {
			for (i = 0; i < RTE_DIM(broadwell_models); i++)
				if (model == broadwell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
			for (i = 0; i < RTE_DIM(haswell_models); i++)
				if (model == haswell_models[i]) {
					haswell_broadwell_cpu = 1;
					return;
				}
		}
	}
#endif
	haswell_broadwell_cpu = 0;
}

/**
 * Allocate page of door-bells and register it using DevX API.
 *
 * @param [in] ctx
 *   Pointer to the device context.
 *
 * @return
 *   Pointer to new page on success, NULL otherwise.
 */
static struct mlx5_devx_dbr_page *
mlx5_alloc_dbr_page(void *ctx)
{
	struct mlx5_devx_dbr_page *page;

	/* Allocate space for door-bell page and management data. */
	page = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
			   sizeof(struct mlx5_devx_dbr_page),
			   RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
	if (!page) {
		DRV_LOG(ERR, "cannot allocate dbr page");
		return NULL;
	}
	/* Register allocated memory. */
	page->umem = mlx5_glue->devx_umem_reg(ctx, page->dbrs,
					      MLX5_DBR_PAGE_SIZE, 0);
	if (!page->umem) {
		DRV_LOG(ERR, "cannot umem reg dbr page");
		mlx5_free(page);
		return NULL;
	}
	return page;
}

/**
 * Find the next available door-bell, allocate new page if needed.
 *
 * @param [in] ctx
 *   Pointer to device context.
 * @param [in] head
 *   Pointer to the head of dbr pages list.
 * @param [out] dbr_page
 *   Door-bell page containing the allocated door-bell record.
 *
 * @return
 *   Door-bell address offset on success, a negative error value otherwise.
 */
int64_t
mlx5_get_dbr(void *ctx, struct mlx5_dbr_page_list *head,
	     struct mlx5_devx_dbr_page **dbr_page)
{
	struct mlx5_devx_dbr_page *page = NULL;
	uint32_t i, j;

	LIST_FOREACH(page, head, next)
		if (page->dbr_count < MLX5_DBR_PER_PAGE)
			break;
	if (!page) { /* No page with free door-bell exists. */
		page = mlx5_alloc_dbr_page(ctx);
		if (!page) /* Failed to allocate new page. */
			return (-1);
		LIST_INSERT_HEAD(head, page, next);
	}
	/* Loop to find bitmap part with clear bit. */
	for (i = 0;
	     i < MLX5_DBR_BITMAP_SIZE && page->dbr_bitmap[i] == UINT64_MAX;
	     i++)
		; /* Empty. */
	/* Find the first clear bit. */
	MLX5_ASSERT(i < MLX5_DBR_BITMAP_SIZE);
	j = rte_bsf64(~page->dbr_bitmap[i]);
	page->dbr_bitmap[i] |= (UINT64_C(1) << j);
	page->dbr_count++;
	*dbr_page = page;
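	/*
	 * The allocated door-bell index within the page is i * 64 + j
	 * (64 bits per bitmap word); the returned value is its byte offset
	 * inside the door-bell page.
	 */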
	return (i * CHAR_BIT * sizeof(uint64_t) + j) * MLX5_DBR_SIZE;
}

/**
 * Release a door-bell record.
 *
 * @param [in] head
 *   Pointer to the head of dbr pages list.
 * @param [in] umem_id
 *   UMEM ID of page containing the door-bell record to release.
 * @param [in] offset
 *   Offset of door-bell record in page.
 *
 * @return
 *   0 on success, a negative error value otherwise.
 */
int32_t
mlx5_release_dbr(struct mlx5_dbr_page_list *head, uint32_t umem_id,
		 uint64_t offset)
{
	struct mlx5_devx_dbr_page *page = NULL;
	int ret = 0;

	LIST_FOREACH(page, head, next)
		/* Find the page this address belongs to. */
		if (mlx5_os_get_umem_id(page->umem) == umem_id)
			break;
	if (!page)
		return -EINVAL;
	page->dbr_count--;
	if (!page->dbr_count) {
		/* Page not used, free it and remove from list. */
		LIST_REMOVE(page, next);
		if (page->umem)
			ret = -mlx5_glue->devx_umem_dereg(page->umem);
		mlx5_free(page);
	} else {
		/* Mark in bitmap that this door-bell is not in use. */
		offset /= MLX5_DBR_SIZE;
		int i = offset / 64;
		int j = offset % 64;

		page->dbr_bitmap[i] &= ~(UINT64_C(1) << j);
	}
	return ret;
}
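
/*
 * Minimal usage sketch (illustrative only; the surrounding queue code and
 * the dbr_list variable are hypothetical): a caller obtains a door-bell
 * record, derives its address from the returned page and offset, and later
 * releases it with the UMEM ID of that page.
 *
 *	struct mlx5_devx_dbr_page *dbr_page = NULL;
 *	int64_t dbr_offset = mlx5_get_dbr(ctx, &dbr_list, &dbr_page);
 *
 *	if (dbr_offset < 0)
 *		return -1;
 *	volatile uint32_t *db_rec = (volatile uint32_t *)
 *		((uintptr_t)dbr_page->dbrs + (uintptr_t)dbr_offset);
 *	...
 *	mlx5_release_dbr(&dbr_list, mlx5_os_get_umem_id(dbr_page->umem),
 *			 (uint64_t)dbr_offset);
 */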

/**
 * Allocate the User Access Region with DevX on specified device.
 *
 * @param [in] ctx
 *   Infiniband device context to perform allocation on.
 * @param [in] mapping
 *   MLX5DV_UAR_ALLOC_TYPE_BF - allocate as cached memory with write-combining
 *				attributes (if supported by the host), the
 *				writes to the UAR registers must be followed
 *				by a write memory barrier.
 *   MLX5DV_UAR_ALLOC_TYPE_NC - allocate as non-cached memory, all writes are
 *				promoted to the registers immediately, no
 *				memory barriers needed.
 *   mapping < 0 - the first attempt is performed with MLX5DV_UAR_ALLOC_TYPE_BF,
 *		   if this fails the next attempt with MLX5DV_UAR_ALLOC_TYPE_NC
 *		   is performed. The drivers specifying negative values should
 *		   always provide the write memory barrier operation after UAR
 *		   register writes.
 * If there are no definitions for the MLX5DV_UAR_ALLOC_TYPE_xx (older rdma
 * library headers), the caller can specify 0.
 *
 * @return
 *   UAR object pointer on success, NULL otherwise and rte_errno is set.
 */
void *
mlx5_devx_alloc_uar(void *ctx, int mapping)
{
	void *uar;
	uint32_t retry, uar_mapping;
	void *base_addr;

	for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		/* Control the mapping type according to the settings. */
		uar_mapping = (mapping < 0) ?
			      MLX5DV_UAR_ALLOC_TYPE_NC : mapping;
#else
		/*
		 * It seems we have no way to control the memory mapping type
		 * for the UAR, the default "Write-Combining" type is assumed.
		 */
		uar_mapping = 0;
		RTE_SET_USED(mapping);
#endif
		uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
		if (!uar &&
		    mapping < 0 &&
		    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
			/*
			 * In some environments, such as virtual machines,
			 * the Write-Combining mapping may not be supported
			 * and UAR allocation fails. Try the "Non-Cached"
			 * mapping in that case.
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (BF)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		} else if (!uar &&
			   mapping < 0 &&
			   uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
			/*
			 * If Verbs/kernel does not support "Non-Cached",
			 * try the "Write-Combining" mapping.
			 */
			DRV_LOG(WARNING, "Failed to allocate DevX UAR (NC)");
			uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
			uar = mlx5_glue->devx_alloc_uar(ctx, uar_mapping);
		}
#endif
		if (!uar) {
			DRV_LOG(ERR, "Failed to allocate DevX UAR (BF/NC)");
			rte_errno = ENOMEM;
			goto exit;
		}
		base_addr = mlx5_os_get_devx_uar_base_addr(uar);
		if (base_addr)
			break;
		/*
		 * The UARs are allocated by rdma_core within the
		 * IB device context; on context closure all UARs
		 * will be freed, so there should be no memory/object
		 * leakage.
		 */
		DRV_LOG(WARNING, "Retrying to allocate DevX UAR");
		uar = NULL;
	}
	/* Check whether we finally succeeded with valid UAR allocation. */
	if (!uar) {
		DRV_LOG(ERR, "Failed to allocate DevX UAR (NULL base)");
		rte_errno = ENOMEM;
	}
	/*
	 * Returning void * instead of struct mlx5dv_devx_uar *
	 * is for compatibility with older rdma-core library headers.
	 */
exit:
	return uar;
}
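
/*
 * Illustrative caller sketch (not part of this file; error handling is
 * abbreviated): a PMD typically passes a negative mapping so that both
 * mapping types are tried, and must issue a write barrier after door-bell
 * writes because the resulting mapping may be Write-Combining.
 *
 *	void *uar = mlx5_devx_alloc_uar(ctx, -1);
 *
 *	if (uar == NULL)
 *		return -rte_errno;
 *	base = mlx5_os_get_devx_uar_base_addr(uar);
 *	... write door-bell registers located within the UAR page ...
 *	rte_wmb();
 */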