xref: /dpdk/lib/eal/windows/eal_memalloc.c (revision ae67895b507bb6af22263c79ba0d5c374b396485)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2020 Dmitry Kozlyuk
 */

#include <rte_errno.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"
#include "eal_windows.h"

int
eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	EAL_LOG_NOT_IMPLEMENTED();
	return -ENOTSUP;
}

int
eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	RTE_SET_USED(offset);
	EAL_LOG_NOT_IMPLEMENTED();
	return -ENOTSUP;
}

static int
alloc_seg(struct rte_memseg *ms, void *requested_addr, int socket_id,
	struct hugepage_info *hi)
{
	HANDLE current_process;
	unsigned int numa_node;
	size_t alloc_sz;
	void *addr;
	rte_iova_t iova = RTE_BAD_IOVA;
	PSAPI_WORKING_SET_EX_INFORMATION info;
	PSAPI_WORKING_SET_EX_BLOCK *page;

	if (ms->len > 0) {
		/* If a segment is already allocated as needed, return it. */
		if ((ms->addr == requested_addr) &&
			(ms->socket_id == socket_id) &&
			(ms->hugepage_sz == hi->hugepage_sz)) {
			return 0;
		}

		/* Bugcheck, should not happen. */
		EAL_LOG(DEBUG, "Attempted to reallocate segment %p "
			"(size %zu) on socket %d", ms->addr,
			ms->len, ms->socket_id);
		return -1;
	}

	current_process = GetCurrentProcess();
	numa_node = eal_socket_numa_node(socket_id);
	alloc_sz = hi->hugepage_sz;

	if (requested_addr == NULL) {
		/* Request a new chunk of memory from OS. */
		addr = eal_mem_alloc_socket(alloc_sz, socket_id);
		if (addr == NULL) {
			EAL_LOG(DEBUG, "Cannot allocate %zu bytes "
				"on socket %d", alloc_sz, socket_id);
			return -1;
		}
	} else {
		/* Requested address is already reserved, commit memory. */
		addr = eal_mem_commit(requested_addr, alloc_sz, socket_id);

		/* During commitment, memory is temporarily freed and might
		 * be allocated by a different non-EAL thread. This is a fatal
		 * error, because it breaks MSL assumptions.
		 */
		if ((addr != NULL) && (addr != requested_addr)) {
			EAL_LOG(CRIT, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!",
				requested_addr);
			return -1;
		}

		if (addr == NULL) {
			EAL_LOG(DEBUG, "Cannot commit reserved memory %p "
				"(size %zu) on socket %d",
				requested_addr, alloc_sz, socket_id);
			return -1;
		}
	}

	/* Force OS to allocate a physical page and select a NUMA node.
	 * Hugepages are not pageable in Windows, so there's no race
	 * for physical address.
	 */
	*(volatile int *)addr = *(volatile int *)addr;

	iova = rte_mem_virt2iova(addr);
	if (iova == RTE_BAD_IOVA) {
		EAL_LOG(DEBUG,
			"Cannot get IOVA of allocated segment");
		goto error;
	}

	/* Only "Ex" function can handle hugepages. */
	info.VirtualAddress = addr;
	if (!QueryWorkingSetEx(current_process, &info, sizeof(info))) {
		RTE_LOG_WIN32_ERR("QueryWorkingSetEx(%p)", addr);
		goto error;
	}

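	/* Check the per-page attributes: the mapping must be a valid
	 * large page placed on the NUMA node that was requested.
	 */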
	page = &info.VirtualAttributes;
	if (!page->Valid || !page->LargePage) {
		EAL_LOG(DEBUG, "Got regular page instead of a hugepage");
		goto error;
	}
	if (page->Node != numa_node) {
		EAL_LOG(DEBUG,
			"NUMA node hint %u (socket %d) not respected, got %u",
			numa_node, socket_id, page->Node);
		goto error;
	}

	ms->addr = addr;
	ms->hugepage_sz = hi->hugepage_sz;
	ms->len = alloc_sz;
	ms->nchannel = rte_memory_get_nchannel();
	ms->nrank = rte_memory_get_nrank();
	ms->iova = iova;
	ms->socket_id = socket_id;

	return 0;

error:
	/* Only jump here when `addr` and `alloc_sz` are valid. */
	if (eal_mem_decommit(addr, alloc_sz) && (rte_errno == EADDRNOTAVAIL)) {
		/* During decommitment, memory is temporarily returned
		 * to the system and the address may become unavailable.
		 */
		EAL_LOG(CRIT, "Address %p occupied by an alien "
			"allocation - MSL is not VA-contiguous!", addr);
	}
	return -1;
}

static int
free_seg(struct rte_memseg *ms)
{
	if (eal_mem_decommit(ms->addr, ms->len)) {
		if (rte_errno == EADDRNOTAVAIL) {
			/* See alloc_seg() for explanation. */
			EAL_LOG(CRIT, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!",
				ms->addr);
		}
		return -1;
	}

	/* Must clear the segment, because alloc_seg() inspects it. */
	memset(ms, 0, sizeof(*ms));
	return 0;
}

struct alloc_walk_param {
	struct hugepage_info *hi;
	struct rte_memseg **ms;
	size_t page_sz;
	unsigned int segs_allocated;
	unsigned int n_segs;
	int socket;
	bool exact;
};

static int
alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct alloc_walk_param *wa = arg;
	struct rte_memseg_list *cur_msl;
	size_t page_sz;
	int cur_idx, start_idx, j;
	unsigned int msl_idx, need, i;

	if (msl->page_sz != wa->page_sz)
		return 0;
	if (msl->socket_id != wa->socket)
		return 0;

	page_sz = (size_t)msl->page_sz;

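	/* The walk callback receives a const list; compute its index and
	 * take a writable reference from the shared memory configuration.
	 */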
	msl_idx = msl - mcfg->memsegs;
	cur_msl = &mcfg->memsegs[msl_idx];

	need = wa->n_segs;

	/* try finding space in memseg list */
	if (wa->exact) {
		/* if we require exact number of pages in a list, find them */
		cur_idx = rte_fbarray_find_next_n_free(
			&cur_msl->memseg_arr, 0, need);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
	} else {
		int cur_len;

		/* we don't require exact number of pages, so we're going to go
		 * for best-effort allocation. that means finding the biggest
		 * unused block, and going with that.
		 */
		cur_idx = rte_fbarray_find_biggest_free(
			&cur_msl->memseg_arr, 0);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
		/* adjust the size to possibly be smaller than original
		 * request, but do not allow it to be bigger.
		 */
		cur_len = rte_fbarray_find_contig_free(
			&cur_msl->memseg_arr, cur_idx);
		need = RTE_MIN(need, (unsigned int)cur_len);
	}

	for (i = 0; i < need; i++, cur_idx++) {
		struct rte_memseg *cur;
		void *map_addr;

		cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx);
		map_addr = RTE_PTR_ADD(cur_msl->base_va, cur_idx * page_sz);

		if (alloc_seg(cur, map_addr, wa->socket, wa->hi)) {
			EAL_LOG(DEBUG, "attempted to allocate %i segments, "
				"but only %i were allocated", need, i);

			/* if exact number wasn't requested, stop */
			if (!wa->exact)
				goto out;

			/* clean up */
			for (j = start_idx; j < cur_idx; j++) {
				struct rte_memseg *tmp;
				struct rte_fbarray *arr = &cur_msl->memseg_arr;

				tmp = rte_fbarray_get(arr, j);
				rte_fbarray_set_free(arr, j);

				if (free_seg(tmp))
					EAL_LOG(DEBUG, "Cannot free page");
			}
			/* clear the list */
			if (wa->ms)
				memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);

			return -1;
		}
		if (wa->ms)
			wa->ms[i] = cur;

		rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx);
	}

out:
	wa->segs_allocated = i;
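	/* A successful allocation modified the list; bump its version
	 * so that readers can detect the change.
	 */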
	if (i > 0)
		cur_msl->version++;

	/* if we didn't allocate any segments, move on to the next list */
	return i > 0;
}

struct free_walk_param {
	struct hugepage_info *hi;
	struct rte_memseg *ms;
};
static int
free_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *found_msl;
	struct free_walk_param *wa = arg;
	uintptr_t start_addr, end_addr;
	int msl_idx, seg_idx, ret;

	start_addr = (uintptr_t) msl->base_va;
	end_addr = start_addr + msl->len;

	if ((uintptr_t)wa->ms->addr < start_addr ||
		(uintptr_t)wa->ms->addr >= end_addr)
		return 0;

	msl_idx = msl - mcfg->memsegs;
	seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz;

	/* msl is const */
	found_msl = &mcfg->memsegs[msl_idx];
	found_msl->version++;

	rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx);

	ret = free_seg(wa->ms);

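	/* Returning non-zero stops the walk: 1 reports that the owning
	 * list was found and handled, -1 propagates a failure to free.
	 */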
	return (ret < 0) ? (-1) : 1;
}

int
eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs,
		size_t page_sz, int socket, bool exact)
{
	unsigned int i;
	int ret = -1;
	struct alloc_walk_param wa;
	struct hugepage_info *hi = NULL;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->legacy_mem) {
		EAL_LOG(ERR, "dynamic allocation not supported in legacy mode");
		return -ENOTSUP;
	}

	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
		if (page_sz == hpi->hugepage_sz) {
			hi = hpi;
			break;
		}
	}
	if (!hi) {
		EAL_LOG(ERR, "cannot find relevant hugepage_info entry");
		return -1;
	}

	memset(&wa, 0, sizeof(wa));
	wa.exact = exact;
	wa.hi = hi;
	wa.ms = ms;
	wa.n_segs = n_segs;
	wa.page_sz = page_sz;
	wa.socket = socket;
	wa.segs_allocated = 0;

	/* memalloc is locked, so it's safe to use thread-unsafe version */
	ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
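	/* The walk returns 0 when no list matched the request, a negative
	 * value when allocation failed inside a matching list, and a
	 * positive value on success.
	 */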
	if (ret == 0) {
		EAL_LOG(ERR, "cannot find suitable memseg_list");
		ret = -1;
	} else if (ret > 0) {
		ret = (int)wa.segs_allocated;
	}

	return ret;
}

struct rte_memseg *
eal_memalloc_alloc_seg(size_t page_sz, int socket)
{
	struct rte_memseg *ms = NULL;
	eal_memalloc_alloc_seg_bulk(&ms, 1, page_sz, socket, true);
	return ms;
}

int
eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
{
	int seg, ret = 0;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* dynamic free not supported in legacy mode */
	if (internal_conf->legacy_mem)
		return -1;

	for (seg = 0; seg < n_segs; seg++) {
		struct rte_memseg *cur = ms[seg];
		struct hugepage_info *hi = NULL;
		struct free_walk_param wa;
		size_t i;
		int walk_res;

		/* if this page is marked as unfreeable, fail */
		if (cur->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
			EAL_LOG(DEBUG, "Page is not allowed to be freed");
			ret = -1;
			continue;
		}

		memset(&wa, 0, sizeof(wa));

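		/* Match the segment's page size to a known hugepage size. */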
		for (i = 0; i < RTE_DIM(internal_conf->hugepage_info); i++) {
			hi = &internal_conf->hugepage_info[i];
			if (cur->hugepage_sz == hi->hugepage_sz)
				break;
		}
		if (i == RTE_DIM(internal_conf->hugepage_info)) {
			EAL_LOG(ERR, "Can't find relevant hugepage_info entry");
			ret = -1;
			continue;
		}

		wa.ms = cur;
		wa.hi = hi;

		/* memalloc is locked, so it's safe to use thread-unsafe version
		 */
		walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
				&wa);
		if (walk_res == 1)
			continue;
		if (walk_res == 0)
			EAL_LOG(ERR, "Couldn't find memseg list");
		ret = -1;
	}
	return ret;
}

int
eal_memalloc_free_seg(struct rte_memseg *ms)
{
	return eal_memalloc_free_seg_bulk(&ms, 1);
}

int
eal_memalloc_sync_with_primary(void)
{
	/* No multi-process support. */
	EAL_LOG_NOT_IMPLEMENTED();
	return -ENOTSUP;
}

int
eal_memalloc_cleanup(void)
{
	/* not implemented */
	return 0;
}

int
eal_memalloc_init(void)
{
	/* No action required. */
	return 0;
}