/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2020 Dmitry Kozlyuk
 */

#include <rte_errno.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"
#include "eal_windows.h"

int
eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	EAL_LOG_NOT_IMPLEMENTED();
	return -ENOTSUP;
}

int
eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	RTE_SET_USED(offset);
	EAL_LOG_NOT_IMPLEMENTED();
	return -ENOTSUP;
}

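/*
 * Commit one hugepage of size hi->hugepage_sz and describe it in *ms.
 *
 * If requested_addr is NULL, a fresh region is obtained from the OS;
 * otherwise the already reserved region at requested_addr is committed.
 * The page is then touched to force physical backing, and its IOVA and
 * NUMA placement are verified. Returns 0 on success, -1 on failure.
 */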
static int
alloc_seg(struct rte_memseg *ms, void *requested_addr, int socket_id,
	struct hugepage_info *hi)
{
	HANDLE current_process;
	unsigned int numa_node;
	size_t alloc_sz;
	void *addr;
	rte_iova_t iova = RTE_BAD_IOVA;
	PSAPI_WORKING_SET_EX_INFORMATION info;
	PSAPI_WORKING_SET_EX_BLOCK *page;

	if (ms->len > 0) {
		/* If a segment is already allocated as needed, return it. */
		if ((ms->addr == requested_addr) &&
			(ms->socket_id == socket_id) &&
			(ms->hugepage_sz == hi->hugepage_sz)) {
			return 0;
		}

		/* Bugcheck, should not happen. */
		EAL_LOG(DEBUG, "Attempted to reallocate segment %p "
			"(size %zu) on socket %d", ms->addr,
			ms->len, ms->socket_id);
		return -1;
	}

	current_process = GetCurrentProcess();
	numa_node = eal_socket_numa_node(socket_id);
	alloc_sz = hi->hugepage_sz;

	if (requested_addr == NULL) {
		/* Request a new chunk of memory from OS. */
		addr = eal_mem_alloc_socket(alloc_sz, socket_id);
		if (addr == NULL) {
			EAL_LOG(DEBUG, "Cannot allocate %zu bytes "
				"on socket %d", alloc_sz, socket_id);
			return -1;
		}
	} else {
		/* Requested address is already reserved, commit memory. */
		addr = eal_mem_commit(requested_addr, alloc_sz, socket_id);

		/* During commitment, memory is temporarily freed and might
		 * be allocated by a different non-EAL thread. This is a fatal
		 * error, because it breaks MSL assumptions.
		 */
		if ((addr != NULL) && (addr != requested_addr)) {
			EAL_LOG(CRIT, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!",
				requested_addr);
			return -1;
		}

		if (addr == NULL) {
			EAL_LOG(DEBUG, "Cannot commit reserved memory %p "
				"(size %zu) on socket %d",
				requested_addr, alloc_sz, socket_id);
			return -1;
		}
	}

	/* Force OS to allocate a physical page and select a NUMA node.
	 * Hugepages are not pageable in Windows, so there's no race
	 * for physical address.
	 */
	*(volatile int *)addr = *(volatile int *)addr;

	iova = rte_mem_virt2iova(addr);
	if (iova == RTE_BAD_IOVA) {
		EAL_LOG(DEBUG,
			"Cannot get IOVA of allocated segment");
		goto error;
	}

	/* Only the "Ex" function can handle hugepages. */
	info.VirtualAddress = addr;
	if (!QueryWorkingSetEx(current_process, &info, sizeof(info))) {
		RTE_LOG_WIN32_ERR("QueryWorkingSetEx(%p)", addr);
		goto error;
	}

	page = &info.VirtualAttributes;
	if (!page->Valid || !page->LargePage) {
		EAL_LOG(DEBUG, "Got regular page instead of a hugepage");
		goto error;
	}
	if (page->Node != numa_node) {
		EAL_LOG(DEBUG,
			"NUMA node hint %u (socket %d) not respected, got %u",
			numa_node, socket_id, page->Node);
		goto error;
	}

	ms->addr = addr;
	ms->hugepage_sz = hi->hugepage_sz;
	ms->len = alloc_sz;
	ms->nchannel = rte_memory_get_nchannel();
	ms->nrank = rte_memory_get_nrank();
	ms->iova = iova;
	ms->socket_id = socket_id;

	return 0;

error:
	/* Only jump here when `addr` and `alloc_sz` are valid. */
	if (eal_mem_decommit(addr, alloc_sz) && (rte_errno == EADDRNOTAVAIL)) {
		/* During decommitment, memory is temporarily returned
		 * to the system and the address may become unavailable.
		 */
		EAL_LOG(CRIT, "Address %p occupied by an alien "
			"allocation - MSL is not VA-contiguous!", addr);
	}
	return -1;
}

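/* Decommit a segment and clear its descriptor so alloc_seg() can reuse it. */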
static int
free_seg(struct rte_memseg *ms)
{
	if (eal_mem_decommit(ms->addr, ms->len)) {
		if (rte_errno == EADDRNOTAVAIL) {
			/* See alloc_seg() for explanation. */
			EAL_LOG(CRIT, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!",
				ms->addr);
		}
		return -1;
	}

	/* Must clear the segment, because alloc_seg() inspects it. */
	memset(ms, 0, sizeof(*ms));
	return 0;
}

struct alloc_walk_param {
	struct hugepage_info *hi;    /* hugepage info for the requested page size */
	struct rte_memseg **ms;      /* output array of segments, may be NULL */
	size_t page_sz;              /* page size of the target memseg list */
	unsigned int segs_allocated; /* output: number of segments allocated */
	unsigned int n_segs;         /* number of segments requested */
	int socket;                  /* socket to allocate on */
	bool exact;                  /* require all n_segs, no partial allocation */
};

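/*
 * Walk callback: try to allocate wa->n_segs pages from a memseg list that
 * matches the requested page size and socket. Returns 0 to move on to the
 * next list, -1 on allocation failure when an exact count was requested,
 * and 1 (stopping the walk) once at least one segment has been allocated.
 */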
static int
alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct alloc_walk_param *wa = arg;
	struct rte_memseg_list *cur_msl;
	size_t page_sz;
	int cur_idx, start_idx, j;
	unsigned int msl_idx, need, i;

	if (msl->page_sz != wa->page_sz)
		return 0;
	if (msl->socket_id != wa->socket)
		return 0;

	page_sz = (size_t)msl->page_sz;

	msl_idx = msl - mcfg->memsegs;
	cur_msl = &mcfg->memsegs[msl_idx];

	need = wa->n_segs;

	/* try finding space in memseg list */
	if (wa->exact) {
		/* if we require exact number of pages in a list, find them */
		cur_idx = rte_fbarray_find_next_n_free(
			&cur_msl->memseg_arr, 0, need);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
	} else {
		int cur_len;

		/* we don't require exact number of pages, so we're going to go
		 * for best-effort allocation. that means finding the biggest
		 * unused block, and going with that.
		 */
		cur_idx = rte_fbarray_find_biggest_free(
			&cur_msl->memseg_arr, 0);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
		/* adjust the size to possibly be smaller than original
		 * request, but do not allow it to be bigger.
		 */
		cur_len = rte_fbarray_find_contig_free(
			&cur_msl->memseg_arr, cur_idx);
		need = RTE_MIN(need, (unsigned int)cur_len);
	}

	for (i = 0; i < need; i++, cur_idx++) {
		struct rte_memseg *cur;
		void *map_addr;

		cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx);
		map_addr = RTE_PTR_ADD(cur_msl->base_va, cur_idx * page_sz);

		if (alloc_seg(cur, map_addr, wa->socket, wa->hi)) {
			EAL_LOG(DEBUG, "attempted to allocate %i segments, "
				"but only %i were allocated", need, i);

			/* if exact number wasn't requested, stop */
			if (!wa->exact)
				goto out;

			/* clean up */
			for (j = start_idx; j < cur_idx; j++) {
				struct rte_memseg *tmp;
				struct rte_fbarray *arr = &cur_msl->memseg_arr;

				tmp = rte_fbarray_get(arr, j);
				rte_fbarray_set_free(arr, j);

				if (free_seg(tmp))
					EAL_LOG(DEBUG, "Cannot free page");
			}
			/* clear the list */
			if (wa->ms)
				memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);

			return -1;
		}
		if (wa->ms)
			wa->ms[i] = cur;

		rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx);
	}

out:
	wa->segs_allocated = i;
	if (i > 0)
		cur_msl->version++;

	/* if we didn't allocate any segments, move on to the next list */
	return i > 0;
}

struct free_walk_param {
	struct hugepage_info *hi;
	struct rte_memseg *ms;
};
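/*
 * Walk callback: locate the memseg list that contains wa->ms, mark the
 * segment free in that list and decommit it. Returns 0 if the segment does
 * not belong to this list, 1 when it was freed, -1 if freeing failed.
 */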
static int
free_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *found_msl;
	struct free_walk_param *wa = arg;
	uintptr_t start_addr, end_addr;
	int msl_idx, seg_idx, ret;

	start_addr = (uintptr_t) msl->base_va;
	end_addr = start_addr + msl->len;

	if ((uintptr_t)wa->ms->addr < start_addr ||
		(uintptr_t)wa->ms->addr >= end_addr)
		return 0;

	msl_idx = msl - mcfg->memsegs;
	seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz;

	/* msl is const */
	found_msl = &mcfg->memsegs[msl_idx];
	found_msl->version++;

	rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx);

	ret = free_seg(wa->ms);

	return (ret < 0) ? (-1) : 1;
}

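/*
 * Allocate n_segs hugepages of page_sz on the given socket and, when ms is
 * not NULL, fill ms[] with the allocated segments. Returns the number of
 * segments allocated on success, -ENOTSUP in legacy mode, and -1 if no
 * matching hugepage size or memseg list is found or allocation fails.
 */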
int
eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs,
	size_t page_sz, int socket, bool exact)
{
	unsigned int i;
	int ret = -1;
	struct alloc_walk_param wa;
	struct hugepage_info *hi = NULL;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->legacy_mem) {
		EAL_LOG(ERR, "dynamic allocation not supported in legacy mode");
		return -ENOTSUP;
	}

	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
		if (page_sz == hpi->hugepage_sz) {
			hi = hpi;
			break;
		}
	}
	if (!hi) {
		EAL_LOG(ERR, "cannot find relevant hugepage_info entry");
		return -1;
	}

	memset(&wa, 0, sizeof(wa));
	wa.exact = exact;
	wa.hi = hi;
	wa.ms = ms;
	wa.n_segs = n_segs;
	wa.page_sz = page_sz;
	wa.socket = socket;
	wa.segs_allocated = 0;

	/* memalloc is locked, so it's safe to use thread-unsafe version */
	ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
	if (ret == 0) {
		EAL_LOG(ERR, "cannot find suitable memseg_list");
		ret = -1;
	} else if (ret > 0) {
		ret = (int)wa.segs_allocated;
	}

	return ret;
}

struct rte_memseg *
eal_memalloc_alloc_seg(size_t page_sz, int socket)
{
	struct rte_memseg *ms = NULL;
	eal_memalloc_alloc_seg_bulk(&ms, 1, page_sz, socket, true);
	return ms;
}

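/*
 * Free n_segs previously allocated segments. Returns 0 on success and -1
 * in legacy mode or if at least one segment could not be freed.
 */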
int
eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
{
	int seg, ret = 0;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* dynamic free not supported in legacy mode */
	if (internal_conf->legacy_mem)
		return -1;

	for (seg = 0; seg < n_segs; seg++) {
		struct rte_memseg *cur = ms[seg];
		struct hugepage_info *hi = NULL;
		struct free_walk_param wa;
		size_t i;
		int walk_res;

		/* if this page is marked as unfreeable, fail */
		if (cur->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
			EAL_LOG(DEBUG, "Page is not allowed to be freed");
			ret = -1;
			continue;
		}

		memset(&wa, 0, sizeof(wa));

		for (i = 0; i < RTE_DIM(internal_conf->hugepage_info); i++) {
			hi = &internal_conf->hugepage_info[i];
			if (cur->hugepage_sz == hi->hugepage_sz)
				break;
		}
		if (i == RTE_DIM(internal_conf->hugepage_info)) {
			EAL_LOG(ERR, "Can't find relevant hugepage_info entry");
			ret = -1;
			continue;
		}

		wa.ms = cur;
		wa.hi = hi;

		/* memalloc is locked, so it's safe to use thread-unsafe version
		 */
		walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
				&wa);
		if (walk_res == 1)
			continue;
		if (walk_res == 0)
			EAL_LOG(ERR, "Couldn't find memseg list");
		ret = -1;
	}
	return ret;
}

int
eal_memalloc_free_seg(struct rte_memseg *ms)
{
	return eal_memalloc_free_seg_bulk(&ms, 1);
}

int
eal_memalloc_sync_with_primary(void)
{
	/* No multi-process support. */
	EAL_LOG_NOT_IMPLEMENTED();
	return -ENOTSUP;
}

int
eal_memalloc_cleanup(void)
{
	/* not implemented */
	return 0;
}

int
eal_memalloc_init(void)
{
	/* No action required. */
	return 0;
}