/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2020 Dmitry Kozlyuk
 */

#include <rte_errno.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"
#include "eal_windows.h"

int
eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	EAL_LOG_NOT_IMPLEMENTED();
	return -ENOTSUP;
}

int
eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
{
	/* Hugepages have no associated files in Windows. */
	RTE_SET_USED(list_idx);
	RTE_SET_USED(seg_idx);
	RTE_SET_USED(offset);
	EAL_LOG_NOT_IMPLEMENTED();
	return -ENOTSUP;
}

static int
alloc_seg(struct rte_memseg *ms, void *requested_addr, int socket_id,
	struct hugepage_info *hi)
{
	HANDLE current_process;
	unsigned int numa_node;
	size_t alloc_sz;
	void *addr;
	rte_iova_t iova = RTE_BAD_IOVA;
	PSAPI_WORKING_SET_EX_INFORMATION info;
	PSAPI_WORKING_SET_EX_BLOCK *page;

	if (ms->len > 0) {
		/* If a segment is already allocated as needed, return it. */
		if ((ms->addr == requested_addr) &&
			(ms->socket_id == socket_id) &&
			(ms->hugepage_sz == hi->hugepage_sz)) {
			return 0;
		}

		/* Bugcheck, should not happen. */
		EAL_LOG(DEBUG, "Attempted to reallocate segment %p "
			"(size %zu) on socket %d", ms->addr,
			ms->len, ms->socket_id);
		return -1;
	}

	current_process = GetCurrentProcess();
	numa_node = eal_socket_numa_node(socket_id);
	alloc_sz = hi->hugepage_sz;

	if (requested_addr == NULL) {
		/* Request a new chunk of memory from the OS. */
		addr = eal_mem_alloc_socket(alloc_sz, socket_id);
		if (addr == NULL) {
			EAL_LOG(DEBUG, "Cannot allocate %zu bytes "
				"on socket %d", alloc_sz, socket_id);
			return -1;
		}
	} else {
		/* Requested address is already reserved, commit memory. */
		addr = eal_mem_commit(requested_addr, alloc_sz, socket_id);

		/* During commitment, memory is temporarily freed and might
		 * be allocated by a different non-EAL thread. This is a fatal
		 * error, because it breaks MSL assumptions.
		 */
		if ((addr != NULL) && (addr != requested_addr)) {
			EAL_LOG(CRIT, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!",
				requested_addr);
			return -1;
		}

		if (addr == NULL) {
			EAL_LOG(DEBUG, "Cannot commit reserved memory %p "
				"(size %zu) on socket %d",
				requested_addr, alloc_sz, socket_id);
			return -1;
		}
	}

	/* Force the OS to allocate a physical page and select a NUMA node.
	 * Hugepages are not pageable in Windows, so there's no race
	 * for the physical address.
	 */
	*(volatile int *)addr = *(volatile int *)addr;
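
	/* Illustrative aside (not part of the original file): the
	 * self-assignment above touches the page so that the kernel backs
	 * the committed range with a physical hugepage before its
	 * attributes are queried; QueryWorkingSetEx() reports an untouched
	 * page as not Valid. A standalone residency check along the same
	 * lines might look like this hypothetical helper:
	 *
	 *	static bool
	 *	page_is_resident_hugepage(void *va)
	 *	{
	 *		PSAPI_WORKING_SET_EX_INFORMATION wsi;
	 *
	 *		wsi.VirtualAddress = va;
	 *		if (!QueryWorkingSetEx(GetCurrentProcess(),
	 *				&wsi, sizeof(wsi)))
	 *			return false;
	 *		return wsi.VirtualAttributes.Valid &&
	 *			wsi.VirtualAttributes.LargePage;
	 *	}
	 */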

	iova = rte_mem_virt2iova(addr);
	if (iova == RTE_BAD_IOVA) {
		EAL_LOG(DEBUG,
			"Cannot get IOVA of allocated segment");
		goto error;
	}

	/* Only the "Ex" function can handle hugepages. */
	info.VirtualAddress = addr;
	if (!QueryWorkingSetEx(current_process, &info, sizeof(info))) {
		RTE_LOG_WIN32_ERR("QueryWorkingSetEx(%p)", addr);
		goto error;
	}

	page = &info.VirtualAttributes;
	if (!page->Valid || !page->LargePage) {
		EAL_LOG(DEBUG, "Got a regular page instead of a hugepage");
		goto error;
	}
	if (page->Node != numa_node) {
		EAL_LOG(DEBUG,
			"NUMA node hint %u (socket %d) not respected, got %u",
			numa_node, socket_id, page->Node);
		goto error;
	}

	ms->addr = addr;
	ms->hugepage_sz = hi->hugepage_sz;
	ms->len = alloc_sz;
	ms->nchannel = rte_memory_get_nchannel();
	ms->nrank = rte_memory_get_nrank();
	ms->iova = iova;
	ms->socket_id = socket_id;

	return 0;

error:
	/* Only jump here when `addr` and `alloc_sz` are valid. */
	if (eal_mem_decommit(addr, alloc_sz) && (rte_errno == EADDRNOTAVAIL)) {
		/* During decommitment, memory is temporarily returned
		 * to the system and the address may become unavailable.
		 */
		EAL_LOG(CRIT, "Address %p occupied by an alien "
			"allocation - MSL is not VA-contiguous!", addr);
	}
	return -1;
}

static int
free_seg(struct rte_memseg *ms)
{
	if (eal_mem_decommit(ms->addr, ms->len)) {
		if (rte_errno == EADDRNOTAVAIL) {
			/* See alloc_seg() for explanation. */
			EAL_LOG(CRIT, "Address %p occupied by an alien "
				"allocation - MSL is not VA-contiguous!",
				ms->addr);
		}
		return -1;
	}

	/* Must clear the segment, because alloc_seg() inspects it. */
	memset(ms, 0, sizeof(*ms));
	return 0;
}

struct alloc_walk_param {
	struct hugepage_info *hi;
	struct rte_memseg **ms;
	size_t page_sz;
	unsigned int segs_allocated;
	unsigned int n_segs;
	int socket;
	bool exact;
};
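
/* Illustrative aside (not part of the original file): callbacks passed to
 * rte_memseg_list_walk_thread_unsafe() follow the usual DPDK walk
 * convention: return 0 to move on to the next list, a positive value to
 * stop the walk successfully, or a negative value to abort it with an
 * error. A minimal hypothetical callback that merely counts 2 MB lists
 * might read:
 *
 *	static int
 *	count_2m_lists(const struct rte_memseg_list *msl, void *arg)
 *	{
 *		unsigned int *count = arg;
 *
 *		if (msl->page_sz == RTE_PGSIZE_2M)
 *			(*count)++;
 *		return 0;
 *	}
 *
 * alloc_seg_walk() below relies on this convention: it returns 1 once it
 * has allocated at least one segment and 0 to keep searching.
 */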

static int
alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct alloc_walk_param *wa = arg;
	struct rte_memseg_list *cur_msl;
	size_t page_sz;
	int cur_idx, start_idx, j;
	unsigned int msl_idx, need, i;

	if (msl->page_sz != wa->page_sz)
		return 0;
	if (msl->socket_id != wa->socket)
		return 0;

	page_sz = (size_t)msl->page_sz;

	msl_idx = msl - mcfg->memsegs;
	cur_msl = &mcfg->memsegs[msl_idx];

	need = wa->n_segs;

	/* Try finding space in the memseg list. */
	if (wa->exact) {
		/* If we require an exact number of pages in a list,
		 * find them.
		 */
		cur_idx = rte_fbarray_find_next_n_free(
			&cur_msl->memseg_arr, 0, need);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
	} else {
		int cur_len;

		/* We don't require an exact number of pages, so we go
		 * for a best-effort allocation: find the biggest unused
		 * block and take that.
		 */
		cur_idx = rte_fbarray_find_biggest_free(
			&cur_msl->memseg_arr, 0);
		if (cur_idx < 0)
			return 0;
		start_idx = cur_idx;
		/* Adjust the size to possibly be smaller than the original
		 * request, but do not allow it to be bigger.
		 */
		cur_len = rte_fbarray_find_contig_free(
			&cur_msl->memseg_arr, cur_idx);
		need = RTE_MIN(need, (unsigned int)cur_len);
	}

	for (i = 0; i < need; i++, cur_idx++) {
		struct rte_memseg *cur;
		void *map_addr;

		cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx);
		map_addr = RTE_PTR_ADD(cur_msl->base_va, cur_idx * page_sz);

		if (alloc_seg(cur, map_addr, wa->socket, wa->hi)) {
			EAL_LOG(DEBUG, "Attempted to allocate %u segments, "
				"but only %u were allocated", need, i);

			/* If an exact number wasn't requested, stop. */
			if (!wa->exact)
				goto out;

			/* Clean up the segments allocated so far. */
			for (j = start_idx; j < cur_idx; j++) {
				struct rte_memseg *tmp;
				struct rte_fbarray *arr = &cur_msl->memseg_arr;

				tmp = rte_fbarray_get(arr, j);
				rte_fbarray_set_free(arr, j);

				if (free_seg(tmp))
					EAL_LOG(DEBUG, "Cannot free page");
			}
			/* Clear the list. */
			if (wa->ms)
				memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);

			return -1;
		}
		if (wa->ms)
			wa->ms[i] = cur;

		rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx);
	}

out:
	wa->segs_allocated = i;
	if (i > 0)
		cur_msl->version++;

	/* If we didn't allocate any segments, move on to the next list. */
	return i > 0;
}

struct free_walk_param {
	struct hugepage_info *hi;
	struct rte_memseg *ms;
};

static int
free_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct rte_memseg_list *found_msl;
	struct free_walk_param *wa = arg;
	uintptr_t start_addr, end_addr;
	int msl_idx, seg_idx, ret;

	start_addr = (uintptr_t)msl->base_va;
	end_addr = start_addr + msl->len;

	if ((uintptr_t)wa->ms->addr < start_addr ||
		(uintptr_t)wa->ms->addr >= end_addr)
		return 0;

	msl_idx = msl - mcfg->memsegs;
	seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz;

	/* The msl parameter is const, so modify it via the mem config. */
	found_msl = &mcfg->memsegs[msl_idx];
	found_msl->version++;

	rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx);

	ret = free_seg(wa->ms);

	return (ret < 0) ? -1 : 1;
}
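
/* Illustrative aside (not part of the original file): with exact == false,
 * the walk may legitimately allocate fewer pages than requested, so callers
 * of the bulk allocator below must use the returned count rather than
 * n_segs. A hypothetical best-effort caller might read:
 *
 *	struct rte_memseg *segs[8];
 *	int got;
 *
 *	got = eal_memalloc_alloc_seg_bulk(segs, 8, RTE_PGSIZE_2M, 0, false);
 *	if (got > 0)
 *		consume_segments(segs, got);
 *
 * where consume_segments() is a hypothetical consumer; only the first
 * `got` entries of segs[] are valid.
 */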

int
eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs,
	size_t page_sz, int socket, bool exact)
{
	unsigned int i;
	int ret = -1;
	struct alloc_walk_param wa;
	struct hugepage_info *hi = NULL;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (internal_conf->legacy_mem) {
		EAL_LOG(ERR, "Dynamic allocation is not supported in legacy mode");
		return -ENOTSUP;
	}

	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
		if (page_sz == hpi->hugepage_sz) {
			hi = hpi;
			break;
		}
	}
	if (!hi) {
		EAL_LOG(ERR, "Cannot find relevant hugepage_info entry");
		return -1;
	}

	memset(&wa, 0, sizeof(wa));
	wa.exact = exact;
	wa.hi = hi;
	wa.ms = ms;
	wa.n_segs = n_segs;
	wa.page_sz = page_sz;
	wa.socket = socket;
	wa.segs_allocated = 0;

	/* memalloc is locked, so it's safe to use the thread-unsafe version. */
	ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
	if (ret == 0) {
		EAL_LOG(ERR, "Cannot find a suitable memseg_list");
		ret = -1;
	} else if (ret > 0) {
		ret = (int)wa.segs_allocated;
	}

	return ret;
}

struct rte_memseg *
eal_memalloc_alloc_seg(size_t page_sz, int socket)
{
	struct rte_memseg *ms = NULL;

	eal_memalloc_alloc_seg_bulk(&ms, 1, page_sz, socket, true);
	return ms;
}

int
eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
{
	int seg, ret = 0;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* Dynamic free is not supported in legacy mode. */
	if (internal_conf->legacy_mem)
		return -1;

	for (seg = 0; seg < n_segs; seg++) {
		struct rte_memseg *cur = ms[seg];
		struct hugepage_info *hi = NULL;
		struct free_walk_param wa;
		size_t i;
		int walk_res;

		/* If this page is marked as unfreeable, fail. */
		if (cur->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
			EAL_LOG(DEBUG, "Page is not allowed to be freed");
			ret = -1;
			continue;
		}

		memset(&wa, 0, sizeof(wa));

		for (i = 0; i < RTE_DIM(internal_conf->hugepage_info); i++) {
			hi = &internal_conf->hugepage_info[i];
			if (cur->hugepage_sz == hi->hugepage_sz)
				break;
		}
		if (i == RTE_DIM(internal_conf->hugepage_info)) {
			EAL_LOG(ERR, "Cannot find relevant hugepage_info entry");
			ret = -1;
			continue;
		}

		wa.ms = cur;
		wa.hi = hi;

		/* memalloc is locked, so it's safe to use the thread-unsafe
		 * version.
		 */
		walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
			&wa);
		if (walk_res == 1)
			continue;
		if (walk_res == 0)
			EAL_LOG(ERR, "Cannot find the memseg list");
		ret = -1;
	}
	return ret;
}

int
eal_memalloc_free_seg(struct rte_memseg *ms)
{
	return eal_memalloc_free_seg_bulk(&ms, 1);
}

int
eal_memalloc_sync_with_primary(void)
{
	/* No multi-process support. */
	EAL_LOG_NOT_IMPLEMENTED();
	return -ENOTSUP;
}

int
eal_memalloc_cleanup(void)
{
	/* Not implemented. */
	return 0;
}

int
eal_memalloc_init(void)
{
	/* No action required. */
	return 0;
}
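
/* Illustrative aside (not part of the original file): taken together, a
 * caller that already holds the memalloc lock could obtain and release a
 * single 2 MB hugepage segment on socket 0 roughly as follows:
 *
 *	struct rte_memseg *ms;
 *
 *	ms = eal_memalloc_alloc_seg(RTE_PGSIZE_2M, 0);
 *	if (ms != NULL)
 *		eal_memalloc_free_seg(ms);
 */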