/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */
#include <sys/mman.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>

#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>

#include "eal_private.h"
#include "eal_internal_cfg.h"
#include "eal_filesystem.h"
#include "eal_memcfg.h"
#include "eal_options.h"

#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE))

uint64_t
eal_get_baseaddr(void)
{
	/*
	 * FreeBSD may allocate something in the space where we will be mapping
	 * things before we get a chance to do so, so use a base address that is
	 * far away from where malloc() et al. usually map things.
	 */
	return 0x1000000000ULL;
}

/*
 * Get physical address of any mapped virtual address in the current process.
 */
phys_addr_t
rte_mem_virt2phy(const void *virtaddr)
{
	/* XXX not implemented. This function is only used by
	 * rte_mempool_virt2iova() when hugepages are disabled. */
	(void)virtaddr;
	return RTE_BAD_IOVA;
}

rte_iova_t
rte_mem_virt2iova(const void *virtaddr)
{
	return rte_mem_virt2phy(virtaddr);
}

int
rte_eal_hugepage_init(void)
{
	struct rte_mem_config *mcfg;
	uint64_t total_mem = 0;
	void *addr;
	unsigned int i, j, seg_idx = 0;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* for debug purposes, hugetlbfs can be disabled */
	if (internal_conf->no_hugetlbfs) {
		struct rte_memseg_list *msl;
		uint64_t mem_sz, page_sz;
		int n_segs;

		/* create a memseg list */
		msl = &mcfg->memsegs[0];

		mem_sz = internal_conf->memory;
		page_sz = RTE_PGSIZE_4K;
		n_segs = mem_sz / page_sz;

		if (eal_memseg_list_init_named(
				msl, "nohugemem", page_sz, n_segs, 0, true)) {
			return -1;
		}

		addr = mmap(NULL, mem_sz, PROT_READ | PROT_WRITE,
				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (addr == MAP_FAILED) {
			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
					strerror(errno));
			return -1;
		}

		msl->base_va = addr;
		msl->len = mem_sz;

		eal_memseg_list_populate(msl, addr, n_segs);

		return 0;
	}
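
	/*
	 * The loop below pulls physically contiguous buffers from FreeBSD's
	 * contigmem module: buffer j is mmap()ed MAP_SHARED | MAP_FIXED from
	 * the contigmem device at offset j * EAL_PAGE_SIZE (hpi->lock_descriptor
	 * is assumed to hold an already-open descriptor for that device), and
	 * its physical address is read from the "hw.contigmem.physaddr.<j>"
	 * sysctl. As an illustrative sketch only (not part of this code path),
	 * a standalone query for buffer 0 would look roughly like:
	 *
	 *	uint64_t pa;
	 *	size_t sz = sizeof(pa);
	 *	if (sysctlbyname("hw.contigmem.physaddr.0", &pa, &sz,
	 *			NULL, 0) == 0)
	 *		printf("buffer 0 @ 0x%" PRIx64 "\n", pa);
	 */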

	/* map all hugepages and sort them */
	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		struct hugepage_info *hpi;
		rte_iova_t prev_end = 0;
		int prev_ms_idx = -1;
		uint64_t page_sz, mem_needed;
		unsigned int n_pages, max_pages;

		hpi = &internal_conf->hugepage_info[i];
		page_sz = hpi->hugepage_sz;
		max_pages = hpi->num_pages[0];
		mem_needed = RTE_ALIGN_CEIL(internal_conf->memory - total_mem,
				page_sz);

		n_pages = RTE_MIN(mem_needed / page_sz, max_pages);

		for (j = 0; j < n_pages; j++) {
			struct rte_memseg_list *msl;
			struct rte_fbarray *arr;
			struct rte_memseg *seg;
			int msl_idx, ms_idx;
			rte_iova_t physaddr;
			int error;
			size_t sysctl_size = sizeof(physaddr);
			char physaddr_str[64];
			bool is_adjacent;

			/* first, check if this segment is IOVA-adjacent to
			 * the previous one.
			 */
			snprintf(physaddr_str, sizeof(physaddr_str),
					"hw.contigmem.physaddr.%d", j);
			error = sysctlbyname(physaddr_str, &physaddr,
					&sysctl_size, NULL, 0);
			if (error < 0) {
				RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
						"from %s\n", j, hpi->hugedir);
				return -1;
			}

			is_adjacent = prev_end != 0 && physaddr == prev_end;
			prev_end = physaddr + hpi->hugepage_sz;

			for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
					msl_idx++) {
				bool empty, need_hole;
				msl = &mcfg->memsegs[msl_idx];
				arr = &msl->memseg_arr;

				if (msl->page_sz != page_sz)
					continue;

				empty = arr->count == 0;

				/* we need a hole if this isn't an empty memseg
				 * list, and if the previous segment was not
				 * adjacent to the current one.
				 */
				need_hole = !empty && !is_adjacent;

				/* we need 1, plus hole if not adjacent */
				ms_idx = rte_fbarray_find_next_n_free(arr,
						0, 1 + (need_hole ? 1 : 0));

				/* memseg list is full? */
				if (ms_idx < 0)
					continue;

				if (need_hole && prev_ms_idx == ms_idx - 1)
					ms_idx++;
				prev_ms_idx = ms_idx;

				break;
			}
			if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
				RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
					RTE_STR(RTE_MAX_MEMSEG_PER_TYPE),
					RTE_STR(RTE_MAX_MEM_MB_PER_TYPE));
				return -1;
			}
			arr = &msl->memseg_arr;
			seg = rte_fbarray_get(arr, ms_idx);

			addr = RTE_PTR_ADD(msl->base_va,
					(size_t)msl->page_sz * ms_idx);

			/* address is already mapped in memseg list, so using
			 * MAP_FIXED here is safe.
			 */
			addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
					MAP_SHARED | MAP_FIXED,
					hpi->lock_descriptor,
					j * EAL_PAGE_SIZE);
			if (addr == MAP_FAILED) {
				RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
						j, hpi->hugedir);
				return -1;
			}

			seg->addr = addr;
			seg->iova = physaddr;
			seg->hugepage_sz = page_sz;
			seg->len = page_sz;
			seg->nchannel = mcfg->nchannel;
			seg->nrank = mcfg->nrank;
			seg->socket_id = 0;

			rte_fbarray_set_used(arr, ms_idx);

			RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
					PRIx64", len %zu\n",
					seg_idx++, addr, physaddr, page_sz);

			total_mem += seg->len;
		}
		if (total_mem >= internal_conf->memory)
			break;
	}
	if (total_mem < internal_conf->memory) {
		RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
				"requested: %" PRIu64 "M "
				"available: %" PRIu64 "M\n",
				internal_conf->memory >> 20, total_mem >> 20);
		return -1;
	}
	return 0;
}
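
/*
 * Secondary-process attach path. The memseg lists shared by the primary
 * already record where each contigmem buffer was mapped, so a secondary
 * process only needs to mmap() the same buffers MAP_FIXED at the recorded
 * virtual addresses; attach_segment() below fails if the kernel does not
 * return exactly the address the primary used (addr != ms->addr).
 */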

struct attach_walk_args {
	int fd_hugepage;
	int seg_idx;
};

static int
attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)
{
	struct attach_walk_args *wa = arg;
	void *addr;

	if (msl->external)
		return 0;

	addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
			wa->seg_idx * EAL_PAGE_SIZE);
	if (addr == MAP_FAILED || addr != ms->addr)
		return -1;
	wa->seg_idx++;

	return 0;
}

int
rte_eal_hugepage_attach(void)
{
	struct hugepage_info *hpi;
	int fd_hugepage = -1;
	unsigned int i;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	hpi = &internal_conf->hugepage_info[0];

	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		const struct hugepage_info *cur_hpi = &hpi[i];
		struct attach_walk_args wa;

		memset(&wa, 0, sizeof(wa));

		/* Obtain a file descriptor for contiguous memory */
		fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
		if (fd_hugepage < 0) {
			RTE_LOG(ERR, EAL, "Could not open %s\n",
					cur_hpi->hugedir);
			goto error;
		}
		wa.fd_hugepage = fd_hugepage;
		wa.seg_idx = 0;

		/* Map the contiguous memory into each memory segment */
		if (rte_memseg_walk(attach_segment, &wa) < 0) {
			RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
					wa.seg_idx, cur_hpi->hugedir);
			goto error;
		}

		close(fd_hugepage);
		fd_hugepage = -1;
	}

	/* hugepage_info is no longer required */
	return 0;

error:
	if (fd_hugepage >= 0)
		close(fd_hugepage);
	return -1;
}

int
rte_eal_using_phys_addrs(void)
{
	return 0;
}

static uint64_t
get_mem_amount(uint64_t page_sz, uint64_t max_mem)
{
	uint64_t area_sz, max_pages;

	/* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
	max_pages = RTE_MAX_MEMSEG_PER_LIST;
	max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);

	area_sz = RTE_MIN(page_sz * max_pages, max_mem);

	/* make sure the list isn't smaller than the page size */
	area_sz = RTE_MAX(area_sz, page_sz);

	return RTE_ALIGN(area_sz, page_sz);
}

static int
memseg_list_alloc(struct rte_memseg_list *msl)
{
	int flags = 0;

#ifdef RTE_ARCH_PPC_64
	flags |= EAL_RESERVE_HUGEPAGES;
#endif
	return eal_memseg_list_alloc(msl, flags);
}

static int
memseg_primary_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	int hpi_idx, msl_idx = 0;
	struct rte_memseg_list *msl;
	uint64_t max_mem, total_mem;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* no-huge does not need this at all */
	if (internal_conf->no_hugetlbfs)
		return 0;

	/* FreeBSD has an issue where a core dump will include the entire
	 * memory contents, including anonymous zero-page memory. Therefore,
	 * while we will be limiting the total amount of memory to
	 * RTE_MAX_MEM_MB, we will also be further limiting it to whatever
	 * memory is available to us through the contigmem driver (plus
	 * spacing blocks).
	 *
	 * So, at each stage, we will be checking how much memory we are
	 * preallocating, and adjusting all the values accordingly.
	 */

	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	total_mem = 0;

	/* create memseg lists */
	for (hpi_idx = 0; hpi_idx < (int) internal_conf->num_hugepage_sizes;
			hpi_idx++) {
		uint64_t max_type_mem, total_type_mem = 0;
		uint64_t avail_mem;
		int type_msl_idx, max_segs, avail_segs, total_segs = 0;
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_conf->hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		/* no NUMA support on FreeBSD */

		/* check if we've already exceeded total memory amount */
		if (total_mem >= max_mem)
			break;

		/* first, calculate theoretical limits according to config */
		max_type_mem = RTE_MIN(max_mem - total_mem,
				(uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
		max_segs = RTE_MAX_MEMSEG_PER_TYPE;

		/* now, limit all of that to whatever will actually be
		 * available to us, because without dynamic allocation support,
		 * all of that extra memory will be sitting there being useless
		 * and slowing down core dumps in case of a crash.
		 *
		 * we need (N*2)-1 segments because we cannot guarantee that
		 * each segment will be IOVA-contiguous with the previous one,
		 * so we will allocate more and put spaces between segments
		 * that are non-contiguous.
		 */
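		/* For example (illustrative numbers, not a fixed default):
		 * with 4 contigmem buffers of this page size, avail_segs
		 * below is (4 * 2) - 1 = 7, i.e. 4 slots for the buffers
		 * themselves plus up to 3 hole slots separating buffers
		 * that turn out not to be physically contiguous.
		 */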
355 */ 356 357 max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; 358 total_mem = 0; 359 360 /* create memseg lists */ 361 for (hpi_idx = 0; hpi_idx < (int) internal_conf->num_hugepage_sizes; 362 hpi_idx++) { 363 uint64_t max_type_mem, total_type_mem = 0; 364 uint64_t avail_mem; 365 int type_msl_idx, max_segs, avail_segs, total_segs = 0; 366 struct hugepage_info *hpi; 367 uint64_t hugepage_sz; 368 369 hpi = &internal_conf->hugepage_info[hpi_idx]; 370 hugepage_sz = hpi->hugepage_sz; 371 372 /* no NUMA support on FreeBSD */ 373 374 /* check if we've already exceeded total memory amount */ 375 if (total_mem >= max_mem) 376 break; 377 378 /* first, calculate theoretical limits according to config */ 379 max_type_mem = RTE_MIN(max_mem - total_mem, 380 (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20); 381 max_segs = RTE_MAX_MEMSEG_PER_TYPE; 382 383 /* now, limit all of that to whatever will actually be 384 * available to us, because without dynamic allocation support, 385 * all of that extra memory will be sitting there being useless 386 * and slowing down core dumps in case of a crash. 387 * 388 * we need (N*2)-1 segments because we cannot guarantee that 389 * each segment will be IOVA-contiguous with the previous one, 390 * so we will allocate more and put spaces between segments 391 * that are non-contiguous. 392 */ 393 avail_segs = (hpi->num_pages[0] * 2) - 1; 394 avail_mem = avail_segs * hugepage_sz; 395 396 max_type_mem = RTE_MIN(avail_mem, max_type_mem); 397 max_segs = RTE_MIN(avail_segs, max_segs); 398 399 type_msl_idx = 0; 400 while (total_type_mem < max_type_mem && 401 total_segs < max_segs) { 402 uint64_t cur_max_mem, cur_mem; 403 unsigned int n_segs; 404 405 if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { 406 RTE_LOG(ERR, EAL, 407 "No more space in memseg lists, please increase %s\n", 408 RTE_STR(RTE_MAX_MEMSEG_LISTS)); 409 return -1; 410 } 411 412 msl = &mcfg->memsegs[msl_idx++]; 413 414 cur_max_mem = max_type_mem - total_type_mem; 415 416 cur_mem = get_mem_amount(hugepage_sz, 417 cur_max_mem); 418 n_segs = cur_mem / hugepage_sz; 419 420 if (eal_memseg_list_init(msl, hugepage_sz, n_segs, 421 0, type_msl_idx, false)) 422 return -1; 423 424 total_segs += msl->memseg_arr.len; 425 total_type_mem = total_segs * hugepage_sz; 426 type_msl_idx++; 427 428 if (memseg_list_alloc(msl)) { 429 RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); 430 return -1; 431 } 432 } 433 total_mem += total_type_mem; 434 } 435 return 0; 436 } 437 438 static int 439 memseg_secondary_init(void) 440 { 441 struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 442 int msl_idx = 0; 443 struct rte_memseg_list *msl; 444 445 for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { 446 447 msl = &mcfg->memsegs[msl_idx]; 448 449 /* skip empty memseg lists */ 450 if (msl->memseg_arr.len == 0) 451 continue; 452 453 if (rte_fbarray_attach(&msl->memseg_arr)) { 454 RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n"); 455 return -1; 456 } 457 458 /* preallocate VA space */ 459 if (memseg_list_alloc(msl)) { 460 RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n"); 461 return -1; 462 } 463 } 464 465 return 0; 466 } 467 468 int 469 rte_eal_memseg_init(void) 470 { 471 return rte_eal_process_type() == RTE_PROC_PRIMARY ? 472 memseg_primary_init() : 473 memseg_secondary_init(); 474 } 475