/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017-2018 Intel Corporation
 */

#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_eal_paging.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_spinlock.h>

#include "eal_filesystem.h"
#include "eal_private.h"

#include "rte_fbarray.h"

#define MASK_SHIFT 6ULL
#define MASK_ALIGN (1ULL << MASK_SHIFT)
#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
#define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod)

/*
 * We use this to keep track of created/attached memory areas to prevent user
 * errors in API usage.
 */
struct mem_area {
	TAILQ_ENTRY(mem_area) next;
	void *addr;
	size_t len;
	int fd;
};
TAILQ_HEAD(mem_area_head, mem_area);
/* local per-process tailq */
static struct mem_area_head mem_area_tailq =
	TAILQ_HEAD_INITIALIZER(mem_area_tailq);
static rte_spinlock_t mem_area_lock = RTE_SPINLOCK_INITIALIZER;

/*
 * This is a mask that is always stored at the end of the array, to provide a
 * fast way of finding free/used spots without looping through each element.
 */

struct used_mask {
	unsigned int n_masks;
	uint64_t data[];
};

static size_t
calc_mask_size(unsigned int len)
{
	/* mask must be multiple of MASK_ALIGN, even though length of array
	 * itself may not be aligned on that boundary.
	 */
	len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
	return sizeof(struct used_mask) +
			sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
}

static size_t
calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len)
{
	size_t data_sz = elt_sz * len;
	size_t msk_sz = calc_mask_size(len);
	return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
}

static struct used_mask *
get_used_mask(void *data, unsigned int elt_sz, unsigned int len)
{
	return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
}
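
/*
 * Editor's worked example (illustrative, not part of the original code): for
 * an array of len = 100 elements with elt_sz = 8, MASK_ALIGN is 64, so the
 * used mask needs MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(100, 64)) == 2 uint64_t
 * words. calc_mask_size(100) is therefore sizeof(struct used_mask) + 2 * 8
 * bytes, and calc_data_size() rounds (100 * 8 + that) up to a page boundary,
 * which is why the mask lives right after the element data returned by
 * get_used_mask(). An element index such as 70 maps to mask word
 * MASK_LEN_TO_IDX(70) == 1 and bit MASK_LEN_TO_MOD(70) == 6 within that word.
 */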

static int
resize_and_map(int fd, const char *path, void *addr, size_t len)
{
	void *map_addr;

	if (eal_file_truncate(fd, len)) {
		EAL_LOG(ERR, "Cannot truncate %s", path);
		return -1;
	}

	map_addr = rte_mem_map(addr, len, RTE_PROT_READ | RTE_PROT_WRITE,
			RTE_MAP_SHARED | RTE_MAP_FORCE_ADDRESS, fd, 0);
	if (map_addr != addr) {
		return -1;
	}
	return 0;
}

static int
overlap(const struct mem_area *ma, const void *start, size_t len)
{
	const void *end = RTE_PTR_ADD(start, len);
	const void *ma_start = ma->addr;
	const void *ma_end = RTE_PTR_ADD(ma->addr, ma->len);

	/* start overlap? */
	if (start >= ma_start && start < ma_end)
		return 1;
	/* end overlap? */
	if (end > ma_start && end < ma_end)
		return 1;
	return 0;
}

static int
find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
		bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int msk_idx, lookahead_idx, first, first_mod;
	unsigned int last, last_mod;
	uint64_t last_msk, ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	ignore_msk = ~((1ULL << first_mod) - 1);

	/* array length may not be aligned, so calculate ignore mask for last
	 * mask index.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(UINT64_MAX << last_mod);

	for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
		uint64_t cur_msk, lookahead_msk;
		unsigned int run_start, clz, left;
		bool found = false;
		/*
		 * The process of getting n consecutive bits for arbitrary n is
		 * a bit involved, but here it is in a nutshell:
		 *
		 *  1. let n be the number of consecutive bits we're looking for
		 *  2. check if n can fit in one mask, and if so, do n-1
		 *     rshift-ands to see if there is an appropriate run inside
		 *     our current mask
		 *    2a. if we found a run, bail out early
		 *    2b. if we didn't find a run, proceed
		 *  3. invert the mask and count leading zeroes (that is, count
		 *     how many consecutive set bits we had starting from the
		 *     end of current mask) as k
		 *    3a. if k is 0, continue to next mask
		 *    3b. if k is not 0, we have a potential run
		 *  4. to satisfy our requirements, next mask must have n-k
		 *     consecutive set bits right at the start, so we will do
		 *     (n-k-1) rshift-ands and check if first bit is set.
		 *
		 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
		 * we either run out of masks, lose the run, or find what we
		 * were looking for.
		 */
		cur_msk = msk->data[msk_idx];
		left = n;

		/* if we're looking for free spaces, invert the mask */
		if (!used)
			cur_msk = ~cur_msk;

		/* combine current ignore mask with last index ignore mask */
		if (msk_idx == last)
			ignore_msk |= last_msk;

		/* if we have an ignore mask, ignore once */
		if (ignore_msk) {
			cur_msk &= ignore_msk;
			ignore_msk = 0;
		}

		/* if n can fit within a single mask, do a search */
		if (n <= MASK_ALIGN) {
			uint64_t tmp_msk = cur_msk;
			unsigned int s_idx;
			for (s_idx = 0; s_idx < n - 1; s_idx++)
				tmp_msk &= tmp_msk >> 1ULL;
			/* we found what we were looking for */
			if (tmp_msk != 0) {
				run_start = rte_ctz64(tmp_msk);
				return MASK_GET_IDX(msk_idx, run_start);
			}
		}

		/*
		 * we didn't find our run within the mask, or n > MASK_ALIGN,
		 * so we're going for plan B.
		 */

		/* count leading zeroes on inverted mask */
		if (~cur_msk == 0)
			clz = sizeof(cur_msk) * 8;
		else
			clz = rte_clz64(~cur_msk);

		/* if there aren't any runs at the end either, just continue */
		if (clz == 0)
			continue;

		/* we have a partial run at the end, so try looking ahead */
		run_start = MASK_ALIGN - clz;
		left -= clz;

		for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
				lookahead_idx++) {
			unsigned int s_idx, need;
			lookahead_msk = msk->data[lookahead_idx];

			/* if we're looking for free space, invert the mask */
			if (!used)
				lookahead_msk = ~lookahead_msk;

			/* figure out how many consecutive bits we need here */
			need = RTE_MIN(left, MASK_ALIGN);

			for (s_idx = 0; s_idx < need - 1; s_idx++)
				lookahead_msk &= lookahead_msk >> 1ULL;

			/* if first bit is not set, we've lost the run */
			if ((lookahead_msk & 1) == 0) {
				/*
				 * we've scanned this far, so we know there are
				 * no runs in the space we've lookahead-scanned
				 * as well, so skip that on next iteration.
				 */
				ignore_msk = ~((1ULL << need) - 1);
				msk_idx = lookahead_idx;
				break;
			}

			left -= need;

			/* check if we've found what we were looking for */
			if (left == 0) {
				found = true;
				break;
			}
		}

		/* we didn't find anything, so continue */
		if (!found)
			continue;

		return MASK_GET_IDX(msk_idx, run_start);
	}
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}
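
/*
 * Editor's worked example (illustrative, not part of the original code): to
 * find n == 3 consecutive set bits with the rshift-and loop in find_next_n()
 * above, given cur_msk == 0b01110110:
 *
 *	tmp = 0b01110110 & (0b01110110 >> 1)
 *	    = 0b01110110 & 0b00111011 = 0b00110010
 *	tmp = 0b00110010 & (0b00110010 >> 1)
 *	    = 0b00110010 & 0b00011001 = 0b00010000
 *
 * tmp is non-zero and rte_ctz64(tmp) == 4, i.e. bits 4..6 form the run. If n
 * were larger than the run at the top of the word, clz on the inverted mask
 * would report how many set bits sit there, and the lookahead loop would
 * require the remaining (n - clz) bits at the bottom of the next mask word.
 */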

static int
find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	unsigned int last, last_mod;
	uint64_t last_msk, ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	ignore_msk = ~((1ULL << first_mod) - 1ULL);

	/* array length may not be aligned, so calculate ignore mask for last
	 * mask index.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-(1ULL) << last_mod);

	for (idx = first; idx < msk->n_masks; idx++) {
		uint64_t cur = msk->data[idx];
		int found;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		if (idx == last)
			cur &= last_msk;

		/* ignore everything before start on first iteration */
		if (idx == first)
			cur &= ignore_msk;

		/* check if we have any entries */
		if (cur == 0)
			continue;

		/*
		 * find first set bit - that will correspond to whatever it is
		 * that we're looking for.
		 */
		found = rte_ctz64(cur);
		return MASK_GET_IDX(idx, found);
	}
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}

static int
find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	unsigned int last, last_mod;
	uint64_t last_msk;
	unsigned int need_len, result = 0;

	/* array length may not be aligned, so calculate ignore mask for last
	 * mask index.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-(1ULL) << last_mod);

	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
		uint64_t cur = msk->data[idx];
		unsigned int run_len;

		need_len = MASK_ALIGN;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		/* if this is last mask, ignore everything after last bit */
		if (idx == last)
			cur &= last_msk;

		/* ignore everything before start on first iteration */
		if (idx == first) {
			cur >>= first_mod;
			/* at the start, we don't need the full mask len */
			need_len -= first_mod;
		}

		/* we will be looking for zeroes, so invert the mask */
		cur = ~cur;

		/* if mask is zero, we have a complete run */
		if (cur == 0)
			continue;

		/* see if current run ends before mask end */
		run_len = rte_ctz64(cur);

		/* add however many zeroes we've had in the last run and quit */
		if (run_len < need_len) {
			result += run_len;
			break;
		}
	}
	return result;
}
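
/*
 * Editor's worked example (illustrative, not part of the original code):
 * counting a contiguous run with find_contig(). Suppose we look for used
 * entries starting at index 2 and the first mask word is 0b00011100. After
 * "cur >>= first_mod" the word becomes 0b00000111; inverting it gives
 * ...11111000, and rte_ctz64() of that is 3, so the run is 3 entries long
 * (indices 2, 3 and 4). Had the shifted word been all ones, the loop would
 * have added need_len to the result and continued into the next mask word.
 */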

static int
find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
		bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int msk_idx, lookbehind_idx, first, first_mod;
	uint64_t ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	/* we're going backwards, so mask must start from the top */
	ignore_msk = first_mod == MASK_ALIGN - 1 ?
				UINT64_MAX : /* prevent overflow */
				~(UINT64_MAX << (first_mod + 1));

	/* go backwards, include zero */
	msk_idx = first;
	do {
		uint64_t cur_msk, lookbehind_msk;
		unsigned int run_start, run_end, ctz, left;
		bool found = false;
		/*
		 * The process of getting n consecutive bits from the top for
		 * arbitrary n is a bit involved, but here it is in a nutshell:
		 *
		 *  1. let n be the number of consecutive bits we're looking for
		 *  2. check if n can fit in one mask, and if so, do n-1
		 *     lshift-ands to see if there is an appropriate run inside
		 *     our current mask
		 *    2a. if we found a run, bail out early
		 *    2b. if we didn't find a run, proceed
		 *  3. invert the mask and count trailing zeroes (that is, count
		 *     how many consecutive set bits we had starting from the
		 *     start of current mask) as k
		 *    3a. if k is 0, continue to next mask
		 *    3b. if k is not 0, we have a potential run
		 *  4. to satisfy our requirements, next mask must have n-k
		 *     consecutive set bits at the end, so we will do (n-k-1)
		 *     lshift-ands and check if last bit is set.
		 *
		 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
		 * we either run out of masks, lose the run, or find what we
		 * were looking for.
		 */
		cur_msk = msk->data[msk_idx];
		left = n;

		/* if we're looking for free spaces, invert the mask */
		if (!used)
			cur_msk = ~cur_msk;

		/* if we have an ignore mask, ignore once */
		if (ignore_msk) {
			cur_msk &= ignore_msk;
			ignore_msk = 0;
		}

		/* if n can fit within a single mask, do a search */
		if (n <= MASK_ALIGN) {
			uint64_t tmp_msk = cur_msk;
			unsigned int s_idx;
			for (s_idx = 0; s_idx < n - 1; s_idx++)
				tmp_msk &= tmp_msk << 1ULL;
			/* we found what we were looking for */
			if (tmp_msk != 0) {
				/* clz will give us offset from end of mask, and
				 * we only get the end of our run, not start,
				 * so adjust result to point to where start
				 * would have been.
				 */
				run_start = MASK_ALIGN -
						rte_clz64(tmp_msk) - n;
				return MASK_GET_IDX(msk_idx, run_start);
			}
		}

		/*
		 * we didn't find our run within the mask, or n > MASK_ALIGN,
		 * so we're going for plan B.
		 */

		/* count trailing zeroes on inverted mask */
		if (~cur_msk == 0)
			ctz = sizeof(cur_msk) * 8;
		else
			ctz = rte_ctz64(~cur_msk);

		/* if there aren't any runs at the start either, just
		 * continue
		 */
		if (ctz == 0)
			continue;

		/* we have a partial run at the start, so try looking behind */
		run_end = MASK_GET_IDX(msk_idx, ctz);
		left -= ctz;

		/* go backwards, include zero */
		lookbehind_idx = msk_idx - 1;

		/* we can't lookbehind as we've run out of masks, so stop */
		if (msk_idx == 0)
			break;

		do {
			const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
			unsigned int s_idx, need;

			lookbehind_msk = msk->data[lookbehind_idx];

			/* if we're looking for free space, invert the mask */
			if (!used)
				lookbehind_msk = ~lookbehind_msk;

			/* figure out how many consecutive bits we need here */
			need = RTE_MIN(left, MASK_ALIGN);

			for (s_idx = 0; s_idx < need - 1; s_idx++)
				lookbehind_msk &= lookbehind_msk << 1ULL;

			/* if last bit is not set, we've lost the run */
			if ((lookbehind_msk & last_bit) == 0) {
				/*
				 * we've scanned this far, so we know there are
				 * no runs in the space we've lookbehind-scanned
				 * as well, so skip that on next iteration.
				 */
				ignore_msk = UINT64_MAX << need;
				msk_idx = lookbehind_idx;
				break;
			}

			left -= need;

			/* check if we've found what we were looking for */
			if (left == 0) {
				found = true;
				break;
			}
		} while ((lookbehind_idx--) != 0); /* decrement after check to
						    * include zero
						    */

		/* we didn't find anything, so continue */
		if (!found)
			continue;

		/* we've found what we were looking for, but we only know where
		 * the run ended, so calculate start position.
		 */
		return run_end - n;
	} while (msk_idx-- != 0); /* decrement after check to include zero */
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}

static int
find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	uint64_t ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing clz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	/* we're going backwards, so mask must start from the top */
	ignore_msk = first_mod == MASK_ALIGN - 1 ?
				UINT64_MAX : /* prevent overflow */
				~(UINT64_MAX << (first_mod + 1));

	/* go backwards, include zero */
	idx = first;
	do {
		uint64_t cur = msk->data[idx];
		int found;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		/* ignore everything before start on first iteration */
		if (idx == first)
			cur &= ignore_msk;

		/* check if we have any entries */
		if (cur == 0)
			continue;

		/*
		 * find last set bit - that will correspond to whatever it is
		 * that we're looking for. we're counting leading zeroes, thus
		 * the value we get is counted from the end of the mask, so
		 * calculate the position from the start of the mask.
		 */
		found = MASK_ALIGN - rte_clz64(cur) - 1;

		return MASK_GET_IDX(idx, found);
	} while (idx-- != 0); /* decrement after check to include zero */

	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}

static int
find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	unsigned int need_len, result = 0;

	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);

	/* go backwards, include zero */
	idx = first;
	do {
		uint64_t cur = msk->data[idx];
		unsigned int run_len;

		need_len = MASK_ALIGN;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		/* ignore everything after start on first iteration */
		if (idx == first) {
			unsigned int end_len = MASK_ALIGN - first_mod - 1;
			cur <<= end_len;
			/* at the start, we don't need the full mask len */
			need_len -= end_len;
		}

		/* we will be looking for zeroes, so invert the mask */
		cur = ~cur;

		/* if mask is zero, we have a complete run */
		if (cur == 0)
			goto endloop;

		/* see where run ends, starting from the end */
		run_len = rte_clz64(cur);

		/* add however many zeroes we've had in the last run and quit */
		if (run_len < need_len) {
			result += run_len;
			break;
		}
endloop:
		result += need_len;
	} while (idx-- != 0); /* decrement after check to include zero */
	return result;
}

static int
set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
{
	struct used_mask *msk;
	uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
	unsigned int msk_idx = MASK_LEN_TO_IDX(idx);
	bool already_used;
	int ret = -1;

	if (arr == NULL || idx >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}
	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
	ret = 0;

	/* prevent array from changing under us */
	rte_rwlock_write_lock(&arr->rwlock);

	already_used = (msk->data[msk_idx] & msk_bit) != 0;

	/* nothing to be done */
	if (used == already_used)
		goto out;

	if (used) {
		msk->data[msk_idx] |= msk_bit;
		arr->count++;
	} else {
		msk->data[msk_idx] &= ~msk_bit;
		arr->count--;
	}
out:
	rte_rwlock_write_unlock(&arr->rwlock);

	return ret;
}

static int
fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
{
	if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) {
		rte_errno = EINVAL;
		return -1;
	}

	if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
		rte_errno = ENAMETOOLONG;
		return -1;
	}
	return 0;
}

int
rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
		unsigned int elt_sz)
{
	size_t page_sz, mmap_len;
	char path[PATH_MAX];
	struct used_mask *msk;
	struct mem_area *ma = NULL;
	void *data = NULL;
	int fd = -1;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	if (fully_validate(name, elt_sz, len))
		return -1;

	/* allocate mem area before doing anything */
	ma = malloc(sizeof(*ma));
	if (ma == NULL) {
		rte_errno = ENOMEM;
		return -1;
	}

	page_sz = rte_mem_page_size();
	if (page_sz == (size_t)-1) {
		free(ma);
		return -1;
	}

	/* calculate our memory limits */
	mmap_len = calc_data_size(page_sz, elt_sz, len);

	data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
	if (data == NULL) {
		free(ma);
		return -1;
	}

	rte_spinlock_lock(&mem_area_lock);

	fd = -1;

	if (internal_conf->no_shconf) {
		/* remap virtual area as writable */
		static const int flags = RTE_MAP_FORCE_ADDRESS |
			RTE_MAP_PRIVATE | RTE_MAP_ANONYMOUS;
		void *new_data = rte_mem_map(data, mmap_len,
			RTE_PROT_READ | RTE_PROT_WRITE, flags, fd, 0);
		if (new_data == NULL) {
			EAL_LOG(DEBUG, "%s(): couldn't remap anonymous memory: %s",
					__func__, rte_strerror(rte_errno));
			goto fail;
		}
	} else {
		eal_get_fbarray_path(path, sizeof(path), name);

		/*
		 * Each fbarray is unique to process namespace, i.e. the
		 * filename depends on process prefix. Try to take out a lock
		 * and see if we succeed. If we don't, someone else is using it
		 * already.
		 */
		fd = eal_file_open(path, EAL_OPEN_CREATE | EAL_OPEN_READWRITE);
		if (fd < 0) {
			EAL_LOG(DEBUG, "%s(): couldn't open %s: %s",
					__func__, path, rte_strerror(rte_errno));
			goto fail;
		} else if (eal_file_lock(
				fd, EAL_FLOCK_EXCLUSIVE, EAL_FLOCK_RETURN)) {
			EAL_LOG(DEBUG, "%s(): couldn't lock %s: %s",
					__func__, path, rte_strerror(rte_errno));
			rte_errno = EBUSY;
			goto fail;
		}

		/* take out a non-exclusive lock, so that other processes could
		 * still attach to it, but no other process could reinitialize
		 * it.
		 */
		if (eal_file_lock(fd, EAL_FLOCK_SHARED, EAL_FLOCK_RETURN))
			goto fail;

		if (resize_and_map(fd, path, data, mmap_len))
			goto fail;
	}
	ma->addr = data;
	ma->len = mmap_len;
	ma->fd = fd;

	/* do not close fd - keep it until detach/destroy */
	TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);

	/* initialize the data */
	memset(data, 0, mmap_len);

	/* populate data structure */
	strlcpy(arr->name, name, sizeof(arr->name));
	arr->data = data;
	arr->len = len;
	arr->elt_sz = elt_sz;
	arr->count = 0;

	msk = get_used_mask(data, elt_sz, len);
	msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));

	rte_rwlock_init(&arr->rwlock);

	rte_spinlock_unlock(&mem_area_lock);

	return 0;
fail:
	if (data)
		rte_mem_unmap(data, mmap_len);
	if (fd >= 0)
		close(fd);
	free(ma);

	rte_spinlock_unlock(&mem_area_lock);
	return -1;
}
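
/*
 * Editor's usage sketch (illustrative, not part of the original file): how a
 * primary process might create an fbarray with this API. The array name,
 * length and element type below are made up for the example.
 *
 *	struct rte_fbarray arr;
 *
 *	if (rte_fbarray_init(&arr, "example_arr", 1024, sizeof(uint64_t)) < 0)
 *		rte_panic("fbarray init failed: %s\n",
 *				rte_strerror(rte_errno));
 *
 *	uint64_t *elt = rte_fbarray_get(&arr, 5);	// pointer to element 5
 *	*elt = 42;
 *	rte_fbarray_set_used(&arr, 5);			// mark it as used
 */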

int
rte_fbarray_attach(struct rte_fbarray *arr)
{
	struct mem_area *ma = NULL, *tmp = NULL;
	size_t page_sz, mmap_len;
	char path[PATH_MAX];
	void *data = NULL;
	int fd = -1;

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	/*
	 * we don't need to synchronize attach as two values we need (element
	 * size and array length) are constant for the duration of life of
	 * the array, so the parts we care about will not race.
	 */

	if (fully_validate(arr->name, arr->elt_sz, arr->len))
		return -1;

	ma = malloc(sizeof(*ma));
	if (ma == NULL) {
		rte_errno = ENOMEM;
		return -1;
	}

	page_sz = rte_mem_page_size();
	if (page_sz == (size_t)-1) {
		free(ma);
		return -1;
	}

	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);

	/* check the tailq - maybe user has already mapped this address space */
	rte_spinlock_lock(&mem_area_lock);

	TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
		if (overlap(tmp, arr->data, mmap_len)) {
			rte_errno = EEXIST;
			goto fail;
		}
	}

	/* we know this memory area is unique, so proceed */

	data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
	if (data == NULL)
		goto fail;

	eal_get_fbarray_path(path, sizeof(path), arr->name);

	fd = eal_file_open(path, EAL_OPEN_READWRITE);
	if (fd < 0)
		goto fail;

	/* lock the file, to let others know we're using it */
	if (eal_file_lock(fd, EAL_FLOCK_SHARED, EAL_FLOCK_RETURN))
		goto fail;

	if (resize_and_map(fd, path, data, mmap_len))
		goto fail;

	/* store our new memory area */
	ma->addr = data;
	ma->fd = fd; /* keep fd until detach/destroy */
	ma->len = mmap_len;

	TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);

	/* we're done */

	rte_spinlock_unlock(&mem_area_lock);
	return 0;
fail:
	if (data)
		rte_mem_unmap(data, mmap_len);
	if (fd >= 0)
		close(fd);
	free(ma);
	rte_spinlock_unlock(&mem_area_lock);
	return -1;
}
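
/*
 * Editor's usage sketch (illustrative, not part of the original file): a
 * secondary process does not call rte_fbarray_init(); instead it obtains the
 * rte_fbarray structure from memory shared by the primary (how that structure
 * is shared is application-specific here) and maps the backing file at the
 * same address:
 *
 *	if (rte_fbarray_attach(shared_arr) < 0)
 *		handle_error(rte_errno);	// hypothetical error handler
 *
 *	// ... use the array ...
 *
 *	rte_fbarray_detach(shared_arr);		// unmap when done
 */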

int
rte_fbarray_detach(struct rte_fbarray *arr)
{
	struct mem_area *tmp = NULL;
	size_t mmap_len;
	int ret = -1;

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	/*
	 * we don't need to synchronize detach as two values we need (element
	 * size and total capacity) are constant for the duration of life of
	 * the array, so the parts we care about will not race. if the user is
	 * detaching while doing something else in the same process, we can't
	 * really do anything about it, things will blow up either way.
	 */

	size_t page_sz = rte_mem_page_size();
	if (page_sz == (size_t)-1)
		return -1;

	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);

	/* does this area exist? */
	rte_spinlock_lock(&mem_area_lock);

	TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
		if (tmp->addr == arr->data && tmp->len == mmap_len)
			break;
	}
	if (tmp == NULL) {
		rte_errno = ENOENT;
		ret = -1;
		goto out;
	}

	rte_mem_unmap(arr->data, mmap_len);

	/* area is unmapped, close fd and remove the tailq entry */
	if (tmp->fd >= 0)
		close(tmp->fd);
	TAILQ_REMOVE(&mem_area_tailq, tmp, next);
	free(tmp);

	ret = 0;
out:
	rte_spinlock_unlock(&mem_area_lock);
	return ret;
}

int
rte_fbarray_destroy(struct rte_fbarray *arr)
{
	struct mem_area *tmp = NULL;
	size_t mmap_len;
	int fd, ret;
	char path[PATH_MAX];
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	/*
	 * we don't need to synchronize destroy as two values we need (element
	 * size and total capacity) are constant for the duration of life of
	 * the array, so the parts we care about will not race. if the user is
	 * destroying while doing something else in the same process, we can't
	 * really do anything about it, things will blow up either way.
	 */

	size_t page_sz = rte_mem_page_size();
	if (page_sz == (size_t)-1)
		return -1;

	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);

	/* does this area exist? */
	rte_spinlock_lock(&mem_area_lock);

	TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
		if (tmp->addr == arr->data && tmp->len == mmap_len)
			break;
	}
	if (tmp == NULL) {
		rte_errno = ENOENT;
		ret = -1;
		goto out;
	}
	/* with no shconf, there were never any files to begin with */
	if (!internal_conf->no_shconf) {
		/*
		 * attempt to get an exclusive lock on the file, to ensure it
		 * has been detached by all other processes
		 */
		fd = tmp->fd;
		if (eal_file_lock(fd, EAL_FLOCK_EXCLUSIVE, EAL_FLOCK_RETURN)) {
			EAL_LOG(DEBUG, "Cannot destroy fbarray - another process is using it");
			rte_errno = EBUSY;
			ret = -1;
			goto out;
		}

		/* we're OK to destroy the file */
		eal_get_fbarray_path(path, sizeof(path), arr->name);
		if (unlink(path)) {
			EAL_LOG(DEBUG, "Cannot unlink fbarray: %s",
					strerror(errno));
			rte_errno = errno;
			/*
			 * we're still holding an exclusive lock, so drop it to
			 * shared.
			 */
			eal_file_lock(fd, EAL_FLOCK_SHARED, EAL_FLOCK_RETURN);

			ret = -1;
			goto out;
		}
		close(fd);
	}
	rte_mem_unmap(arr->data, mmap_len);

	/* area is unmapped, remove the tailq entry */
	TAILQ_REMOVE(&mem_area_tailq, tmp, next);
	free(tmp);
	ret = 0;

	/* reset the fbarray structure */
	memset(arr, 0, sizeof(*arr));
out:
	rte_spinlock_unlock(&mem_area_lock);
	return ret;
}
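
/*
 * Editor's note (illustrative, not part of the original file): detach only
 * unmaps the local mapping and closes this process's file descriptor, which
 * releases its shared lock, while destroy additionally requires an exclusive
 * lock and unlinks the backing file. A typical teardown is therefore:
 *
 *	// in every secondary process
 *	rte_fbarray_detach(arr);
 *
 *	// in the primary, once all secondaries have detached
 *	if (rte_fbarray_destroy(arr) < 0 && rte_errno == EBUSY)
 *		printf("some process is still attached\n");
 */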

void *
rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx)
{
	void *ret = NULL;
	if (arr == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (idx >= arr->len) {
		rte_errno = EINVAL;
		return NULL;
	}

	ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);

	return ret;
}

int
rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx)
{
	return set_used(arr, idx, true);
}

int
rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx)
{
	return set_used(arr, idx, false);
}

int
rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
{
	struct used_mask *msk;
	int msk_idx;
	uint64_t msk_bit;
	int ret = -1;

	if (arr == NULL || idx >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
	msk_idx = MASK_LEN_TO_IDX(idx);
	msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);

	ret = (msk->data[msk_idx] & msk_bit) != 0;

	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}

static int
fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
{
	int ret = -1;

	if (arr == NULL || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	/* cheap checks to prevent doing useless work */
	if (!used) {
		if (arr->len == arr->count) {
			rte_errno = ENOSPC;
			goto out;
		}
		if (arr->count == 0) {
			ret = start;
			goto out;
		}
	} else {
		if (arr->count == 0) {
			rte_errno = ENOENT;
			goto out;
		}
		if (arr->len == arr->count) {
			ret = start;
			goto out;
		}
	}
	if (next)
		ret = find_next(arr, start, used);
	else
		ret = find_prev(arr, start, used);
out:
	rte_rwlock_read_unlock(&arr->rwlock);
	return ret;
}

int
rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, true, false);
}

int
rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, true, true);
}

int
rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, false, false);
}

int
rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, false, true);
}
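
/*
 * Editor's usage sketch (illustrative, not part of the original file):
 * allocating the first free slot of an fbarray and marking it used. The
 * helper name is made up for the example.
 *
 *	int idx = rte_fbarray_find_next_free(arr, 0);
 *	if (idx < 0) {
 *		// rte_errno is ENOSPC when the array is full
 *		return -1;
 *	}
 *	rte_fbarray_set_used(arr, idx);
 *	my_entry_init(rte_fbarray_get(arr, idx));	// hypothetical helper
 *
 * Note that the find and the set are separate calls, each taking the rwlock
 * on its own, so external synchronization is needed if several threads
 * allocate slots concurrently.
 */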

static int
fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
		bool next, bool used)
{
	int ret = -1;

	if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
		rte_errno = EINVAL;
		return -1;
	}
	if (next && (arr->len - start) < n) {
		rte_errno = used ? ENOENT : ENOSPC;
		return -1;
	}
	if (!next && start < (n - 1)) {
		rte_errno = used ? ENOENT : ENOSPC;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	/* cheap checks to prevent doing useless work */
	if (!used) {
		if (arr->len == arr->count || arr->len - arr->count < n) {
			rte_errno = ENOSPC;
			goto out;
		}
		if (arr->count == 0) {
			ret = next ? start : start - n + 1;
			goto out;
		}
	} else {
		if (arr->count < n) {
			rte_errno = ENOENT;
			goto out;
		}
		if (arr->count == arr->len) {
			ret = next ? start : start - n + 1;
			goto out;
		}
	}

	if (next)
		ret = find_next_n(arr, start, n, used);
	else
		ret = find_prev_n(arr, start, n, used);
out:
	rte_rwlock_read_unlock(&arr->rwlock);
	return ret;
}

int
rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, true, false);
}

int
rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, true, true);
}

int
rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, false, false);
}

int
rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, false, true);
}

static int
fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
		bool used)
{
	int ret = -1;

	if (arr == NULL || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	/* cheap checks to prevent doing useless work */
	if (used) {
		if (arr->count == 0) {
			ret = 0;
			goto out;
		}
		if (next && arr->count == arr->len) {
			ret = arr->len - start;
			goto out;
		}
		if (!next && arr->count == arr->len) {
			ret = start + 1;
			goto out;
		}
	} else {
		if (arr->len == arr->count) {
			ret = 0;
			goto out;
		}
		if (next && arr->count == 0) {
			ret = arr->len - start;
			goto out;
		}
		if (!next && arr->count == 0) {
			ret = start + 1;
			goto out;
		}
	}

	if (next)
		ret = find_contig(arr, start, used);
	else
		ret = find_rev_contig(arr, start, used);
out:
	rte_rwlock_read_unlock(&arr->rwlock);
	return ret;
}
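
/*
 * Editor's usage sketch (illustrative, not part of the original file):
 * reserving a contiguous run of 8 free slots and marking them used. The
 * marking loop is made up for the example.
 *
 *	int first = rte_fbarray_find_next_n_free(arr, 0, 8);
 *	if (first < 0)
 *		return -1;	// rte_errno == ENOSPC: no run of 8 free slots
 *	for (unsigned int i = 0; i < 8; i++)
 *		rte_fbarray_set_used(arr, first + i);
 */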

static int
fbarray_find_biggest(struct rte_fbarray *arr, unsigned int start, bool used,
		bool rev)
{
	int cur_idx, next_idx, cur_len, biggest_idx, biggest_len;
	/* don't stack if conditions, use function pointers instead */
	int (*find_func)(struct rte_fbarray *, unsigned int);
	int (*find_contig_func)(struct rte_fbarray *, unsigned int);

	if (arr == NULL || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}
	/* the other API calls already do their fair share of cheap checks, so
	 * no need to do them here.
	 */

	/* the APIs called are thread-safe, but something may still happen
	 * between the API calls, so lock the fbarray. all other APIs are
	 * read-locking the fbarray, so read lock here is OK.
	 */
	rte_rwlock_read_lock(&arr->rwlock);

	/* pick out appropriate functions */
	if (used) {
		if (rev) {
			find_func = rte_fbarray_find_prev_used;
			find_contig_func = rte_fbarray_find_rev_contig_used;
		} else {
			find_func = rte_fbarray_find_next_used;
			find_contig_func = rte_fbarray_find_contig_used;
		}
	} else {
		if (rev) {
			find_func = rte_fbarray_find_prev_free;
			find_contig_func = rte_fbarray_find_rev_contig_free;
		} else {
			find_func = rte_fbarray_find_next_free;
			find_contig_func = rte_fbarray_find_contig_free;
		}
	}

	cur_idx = start;
	biggest_idx = -1; /* default is error */
	biggest_len = 0;
	for (;;) {
		cur_idx = find_func(arr, cur_idx);

		/* block found, check its length */
		if (cur_idx >= 0) {
			cur_len = find_contig_func(arr, cur_idx);
			/* decide where we go next */
			next_idx = rev ? cur_idx - cur_len : cur_idx + cur_len;
			/* move current index to start of chunk */
			cur_idx = rev ? next_idx + 1 : cur_idx;

			if (cur_len > biggest_len) {
				biggest_idx = cur_idx;
				biggest_len = cur_len;
			}
			cur_idx = next_idx;
			/* in reverse mode, next_idx may be -1 if chunk started
			 * at array beginning. this means there's no more work
			 * to do.
			 */
			if (cur_idx < 0)
				break;
		} else {
			/* nothing more to find, stop. however, a failed API
			 * call has set rte_errno, which we want to ignore, as
			 * reaching the end of fbarray is not an error.
			 */
			rte_errno = 0;
			break;
		}
	}
	/* if we didn't find anything at all, set rte_errno */
	if (biggest_idx < 0)
		rte_errno = used ? ENOENT : ENOSPC;

	rte_rwlock_read_unlock(&arr->rwlock);
	return biggest_idx;
}

int
rte_fbarray_find_biggest_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_biggest(arr, start, false, false);
}

int
rte_fbarray_find_biggest_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_biggest(arr, start, true, false);
}

int
rte_fbarray_find_rev_biggest_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_biggest(arr, start, false, true);
}

int
rte_fbarray_find_rev_biggest_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_biggest(arr, start, true, true);
}

int
rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, true, false);
}

int
rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, true, true);
}

int
rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, false, false);
}

int
rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, false, true);
}
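
/*
 * Editor's usage sketch (illustrative, not part of the original file):
 * locating the largest contiguous block of free slots and measuring it.
 *
 *	int idx = rte_fbarray_find_biggest_free(arr, 0);
 *	if (idx >= 0) {
 *		int len = rte_fbarray_find_contig_free(arr, idx);
 *		printf("largest free block: %d slots at index %d\n", len, idx);
 *	}
 *
 * As with the other find calls, the result is only a snapshot: the read lock
 * is dropped on return, so the block may change before it is acted upon.
 */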

int
rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
{
	void *end;
	int ret = -1;

	/*
	 * no need to synchronize as it doesn't matter if underlying data
	 * changes - we're doing pointer arithmetic here.
	 */

	if (arr == NULL || elt == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
	if (elt < arr->data || elt >= end) {
		rte_errno = EINVAL;
		return -1;
	}

	ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;

	return ret;
}

void
rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
{
	struct used_mask *msk;
	unsigned int i;

	if (arr == NULL || f == NULL) {
		rte_errno = EINVAL;
		return;
	}

	if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
		fprintf(f, "Invalid file-backed array\n");
		return;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	fprintf(f, "File-backed array: %s\n", arr->name);
	fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
			arr->len, arr->count, arr->elt_sz);

	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);

	for (i = 0; i < msk->n_masks; i++)
		fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);
	rte_rwlock_read_unlock(&arr->rwlock);
}
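
/*
 * Editor's usage sketch (illustrative, not part of the original file):
 * mapping an element pointer back to its index, and dumping the metadata for
 * debugging.
 *
 *	void *elt = rte_fbarray_get(arr, 17);
 *	int idx = rte_fbarray_find_idx(arr, elt);	// idx == 17 on success
 *
 *	rte_fbarray_dump_metadata(arr, stdout);
 */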