/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017-2018 Intel Corporation
 */

#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_eal_paging.h>
#include <rte_errno.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_tailq.h>

#include "eal_filesystem.h"
#include "eal_private.h"

#include "rte_fbarray.h"

#define MASK_SHIFT 6ULL
#define MASK_ALIGN (1ULL << MASK_SHIFT)
#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
#define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod)

/*
 * We use this to keep track of created/attached memory areas to prevent user
 * errors in API usage.
 */
struct mem_area {
	TAILQ_ENTRY(mem_area) next;
	void *addr;
	size_t len;
	int fd;
};
TAILQ_HEAD(mem_area_head, mem_area);
/* local per-process tailq */
static struct mem_area_head mem_area_tailq =
	TAILQ_HEAD_INITIALIZER(mem_area_tailq);
static rte_spinlock_t mem_area_lock = RTE_SPINLOCK_INITIALIZER;

/*
 * This is a mask that is always stored at the end of the array, to provide a
 * fast way of finding free/used spots without looping through each element.
 */

struct used_mask {
	unsigned int n_masks;
	uint64_t data[];
};

static size_t
calc_mask_size(unsigned int len)
{
	/* mask must be a multiple of MASK_ALIGN, even though length of array
	 * itself may not be aligned on that boundary.
	 */
	len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
	return sizeof(struct used_mask) +
			sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
}

static size_t
calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len)
{
	size_t data_sz = elt_sz * len;
	size_t msk_sz = calc_mask_size(len);
	return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
}

static struct used_mask *
get_used_mask(void *data, unsigned int elt_sz, unsigned int len)
{
	return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
}
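
/*
 * Illustrative note (editorial, not part of the library logic): with
 * MASK_SHIFT == 6, each mask word covers MASK_ALIGN == 64 elements, so e.g.
 * element index 130 lives in word MASK_LEN_TO_IDX(130) == 2 at bit
 * MASK_LEN_TO_MOD(130) == 2, and MASK_GET_IDX(2, 2) == 130 maps it back.
 * Layout-wise, calc_data_size() reserves one page-aligned region holding
 * "len" elements of "elt_sz" bytes, followed by the used_mask header and
 * MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN)) mask words, which is the
 * address that get_used_mask() computes.
 */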

static int
resize_and_map(int fd, const char *path, void *addr, size_t len)
{
	void *map_addr;

	if (eal_file_truncate(fd, len)) {
		RTE_LOG(ERR, EAL, "Cannot truncate %s\n", path);
		return -1;
	}

	map_addr = rte_mem_map(addr, len, RTE_PROT_READ | RTE_PROT_WRITE,
			RTE_MAP_SHARED | RTE_MAP_FORCE_ADDRESS, fd, 0);
	if (map_addr != addr) {
		return -1;
	}
	return 0;
}

static int
overlap(const struct mem_area *ma, const void *start, size_t len)
{
	const void *end = RTE_PTR_ADD(start, len);
	const void *ma_start = ma->addr;
	const void *ma_end = RTE_PTR_ADD(ma->addr, ma->len);

	/* start overlap? */
	if (start >= ma_start && start < ma_end)
		return 1;
	/* end overlap? */
	if (end > ma_start && end < ma_end)
		return 1;
	return 0;
}

static int
find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
		bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int msk_idx, lookahead_idx, first, first_mod;
	unsigned int last, last_mod;
	uint64_t last_msk, ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	ignore_msk = ~((1ULL << first_mod) - 1);

	/* array length may not be aligned, so calculate ignore mask for last
	 * mask index.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(UINT64_MAX << last_mod);

	for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
		uint64_t cur_msk, lookahead_msk;
		unsigned int run_start, clz, left;
		bool found = false;
		/*
		 * The process of getting n consecutive bits for arbitrary n is
		 * a bit involved, but here it is in a nutshell:
		 *
		 * 1. let n be the number of consecutive bits we're looking for
		 * 2. check if n can fit in one mask, and if so, do n-1
		 *    rshift-ands to see if there is an appropriate run inside
		 *    our current mask
		 *    2a. if we found a run, bail out early
		 *    2b. if we didn't find a run, proceed
		 * 3. invert the mask and count leading zeroes (that is, count
		 *    how many consecutive set bits we had starting from the
		 *    end of current mask) as k
		 *    3a. if k is 0, continue to next mask
		 *    3b. if k is not 0, we have a potential run
		 * 4. to satisfy our requirements, next mask must have n-k
		 *    consecutive set bits right at the start, so we will do
		 *    (n-k-1) rshift-ands and check if first bit is set.
		 *
		 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
		 * we either run out of masks, lose the run, or find what we
		 * were looking for.
		 */
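
		/*
		 * Worked example (editorial illustration, not original code):
		 * suppose n == 3 and, after inversion, cur_msk == 0x76
		 * (binary 0111 0110). Two rounds of "msk &= msk >> 1" leave
		 * 0x10 (only bit 4 set), because only bits 4..6 form a run of
		 * three consecutive set bits; ctz then returns 4, the start
		 * of that run within this mask word.
		 */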
		cur_msk = msk->data[msk_idx];
		left = n;

		/* if we're looking for free spaces, invert the mask */
		if (!used)
			cur_msk = ~cur_msk;

		/* combine current ignore mask with last index ignore mask */
		if (msk_idx == last)
			ignore_msk |= last_msk;

		/* if we have an ignore mask, ignore once */
		if (ignore_msk) {
			cur_msk &= ignore_msk;
			ignore_msk = 0;
		}

		/* if n can fit within a single mask, do a search */
		if (n <= MASK_ALIGN) {
			uint64_t tmp_msk = cur_msk;
			unsigned int s_idx;
			for (s_idx = 0; s_idx < n - 1; s_idx++)
				tmp_msk &= tmp_msk >> 1ULL;
			/* we found what we were looking for */
			if (tmp_msk != 0) {
				run_start = __builtin_ctzll(tmp_msk);
				return MASK_GET_IDX(msk_idx, run_start);
			}
		}

		/*
		 * we didn't find our run within the mask, or n > MASK_ALIGN,
		 * so we're going for plan B.
		 */

		/* count leading zeroes on inverted mask */
		if (~cur_msk == 0)
			clz = sizeof(cur_msk) * 8;
		else
			clz = __builtin_clzll(~cur_msk);

		/* if there aren't any runs at the end either, just continue */
		if (clz == 0)
			continue;

		/* we have a partial run at the end, so try looking ahead */
		run_start = MASK_ALIGN - clz;
		left -= clz;

		for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
				lookahead_idx++) {
			unsigned int s_idx, need;
			lookahead_msk = msk->data[lookahead_idx];

			/* if we're looking for free space, invert the mask */
			if (!used)
				lookahead_msk = ~lookahead_msk;

			/* figure out how many consecutive bits we need here */
			need = RTE_MIN(left, MASK_ALIGN);

			for (s_idx = 0; s_idx < need - 1; s_idx++)
				lookahead_msk &= lookahead_msk >> 1ULL;

			/* if first bit is not set, we've lost the run */
			if ((lookahead_msk & 1) == 0) {
				/*
				 * we've scanned this far, so we know there are
				 * no runs in the space we've lookahead-scanned
				 * as well, so skip that on next iteration.
				 */
				ignore_msk = ~((1ULL << need) - 1);
				msk_idx = lookahead_idx;
				break;
			}

			left -= need;

			/* check if we've found what we were looking for */
			if (left == 0) {
				found = true;
				break;
			}
		}

		/* we didn't find anything, so continue */
		if (!found)
			continue;

		return MASK_GET_IDX(msk_idx, run_start);
	}
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}

static int
find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	unsigned int last, last_mod;
	uint64_t last_msk, ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	ignore_msk = ~((1ULL << first_mod) - 1ULL);

	/* array length may not be aligned, so calculate ignore mask for last
	 * mask index.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-(1ULL) << last_mod);

	for (idx = first; idx < msk->n_masks; idx++) {
		uint64_t cur = msk->data[idx];
		int found;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		if (idx == last)
			cur &= last_msk;

		/* ignore everything before start on first iteration */
		if (idx == first)
			cur &= ignore_msk;

		/* check if we have any entries */
		if (cur == 0)
			continue;

		/*
		 * find first set bit - that will correspond to whatever it is
		 * that we're looking for.
		 */
		found = __builtin_ctzll(cur);
		return MASK_GET_IDX(idx, found);
	}
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}

static int
find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	unsigned int last, last_mod;
	uint64_t last_msk;
	unsigned int need_len, result = 0;

	/* array length may not be aligned, so calculate ignore mask for last
	 * mask index.
	 */
	last = MASK_LEN_TO_IDX(arr->len);
	last_mod = MASK_LEN_TO_MOD(arr->len);
	last_msk = ~(-(1ULL) << last_mod);

	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
		uint64_t cur = msk->data[idx];
		unsigned int run_len;

		need_len = MASK_ALIGN;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		/* if this is last mask, ignore everything after last bit */
		if (idx == last)
			cur &= last_msk;

		/* ignore everything before start on first iteration */
		if (idx == first) {
			cur >>= first_mod;
			/* at the start, we don't need the full mask len */
			need_len -= first_mod;
		}

		/* we will be looking for zeroes, so invert the mask */
		cur = ~cur;

		/* if mask is zero, we have a complete run */
		if (cur == 0)
			continue;

		/*
		 * see if current run ends before mask end.
		 */
		run_len = __builtin_ctzll(cur);

		/* add however many zeroes we've had in the last run and quit */
		if (run_len < need_len) {
			result += run_len;
			break;
		}
	}
	return result;
}

static int
find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
		bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int msk_idx, lookbehind_idx, first, first_mod;
	uint64_t ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing ctz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	/* we're going backwards, so mask must start from the top */
	ignore_msk = first_mod == MASK_ALIGN - 1 ?
				UINT64_MAX : /* prevent overflow */
				~(UINT64_MAX << (first_mod + 1));

	/* go backwards, include zero */
	msk_idx = first;
	do {
		uint64_t cur_msk, lookbehind_msk;
		unsigned int run_start, run_end, ctz, left;
		bool found = false;
		/*
		 * The process of getting n consecutive bits from the top for
		 * arbitrary n is a bit involved, but here it is in a nutshell:
		 *
		 * 1. let n be the number of consecutive bits we're looking for
		 * 2. check if n can fit in one mask, and if so, do n-1
		 *    lshift-ands to see if there is an appropriate run inside
		 *    our current mask
		 *    2a. if we found a run, bail out early
		 *    2b. if we didn't find a run, proceed
		 * 3. invert the mask and count trailing zeroes (that is, count
		 *    how many consecutive set bits we had starting from the
		 *    start of current mask) as k
		 *    3a. if k is 0, continue to next mask
		 *    3b. if k is not 0, we have a potential run
		 * 4. to satisfy our requirements, next mask must have n-k
		 *    consecutive set bits at the end, so we will do (n-k-1)
		 *    lshift-ands and check if last bit is set.
		 *
		 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
		 * we either run out of masks, lose the run, or find what we
		 * were looking for.
		 */
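
		/*
		 * Worked example (editorial illustration, not original code):
		 * the lshift-and loop mirrors the forward case. For n == 3,
		 * a cur_msk with only bits 5..7 set leaves only bit 7 set
		 * after two rounds of "msk &= msk << 1"; clz of that is 56,
		 * so run_start = MASK_ALIGN - 56 - n == 5, i.e. the lowest
		 * index of the run, which is what the caller expects.
		 */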
		cur_msk = msk->data[msk_idx];
		left = n;

		/* if we're looking for free spaces, invert the mask */
		if (!used)
			cur_msk = ~cur_msk;

		/* if we have an ignore mask, ignore once */
		if (ignore_msk) {
			cur_msk &= ignore_msk;
			ignore_msk = 0;
		}

		/* if n can fit within a single mask, do a search */
		if (n <= MASK_ALIGN) {
			uint64_t tmp_msk = cur_msk;
			unsigned int s_idx;
			for (s_idx = 0; s_idx < n - 1; s_idx++)
				tmp_msk &= tmp_msk << 1ULL;
			/* we found what we were looking for */
			if (tmp_msk != 0) {
				/* clz will give us offset from end of mask, and
				 * we only get the end of our run, not start,
				 * so adjust result to point to where start
				 * would have been.
				 */
				run_start = MASK_ALIGN -
						__builtin_clzll(tmp_msk) - n;
				return MASK_GET_IDX(msk_idx, run_start);
			}
		}

		/*
		 * we didn't find our run within the mask, or n > MASK_ALIGN,
		 * so we're going for plan B.
		 */

		/* count trailing zeroes on inverted mask */
		if (~cur_msk == 0)
			ctz = sizeof(cur_msk) * 8;
		else
			ctz = __builtin_ctzll(~cur_msk);

		/* if there aren't any runs at the start either, just
		 * continue
		 */
		if (ctz == 0)
			continue;

		/* we have a partial run at the start, so try looking behind */
		run_end = MASK_GET_IDX(msk_idx, ctz);
		left -= ctz;

		/* go backwards, include zero */
		lookbehind_idx = msk_idx - 1;

		/* we can't lookbehind as we've run out of masks, so stop */
		if (msk_idx == 0)
			break;

		do {
			const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
			unsigned int s_idx, need;

			lookbehind_msk = msk->data[lookbehind_idx];

			/* if we're looking for free space, invert the mask */
			if (!used)
				lookbehind_msk = ~lookbehind_msk;

			/* figure out how many consecutive bits we need here */
			need = RTE_MIN(left, MASK_ALIGN);

			for (s_idx = 0; s_idx < need - 1; s_idx++)
				lookbehind_msk &= lookbehind_msk << 1ULL;

			/* if last bit is not set, we've lost the run */
			if ((lookbehind_msk & last_bit) == 0) {
				/*
				 * we've scanned this far, so we know there are
				 * no runs in the space we've lookbehind-scanned
				 * as well, so skip that on next iteration.
				 */
				ignore_msk = UINT64_MAX << need;
				msk_idx = lookbehind_idx;
				break;
			}

			left -= need;

			/* check if we've found what we were looking for */
			if (left == 0) {
				found = true;
				break;
			}
		} while ((lookbehind_idx--) != 0); /* decrement after check to
						    * include zero
						    */

		/* we didn't find anything, so continue */
		if (!found)
			continue;

		/* we've found what we were looking for, but we only know where
		 * the run ended, so calculate start position.
		 */
		return run_end - n;
	} while (msk_idx-- != 0); /* decrement after check to include zero */
	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}

static int
find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	uint64_t ignore_msk;

	/*
	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
	 * on that boundary, so construct a special mask to exclude anything we
	 * don't want to see to avoid confusing clz.
	 */
	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);
	/* we're going backwards, so mask must start from the top */
	ignore_msk = first_mod == MASK_ALIGN - 1 ?
				UINT64_MAX : /* prevent overflow */
				~(UINT64_MAX << (first_mod + 1));

	/* go backwards, include zero */
	idx = first;
	do {
		uint64_t cur = msk->data[idx];
		int found;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		/* ignore everything before start on first iteration */
		if (idx == first)
			cur &= ignore_msk;

		/* check if we have any entries */
		if (cur == 0)
			continue;

		/*
		 * find last set bit - that will correspond to whatever it is
		 * that we're looking for. we're counting leading zeroes, thus
		 * the value we get is counted from end of mask, so calculate
		 * position from start of mask.
		 */
		found = MASK_ALIGN - __builtin_clzll(cur) - 1;

		return MASK_GET_IDX(idx, found);
	} while (idx-- != 0); /* decrement after check to include zero */

	/* we didn't find anything */
	rte_errno = used ? ENOENT : ENOSPC;
	return -1;
}
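
/*
 * Editorial note (illustration, not original code): find_rev_contig() below
 * counts how many used (or free) slots sit at "start" and immediately before
 * it. For example, with used bits at indices 3..9 and start == 7, looking for
 * used entries yields 5 (indices 7, 6, 5, 4, 3), while find_contig() from the
 * same spot would yield 3 (indices 7, 8, 9).
 */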

static int
find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
{
	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
			arr->len);
	unsigned int idx, first, first_mod;
	unsigned int need_len, result = 0;

	first = MASK_LEN_TO_IDX(start);
	first_mod = MASK_LEN_TO_MOD(start);

	/* go backwards, include zero */
	idx = first;
	do {
		uint64_t cur = msk->data[idx];
		unsigned int run_len;

		need_len = MASK_ALIGN;

		/* if we're looking for free entries, invert mask */
		if (!used)
			cur = ~cur;

		/* ignore everything after start on first iteration */
		if (idx == first) {
			unsigned int end_len = MASK_ALIGN - first_mod - 1;
			cur <<= end_len;
			/* at the start, we don't need the full mask len */
			need_len -= end_len;
		}

		/* we will be looking for zeroes, so invert the mask */
		cur = ~cur;

		/* if mask is zero, we have a complete run */
		if (cur == 0)
			goto endloop;

		/*
		 * see where run ends, starting from the end.
		 */
		run_len = __builtin_clzll(cur);

		/* add however many zeroes we've had in the last run and quit */
		if (run_len < need_len) {
			result += run_len;
			break;
		}
endloop:
		result += need_len;
	} while (idx-- != 0); /* decrement after check to include zero */
	return result;
}

static int
set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
{
	struct used_mask *msk;
	uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
	unsigned int msk_idx = MASK_LEN_TO_IDX(idx);
	bool already_used;
	int ret = -1;

	if (arr == NULL || idx >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}
	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
	ret = 0;

	/* prevent array from changing under us */
	rte_rwlock_write_lock(&arr->rwlock);

	already_used = (msk->data[msk_idx] & msk_bit) != 0;

	/* nothing to be done */
	if (used == already_used)
		goto out;

	if (used) {
		msk->data[msk_idx] |= msk_bit;
		arr->count++;
	} else {
		msk->data[msk_idx] &= ~msk_bit;
		arr->count--;
	}
out:
	rte_rwlock_write_unlock(&arr->rwlock);

	return ret;
}

static int
fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
{
	if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) {
		rte_errno = EINVAL;
		return -1;
	}

	if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
		rte_errno = ENAMETOOLONG;
		return -1;
	}
	return 0;
}
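
/*
 * Editorial usage sketch (illustration only, not part of this file): a
 * typical single-process caller creates an array, marks slots as it hands
 * them out, and looks slots up by index, e.g.:
 *
 *	struct rte_fbarray arr;
 *	if (rte_fbarray_init(&arr, "example", 1024, sizeof(struct foo)) == 0) {
 *		int idx = rte_fbarray_find_next_free(&arr, 0);
 *		if (idx >= 0) {
 *			struct foo *f = rte_fbarray_get(&arr, idx);
 *			rte_fbarray_set_used(&arr, idx);
 *			// ... use f ...
 *		}
 *		rte_fbarray_destroy(&arr);
 *	}
 *
 * "struct foo" and the array name are placeholders and error handling is
 * abbreviated. Secondary processes would call rte_fbarray_attach() on a
 * shared rte_fbarray instead of rte_fbarray_init().
 */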

int
rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
		unsigned int elt_sz)
{
	size_t page_sz, mmap_len;
	char path[PATH_MAX];
	struct used_mask *msk;
	struct mem_area *ma = NULL;
	void *data = NULL;
	int fd = -1;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	if (fully_validate(name, elt_sz, len))
		return -1;

	/* allocate mem area before doing anything */
	ma = malloc(sizeof(*ma));
	if (ma == NULL) {
		rte_errno = ENOMEM;
		return -1;
	}

	page_sz = rte_mem_page_size();
	if (page_sz == (size_t)-1) {
		free(ma);
		return -1;
	}

	/* calculate our memory limits */
	mmap_len = calc_data_size(page_sz, elt_sz, len);

	data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
	if (data == NULL) {
		free(ma);
		return -1;
	}

	rte_spinlock_lock(&mem_area_lock);

	fd = -1;

	if (internal_conf->no_shconf) {
		/* remap virtual area as writable */
		static const int flags = RTE_MAP_FORCE_ADDRESS |
			RTE_MAP_PRIVATE | RTE_MAP_ANONYMOUS;
		void *new_data = rte_mem_map(data, mmap_len,
			RTE_PROT_READ | RTE_PROT_WRITE, flags, fd, 0);
		if (new_data == NULL) {
			RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
					__func__, rte_strerror(rte_errno));
			goto fail;
		}
	} else {
		eal_get_fbarray_path(path, sizeof(path), name);

		/*
		 * Each fbarray is unique to the process namespace, i.e. the
		 * filename depends on the process prefix. Try to take out a
		 * lock and see if we succeed. If we don't, someone else is
		 * using it already.
		 */
		fd = eal_file_open(path, EAL_OPEN_CREATE | EAL_OPEN_READWRITE);
		if (fd < 0) {
			RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
					__func__, path, rte_strerror(rte_errno));
			goto fail;
		} else if (eal_file_lock(
				fd, EAL_FLOCK_EXCLUSIVE, EAL_FLOCK_RETURN)) {
			RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
					__func__, path, rte_strerror(rte_errno));
			rte_errno = EBUSY;
			goto fail;
		}

		/* take out a non-exclusive lock, so that other processes could
		 * still attach to it, but no other process could reinitialize
		 * it.
		 */
		if (eal_file_lock(fd, EAL_FLOCK_SHARED, EAL_FLOCK_RETURN))
			goto fail;

		if (resize_and_map(fd, path, data, mmap_len))
			goto fail;
	}
	ma->addr = data;
	ma->len = mmap_len;
	ma->fd = fd;

	/* do not close fd - keep it until detach/destroy */
	TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);

	/* initialize the data */
	memset(data, 0, mmap_len);

	/* populate data structure */
	strlcpy(arr->name, name, sizeof(arr->name));
	arr->data = data;
	arr->len = len;
	arr->elt_sz = elt_sz;
	arr->count = 0;

	msk = get_used_mask(data, elt_sz, len);
	msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));

	rte_rwlock_init(&arr->rwlock);

	rte_spinlock_unlock(&mem_area_lock);

	return 0;
fail:
	if (data)
		rte_mem_unmap(data, mmap_len);
	if (fd >= 0)
		close(fd);
	free(ma);

	rte_spinlock_unlock(&mem_area_lock);
	return -1;
}

int
rte_fbarray_attach(struct rte_fbarray *arr)
{
	struct mem_area *ma = NULL, *tmp = NULL;
	size_t page_sz, mmap_len;
	char path[PATH_MAX];
	void *data = NULL;
	int fd = -1;

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	/*
	 * we don't need to synchronize attach as two values we need (element
	 * size and array length) are constant for the duration of life of
	 * the array, so the parts we care about will not race.
	 */

	if (fully_validate(arr->name, arr->elt_sz, arr->len))
		return -1;

	ma = malloc(sizeof(*ma));
	if (ma == NULL) {
		rte_errno = ENOMEM;
		return -1;
	}

	page_sz = rte_mem_page_size();
	if (page_sz == (size_t)-1) {
		free(ma);
		return -1;
	}

	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);

	/* check the tailq - maybe user has already mapped this address space */
	rte_spinlock_lock(&mem_area_lock);

	TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
		if (overlap(tmp, arr->data, mmap_len)) {
			rte_errno = EEXIST;
			goto fail;
		}
	}

	/* we know this memory area is unique, so proceed */

	data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
	if (data == NULL)
		goto fail;

	eal_get_fbarray_path(path, sizeof(path), arr->name);

	fd = eal_file_open(path, EAL_OPEN_READWRITE);
	if (fd < 0) {
		goto fail;
	}

	/* lock the file, to let others know we're using it */
	if (eal_file_lock(fd, EAL_FLOCK_SHARED, EAL_FLOCK_RETURN))
		goto fail;

	if (resize_and_map(fd, path, data, mmap_len))
		goto fail;

	/* store our new memory area */
	ma->addr = data;
	ma->fd = fd; /* keep fd until detach/destroy */
	ma->len = mmap_len;

	TAILQ_INSERT_TAIL(&mem_area_tailq, ma, next);

	/* we're done */

	rte_spinlock_unlock(&mem_area_lock);
	return 0;
fail:
	if (data)
		rte_mem_unmap(data, mmap_len);
	if (fd >= 0)
		close(fd);
	free(ma);
	rte_spinlock_unlock(&mem_area_lock);
	return -1;
}

int
rte_fbarray_detach(struct rte_fbarray *arr)
{
	struct mem_area *tmp = NULL;
	size_t mmap_len;
	int ret = -1;

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	/*
	 * we don't need to synchronize detach as two values we need (element
	 * size and total capacity) are constant for the duration of life of
	 * the array, so the parts we care about will not race. if the user is
	 * detaching while doing something else in the same process, we can't
	 * really do anything about it, things will blow up either way.
	 */

	size_t page_sz = rte_mem_page_size();
	if (page_sz == (size_t)-1)
		return -1;

	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);

	/* does this area exist? */
	rte_spinlock_lock(&mem_area_lock);

	TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
		if (tmp->addr == arr->data && tmp->len == mmap_len)
			break;
	}
	if (tmp == NULL) {
		rte_errno = ENOENT;
		ret = -1;
		goto out;
	}

	rte_mem_unmap(arr->data, mmap_len);

	/* area is unmapped, close fd and remove the tailq entry */
	if (tmp->fd >= 0)
		close(tmp->fd);
	TAILQ_REMOVE(&mem_area_tailq, tmp, next);
	free(tmp);

	ret = 0;
out:
	rte_spinlock_unlock(&mem_area_lock);
	return ret;
}
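
/*
 * Editorial note (summary of the locking scheme above, not original code):
 * in shared-config mode the backing file carries the lifecycle state.
 * rte_fbarray_init() briefly takes an exclusive lock to prove nobody else is
 * using the file, then downgrades to a shared lock; rte_fbarray_attach()
 * takes a shared lock as well. rte_fbarray_destroy() below can therefore
 * only unlink the file once it manages to upgrade to an exclusive lock,
 * which fails with EBUSY while any other process still holds its shared
 * lock. With --no-shconf there is no file at all, so only the local mapping
 * and tailq entry are torn down.
 */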

int
rte_fbarray_destroy(struct rte_fbarray *arr)
{
	struct mem_area *tmp = NULL;
	size_t mmap_len;
	int fd, ret;
	char path[PATH_MAX];
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();

	if (arr == NULL) {
		rte_errno = EINVAL;
		return -1;
	}

	/*
	 * we don't need to synchronize destroy as two values we need (element
	 * size and total capacity) are constant for the duration of life of
	 * the array, so the parts we care about will not race. if the user is
	 * destroying while doing something else in the same process, we can't
	 * really do anything about it, things will blow up either way.
	 */

	size_t page_sz = rte_mem_page_size();
	if (page_sz == (size_t)-1)
		return -1;

	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);

	/* does this area exist? */
	rte_spinlock_lock(&mem_area_lock);

	TAILQ_FOREACH(tmp, &mem_area_tailq, next) {
		if (tmp->addr == arr->data && tmp->len == mmap_len)
			break;
	}
	if (tmp == NULL) {
		rte_errno = ENOENT;
		ret = -1;
		goto out;
	}
	/* with no shconf, there were never any files to begin with */
	if (!internal_conf->no_shconf) {
		/*
		 * attempt to get an exclusive lock on the file, to ensure it
		 * has been detached by all other processes
		 */
		fd = tmp->fd;
		if (eal_file_lock(fd, EAL_FLOCK_EXCLUSIVE, EAL_FLOCK_RETURN)) {
			RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
			rte_errno = EBUSY;
			ret = -1;
			goto out;
		}

		/* we're OK to destroy the file */
		eal_get_fbarray_path(path, sizeof(path), arr->name);
		if (unlink(path)) {
			RTE_LOG(DEBUG, EAL, "Cannot unlink fbarray: %s\n",
					strerror(errno));
			rte_errno = errno;
			/*
			 * we're still holding an exclusive lock, so drop it to
			 * shared.
			 */
			eal_file_lock(fd, EAL_FLOCK_SHARED, EAL_FLOCK_RETURN);

			ret = -1;
			goto out;
		}
		close(fd);
	}
	rte_mem_unmap(arr->data, mmap_len);

	/* area is unmapped, remove the tailq entry */
	TAILQ_REMOVE(&mem_area_tailq, tmp, next);
	free(tmp);
	ret = 0;

	/* reset the fbarray structure */
	memset(arr, 0, sizeof(*arr));
out:
	rte_spinlock_unlock(&mem_area_lock);
	return ret;
}

void *
rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx)
{
	void *ret = NULL;
	if (arr == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (idx >= arr->len) {
		rte_errno = EINVAL;
		return NULL;
	}

	ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);

	return ret;
}

int
rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx)
{
	return set_used(arr, idx, true);
}

int
rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx)
{
	return set_used(arr, idx, false);
}

int
rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
{
	struct used_mask *msk;
	int msk_idx;
	uint64_t msk_bit;
	int ret = -1;

	if (arr == NULL || idx >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
	msk_idx = MASK_LEN_TO_IDX(idx);
	msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);

	ret = (msk->data[msk_idx] & msk_bit) != 0;

	rte_rwlock_read_unlock(&arr->rwlock);

	return ret;
}

static int
fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
{
	int ret = -1;

	if (arr == NULL || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	/* cheap checks to prevent doing useless work */
	if (!used) {
		if (arr->len == arr->count) {
			rte_errno = ENOSPC;
			goto out;
		}
		if (arr->count == 0) {
			ret = start;
			goto out;
		}
	} else {
		if (arr->count == 0) {
			rte_errno = ENOENT;
			goto out;
		}
		if (arr->len == arr->count) {
			ret = start;
			goto out;
		}
	}
	if (next)
		ret = find_next(arr, start, used);
	else
		ret = find_prev(arr, start, used);
out:
	rte_rwlock_read_unlock(&arr->rwlock);
	return ret;
}

int
rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, true, false);
}

int
rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, true, true);
}

int
rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, false, false);
}

int
rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find(arr, start, false, true);
}

static int
fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
		bool next, bool used)
{
	int ret = -1;

	if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
		rte_errno = EINVAL;
		return -1;
	}
	if (next && (arr->len - start) < n) {
		rte_errno = used ? ENOENT : ENOSPC;
		return -1;
	}
	if (!next && start < (n - 1)) {
		rte_errno = used ? ENOENT : ENOSPC;
		return -1;
	}
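
	/*
	 * Editorial illustration (not original code): the two checks above
	 * reject requests that cannot possibly fit. A forward search for
	 * n == 4 starting at start == arr->len - 2 has only two slots left,
	 * and a backward search for n == 4 from start == 2 only covers
	 * indices 0..2, so both fail up front with ENOENT/ENOSPC.
	 */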

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	/* cheap checks to prevent doing useless work */
	if (!used) {
		if (arr->len == arr->count || arr->len - arr->count < n) {
			rte_errno = ENOSPC;
			goto out;
		}
		if (arr->count == 0) {
			ret = next ? start : start - n + 1;
			goto out;
		}
	} else {
		if (arr->count < n) {
			rte_errno = ENOENT;
			goto out;
		}
		if (arr->count == arr->len) {
			ret = next ? start : start - n + 1;
			goto out;
		}
	}

	if (next)
		ret = find_next_n(arr, start, n, used);
	else
		ret = find_prev_n(arr, start, n, used);
out:
	rte_rwlock_read_unlock(&arr->rwlock);
	return ret;
}

int
rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, true, false);
}

int
rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, true, true);
}

int
rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, false, false);
}

int
rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
		unsigned int n)
{
	return fbarray_find_n(arr, start, n, false, true);
}

static int
fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
		bool used)
{
	int ret = -1;

	if (arr == NULL || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	/* cheap checks to prevent doing useless work */
	if (used) {
		if (arr->count == 0) {
			ret = 0;
			goto out;
		}
		if (next && arr->count == arr->len) {
			ret = arr->len - start;
			goto out;
		}
		if (!next && arr->count == arr->len) {
			ret = start + 1;
			goto out;
		}
	} else {
		if (arr->len == arr->count) {
			ret = 0;
			goto out;
		}
		if (next && arr->count == 0) {
			ret = arr->len - start;
			goto out;
		}
		if (!next && arr->count == 0) {
			ret = start + 1;
			goto out;
		}
	}

	if (next)
		ret = find_contig(arr, start, used);
	else
		ret = find_rev_contig(arr, start, used);
out:
	rte_rwlock_read_unlock(&arr->rwlock);
	return ret;
}
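
/*
 * Editorial note (illustration, not original code): fbarray_find_biggest()
 * below simply walks the array with the find/find_contig pair selected
 * further down and keeps the longest run seen. For example, with free runs
 * of length 3 at index 0 and length 5 at index 10, a forward search visits
 * the run at 0 first but ends up returning 10, the start of the longer run.
 */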

static int
fbarray_find_biggest(struct rte_fbarray *arr, unsigned int start, bool used,
		bool rev)
{
	int cur_idx, next_idx, cur_len, biggest_idx, biggest_len;
	/* don't stack if conditions, use function pointers instead */
	int (*find_func)(struct rte_fbarray *, unsigned int);
	int (*find_contig_func)(struct rte_fbarray *, unsigned int);

	if (arr == NULL || start >= arr->len) {
		rte_errno = EINVAL;
		return -1;
	}
	/* the other API calls already do their fair share of cheap checks, so
	 * no need to do them here.
	 */

	/* the APIs called are thread-safe, but something may still happen
	 * between the API calls, so lock the fbarray. all other APIs are
	 * read-locking the fbarray, so read lock here is OK.
	 */
	rte_rwlock_read_lock(&arr->rwlock);

	/* pick out appropriate functions */
	if (used) {
		if (rev) {
			find_func = rte_fbarray_find_prev_used;
			find_contig_func = rte_fbarray_find_rev_contig_used;
		} else {
			find_func = rte_fbarray_find_next_used;
			find_contig_func = rte_fbarray_find_contig_used;
		}
	} else {
		if (rev) {
			find_func = rte_fbarray_find_prev_free;
			find_contig_func = rte_fbarray_find_rev_contig_free;
		} else {
			find_func = rte_fbarray_find_next_free;
			find_contig_func = rte_fbarray_find_contig_free;
		}
	}

	cur_idx = start;
	biggest_idx = -1; /* default is error */
	biggest_len = 0;
	for (;;) {
		cur_idx = find_func(arr, cur_idx);

		/* block found, check its length */
		if (cur_idx >= 0) {
			cur_len = find_contig_func(arr, cur_idx);
			/* decide where we go next */
			next_idx = rev ? cur_idx - cur_len : cur_idx + cur_len;
			/* move current index to start of chunk */
			cur_idx = rev ? next_idx + 1 : cur_idx;

			if (cur_len > biggest_len) {
				biggest_idx = cur_idx;
				biggest_len = cur_len;
			}
			cur_idx = next_idx;
			/* in reverse mode, next_idx may be -1 if chunk started
			 * at array beginning. this means there's no more work
			 * to do.
			 */
			if (cur_idx < 0)
				break;
		} else {
			/* nothing more to find, stop. however, a failed API
			 * call has set rte_errno, which we want to ignore, as
			 * reaching the end of fbarray is not an error.
			 */
			rte_errno = 0;
			break;
		}
	}
	/* if we didn't find anything at all, set rte_errno */
	if (biggest_idx < 0)
		rte_errno = used ? ENOENT : ENOSPC;

	rte_rwlock_read_unlock(&arr->rwlock);
	return biggest_idx;
}

int
rte_fbarray_find_biggest_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_biggest(arr, start, false, false);
}

int
rte_fbarray_find_biggest_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_biggest(arr, start, true, false);
}

int
rte_fbarray_find_rev_biggest_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_biggest(arr, start, false, true);
}

int
rte_fbarray_find_rev_biggest_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_biggest(arr, start, true, true);
}


int
rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, true, false);
}

int
rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, true, true);
}

int
rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, false, false);
}

int
rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
{
	return fbarray_find_contig(arr, start, false, true);
}

int
rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
{
	void *end;
	int ret = -1;

	/*
	 * no need to synchronize as it doesn't matter if underlying data
	 * changes - we're doing pointer arithmetic here.
	 */

	if (arr == NULL || elt == NULL) {
		rte_errno = EINVAL;
		return -1;
	}
	end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
	if (elt < arr->data || elt >= end) {
		rte_errno = EINVAL;
		return -1;
	}

	ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;

	return ret;
}

void
rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
{
	struct used_mask *msk;
	unsigned int i;

	if (arr == NULL || f == NULL) {
		rte_errno = EINVAL;
		return;
	}

	if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
		fprintf(f, "Invalid file-backed array\n");
		/* bail out before taking the lock, so we never unlock a
		 * read lock that was not taken.
		 */
		return;
	}

	/* prevent array from changing under us */
	rte_rwlock_read_lock(&arr->rwlock);

	fprintf(f, "File-backed array: %s\n", arr->name);
	fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
			arr->len, arr->count, arr->elt_sz);

	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);

	for (i = 0; i < msk->n_masks; i++)
		fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);

	rte_rwlock_read_unlock(&arr->rwlock);
}