/* hash - hashing table processing.

   Copyright (C) 1998-2004, 2006-2007, 2009-2010 Free Software Foundation, Inc.

   Written by Jim Meyering, 1992.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* A generic hash table package.  */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc.  If you change USE_OBSTACK, you have to recompile!  */

#include <config.h>

#include "hash.h"

#include "bitrotate.h"
#include "xalloc.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif

struct hash_entry
  {
    void *data;
    struct hash_entry *next;
  };

struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
       are not empty; there are N_ENTRIES active entries in the table.  */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to `hash_initialize'; see the documentation
       block for that function.  In a word, HASHER randomizes a user entry
       into a number from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the cleanup
       function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };

/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  An entry indistinctly
   refers to both the internal entry and its associated user entry.  A user
   entry's contents may be hashed by a randomization function (the hashing
   function, or just `hasher' for short) into a number (or `slot') between 0
   and the current table size.  At each slot position in the hash table
   starts a linked chain of entries for which the user data all hash to this
   slot.  A bucket is the collection of all entries hashing to the same slot.

   A good `hasher' function will distribute entries rather evenly in buckets.
   In the ideal case, the length of each bucket is roughly the number of
   entries divided by the table size.  Finding the slot for a given user
   entry is usually done in constant time by the `hasher', and the later
   finding of a precise entry is linear in time with the size of the bucket.
   Consequently, a larger hash table size (that is, a larger number of
   buckets) is prone to yielding shorter chains, *given* the `hasher'
   function behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash table
   sizes in the hope of reducing the average length of buckets, but this
   might become wasteful, as unused slots in the hash table take some space.
   The best bet is to make sure you are using a good `hasher' function
   (beware that those are not that easy to write! :-), and to use a table
   size larger than the actual number of entries.  */
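
/* As an illustrative sketch only (not part of this file): a caller whose
   user entries are structs keyed by an integer id might supply a hasher
   along the lines below.  All entries whose key maps to the same slot end
   up chained together in that slot's bucket.  The `struct item' type, its
   fields, and the function name are invented for this example.  */
#if 0
struct item
  {
    unsigned int id;
    char *payload;
  };

static size_t
item_hasher (const void *entry, size_t table_size)
{
  const struct item *item = entry;

  /* Any deterministic mapping into 0..TABLE_SIZE-1 is a valid hasher;
     better mixing than a plain modulo usually yields shorter buckets.  */
  return item->id % table_size;
}
#endif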

/* If an insertion makes the ratio of nonempty buckets to table size larger
   than the growth threshold (a number between 0.0 and 1.0), then increase
   the table size by multiplying by the growth factor (a number greater than
   1.0).  The growth threshold defaults to 0.8, and the growth factor
   defaults to 1.414, meaning that the table will have doubled its size
   every second time 80% of the buckets get used.  */
#define DEFAULT_GROWTH_THRESHOLD 0.8
#define DEFAULT_GROWTH_FACTOR 1.414

/* If a deletion empties a bucket and causes the ratio of used buckets to
   table size to become smaller than the shrink threshold (a number between
   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
   number greater than the shrink threshold but smaller than 1.0).  The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks.  */
#define DEFAULT_SHRINK_THRESHOLD 0.0
#define DEFAULT_SHRINK_FACTOR 1.0

/* Use this to initialize or reset a TUNING structure to
   some sensible values. */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };
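
/* Illustrative sketch only (not part of this file): a caller that also
   wants the table to shrink on deletion could pass its own tuning to
   `hash_initialize', for instance along these lines.  The numeric values
   are made up for the example; `check_tuning' below rejects grossly
   inconsistent ones.  */
#if 0
static const Hash_tuning example_tuning =
  {
    0.2,                        /* shrink_threshold */
    0.5,                        /* shrink_factor */
    0.8,                        /* growth_threshold */
    1.414,                      /* growth_factor */
    false                       /* is_n_buckets */
  };

/* Hash_table *t = hash_initialize (100, &example_tuning, NULL, NULL, NULL); */
#endif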

/* Information and lookup.  */

/* The following few functions provide information about the overall hash
   table organization: the number of entries, number of buckets and maximum
   length of buckets.  */

/* Return the number of buckets in the hash table.  The table size, the total
   number of buckets (used plus unused), and the maximum number of slots are
   all the same quantity.  */

size_t
hash_get_n_buckets (const Hash_table *table)
{
  return table->n_buckets;
}

/* Return the number of slots in use (non-empty buckets).  */

size_t
hash_get_n_buckets_used (const Hash_table *table)
{
  return table->n_buckets_used;
}

/* Return the number of active entries.  */

size_t
hash_get_n_entries (const Hash_table *table)
{
  return table->n_entries;
}

/* Return the length of the longest chain (bucket).  */

size_t
hash_get_max_bucket_length (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t max_bucket_length = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;
          size_t bucket_length = 1;

          while (cursor = cursor->next, cursor)
            bucket_length++;

          if (bucket_length > max_bucket_length)
            max_bucket_length = bucket_length;
        }
    }

  return max_bucket_length;
}

/* Do a mild validation of a hash table, by traversing it and checking two
   statistics.  */

bool
hash_table_ok (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t n_buckets_used = 0;
  size_t n_entries = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;

          /* Count bucket head.  */
          n_buckets_used++;
          n_entries++;

          /* Count bucket overflow.  */
          while (cursor = cursor->next, cursor)
            n_entries++;
        }
    }

  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
    return true;

  return false;
}

void
hash_print_statistics (const Hash_table *table, FILE *stream)
{
  size_t n_entries = hash_get_n_entries (table);
  size_t n_buckets = hash_get_n_buckets (table);
  size_t n_buckets_used = hash_get_n_buckets_used (table);
  size_t max_bucket_length = hash_get_max_bucket_length (table);

  fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
  fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
  fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
           (unsigned long int) n_buckets_used,
           (100.0 * n_buckets_used) / n_buckets);
  fprintf (stream, "max bucket length: %lu\n",
           (unsigned long int) max_bucket_length);
}

/* If ENTRY matches an entry already in the hash table, return the
   entry from the table.  Otherwise, return NULL.  */

void *
hash_lookup (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry const *cursor;

  if (! (bucket < table->bucket_limit))
    abort ();

  if (bucket->data == NULL)
    return NULL;

  for (cursor = bucket; cursor; cursor = cursor->next)
    if (entry == cursor->data || table->comparator (entry, cursor->data))
      return cursor->data;

  return NULL;
}
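
/* Illustrative sketch only (not part of this file): `hash_lookup' takes a
   template entry and returns the matching entry stored in the table, or
   NULL.  With string entries and the string hasher/comparator sketched
   further below, probing for a key might look like this; the function name
   is invented for the example.  */
#if 0
static bool
table_contains (const Hash_table *table, const char *key)
{
  /* The lookup argument is only hashed and compared, never stored,
     so a temporary key is fine here.  */
  return hash_lookup (table, key) != NULL;
}
#endif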

/* Walking.  */

/* The functions in this page traverse the hash table and process the
   contained entries.  For the traversal to work properly, the hash table
   should not be resized nor modified while any particular entry is being
   processed.  In particular, entries should not be added, and an entry
   may be removed only if there is no shrink threshold and the entry being
   removed has already been passed to hash_get_next.  */

/* Return the first data in the table, or NULL if the table is empty.  */

void *
hash_get_first (const Hash_table *table)
{
  struct hash_entry const *bucket;

  if (table->n_entries == 0)
    return NULL;

  for (bucket = table->bucket; ; bucket++)
    if (! (bucket < table->bucket_limit))
      abort ();
    else if (bucket->data)
      return bucket->data;
}

/* Return the user data for the entry following ENTRY, where ENTRY has been
   returned by a previous call to either `hash_get_first' or `hash_get_next'.
   Return NULL if there are no more entries.  */

void *
hash_get_next (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry const *cursor;

  if (! (bucket < table->bucket_limit))
    abort ();

  /* Find next entry in the same bucket.  */
  for (cursor = bucket; cursor; cursor = cursor->next)
    if (cursor->data == entry && cursor->next)
      return cursor->next->data;

  /* Find first entry in any subsequent bucket.  */
  while (++bucket < table->bucket_limit)
    if (bucket->data)
      return bucket->data;

  /* None found.  */
  return NULL;
}

/* Fill BUFFER with pointers to active user entries in the hash table, then
   return the number of pointers copied.  Do not copy more than BUFFER_SIZE
   pointers.  */

size_t
hash_get_entries (const Hash_table *table, void **buffer,
                  size_t buffer_size)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
              buffer[counter++] = cursor->data;
            }
        }
    }

  return counter;
}

/* Call a PROCESSOR function for each entry of a hash table, and return the
   number of entries for which the processor function returned success.  The
   PROCESSOR_DATA pointer is made available to each call of the processor
   function.  The PROCESSOR accepts two arguments: the first is the user
   entry being visited, the second is the value of PROCESSOR_DATA as
   received.  The walking continues for as long as the PROCESSOR function
   returns nonzero.  When it returns zero, the walking is interrupted.  */

size_t
hash_do_for_each (const Hash_table *table, Hash_processor processor,
                  void *processor_data)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (! processor (cursor->data, processor_data))
                return counter;
              counter++;
            }
        }
    }

  return counter;
}
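
/* Illustrative sketch only (not part of this file): walking a table either
   with the cursor pair `hash_get_first'/`hash_get_next', or with
   `hash_do_for_each' and a processor callback.  The entry type (plain
   strings) and the function names are invented for the example.  */
#if 0
static bool
print_one (void *data, void *processor_data)
{
  fprintf ((FILE *) processor_data, "%s\n", (const char *) data);
  return true;                  /* nonzero: keep walking */
}

static void
dump_table (const Hash_table *table)
{
  void *entry;

  /* Cursor-style traversal.  */
  for (entry = hash_get_first (table); entry != NULL;
       entry = hash_get_next (table, entry))
    puts ((const char *) entry);

  /* Callback-style traversal; returns how many entries were processed.  */
  hash_do_for_each (table, print_one, stdout);
}
#endif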

/* Allocation and clean-up.  */

/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
   This is a convenience routine for constructing other hashing functions.  */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

size_t
hash_string (const char *string, size_t n_buckets)
{
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + rotl_sz (Value, 7))

  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = HASH_ONE_CHAR (value, ch);
  return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from `recode', and performs a bit better than the above as
   per a few experiments.  It is inspired by a hashing routine found in the
   very old Cyber `snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = (value * 31 + ch) % n_buckets;
  return value;
}

#endif /* not USE_DIFF_HASH */
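
/* Illustrative sketch only (not part of this file): `hash_string' is meant
   as a building block.  A caller storing NUL-terminated strings as its user
   entries might combine it with `strcmp' like this and hand both functions
   to `hash_initialize'.  The function names are invented for the example.  */
#if 0
#include <string.h>

static size_t
string_hasher (const void *entry, size_t table_size)
{
  return hash_string (entry, table_size);
}

static bool
string_comparator (const void *a, const void *b)
{
  return strcmp (a, b) == 0;
}
#endif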

/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool
is_prime (size_t candidate)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  while (square < candidate && (candidate % divisor))
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static size_t
next_prime (size_t candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (SIZE_MAX != candidate && !is_prime (candidate))
    candidate += 2;

  return candidate;
}
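
/* Side note on `is_prime' above (illustration only, not part of this file):
   `square' tracks divisor * divisor without any multiplication inside the
   loop, using the identity (d + 2)^2 = d^2 + 4*(d + 1), spread over the two
   increments.  Starting from d = 3, square goes 9 -> 25 -> 49 as d goes
   3 -> 5 -> 7.  The snippet below merely re-checks that claim.  */
#if 0
static void
check_square_identity (void)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  while (divisor < 99)
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
      if (square != divisor * divisor)
        abort ();
    }
}
#endif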

void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}

/* If the user passes a NULL hasher, we hash the raw pointer.  */
static size_t
raw_hasher (const void *data, size_t n)
{
  /* When hashing unique pointers, it is often the case that they were
     generated by malloc and thus have the property that the low-order
     bits are 0.  As this tends to give poorer performance with small
     tables, we rotate the pointer value before performing division,
     in an attempt to improve hash quality.  */
  size_t val = rotr_sz ((size_t) data, 3);
  return val % n;
}

/* If the user passes a NULL comparator, we use pointer comparison.  */
static bool
raw_comparator (const void *a, const void *b)
{
  return a == b;
}

/* For the given hash TABLE, check the user supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right to further modify the
   tuning arguments), and return false.  */

static bool
check_tuning (Hash_table *table)
{
  const Hash_tuning *tuning = table->tuning;
  float epsilon;
  if (tuning == &default_tuning)
    return true;

  /* Be a bit stricter than mathematics would require, so that
     rounding errors in size calculations do not cause allocations to
     fail to grow or shrink as they should.  The smallest allocation
     is 11 (due to next_prime's algorithm), so an epsilon of 0.1
     should be good enough.  */
  epsilon = 0.1f;

  if (epsilon < tuning->growth_threshold
      && tuning->growth_threshold < 1 - epsilon
      && 1 + epsilon < tuning->growth_factor
      && 0 <= tuning->shrink_threshold
      && tuning->shrink_threshold + epsilon < tuning->shrink_factor
      && tuning->shrink_factor <= 1
      && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
    return true;

  table->tuning = &default_tuning;
  return false;
}

/* Compute the size of the bucket array for the given CANDIDATE and
   TUNING, or return 0 if there is no possible way to allocate that
   many entries.  */

static size_t
compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
{
  if (!tuning->is_n_buckets)
    {
      float new_candidate = candidate / tuning->growth_threshold;
      if (SIZE_MAX <= new_candidate)
        return 0;
      candidate = new_candidate;
    }
  candidate = next_prime (candidate);
  if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
    return 0;
  return candidate;
}

/* Allocate and return a new hash table, or NULL upon failure.  The initial
   number of buckets is automatically selected so as to _guarantee_ that you
   may insert at least CANDIDATE different user entries before any growth of
   the hash table size occurs.  So, if you have a reasonably tight a priori
   upper bound on the number of entries you intend to insert in the hash
   table, you may save some table memory and insertion time by specifying it
   here.  If the IS_N_BUCKETS field of the TUNING structure is true, the
   CANDIDATE argument has its meaning changed to the wanted number of buckets.

   TUNING points to a structure of user-supplied values, in case some fine
   tuning is wanted over the default behavior of the hasher.  If TUNING is
   NULL, the default tuning parameters are used instead.  If TUNING is
   provided but the values requested are out of bounds or might cause
   rounding errors, return NULL.

   The user-supplied HASHER function, when not NULL, accepts two
   arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
   slot number for that entry which should be in the range 0..TABLE_SIZE-1.
   This slot number is then returned.

   The user-supplied COMPARATOR function, when not NULL, accepts two
   arguments pointing to user data; it returns true for a pair of entries
   that compare equal, or false otherwise.  This function is internally called
   on entries which are already known to hash to the same bucket index,
   but which are distinct pointers.

   The user-supplied DATA_FREER function, when not NULL, may be later called
   with the user data as an argument, just before the entry containing the
   data gets freed.  This happens from within `hash_free' or `hash_clear'.
   You should specify this function only if you want these functions to free
   all of your `data' data.  This is typically the case when your data is
   simply an auxiliary struct that you have malloc'd to aggregate several
   values.  */

Hash_table *
hash_initialize (size_t candidate, const Hash_tuning *tuning,
                 Hash_hasher hasher, Hash_comparator comparator,
                 Hash_data_freer data_freer)
{
  Hash_table *table;

  if (hasher == NULL)
    hasher = raw_hasher;
  if (comparator == NULL)
    comparator = raw_comparator;

  table = malloc (sizeof *table);
  if (table == NULL)
    return NULL;

  if (!tuning)
    tuning = &default_tuning;
  table->tuning = tuning;
  if (!check_tuning (table))
    {
      /* Fail if the tuning options are invalid.  This is the only occasion
         when the user gets some feedback about it.  Once the table is created,
         if the user provides invalid tuning options, we silently revert to
         using the defaults, and ignore further requests to change the tuning
         options.  */
      goto fail;
    }

  table->n_buckets = compute_bucket_size (candidate, tuning);
  if (!table->n_buckets)
    goto fail;

  table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
  if (table->bucket == NULL)
    goto fail;
  table->bucket_limit = table->bucket + table->n_buckets;
  table->n_buckets_used = 0;
  table->n_entries = 0;

  table->hasher = hasher;
  table->comparator = comparator;
  table->data_freer = data_freer;

  table->free_entry_list = NULL;
#if USE_OBSTACK
  obstack_init (&table->entry_stack);
#endif
  return table;

 fail:
  free (table);
  return NULL;
}

/* Make all buckets empty, placing any chained entries on the free list.
   Apply the user-specified function data_freer (if any) to the data of any
   affected entries.  */

void
hash_clear (Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor;
          struct hash_entry *next;

          /* Free the bucket overflow.  */
          for (cursor = bucket->next; cursor; cursor = next)
            {
              if (table->data_freer)
                table->data_freer (cursor->data);
              cursor->data = NULL;

              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be expected
                 that overflows are either rare or short.  */
              cursor->next = table->free_entry_list;
              table->free_entry_list = cursor;
            }

          /* Free the bucket head.  */
          if (table->data_freer)
            table->data_freer (bucket->data);
          bucket->data = NULL;
          bucket->next = NULL;
        }
    }

  table->n_buckets_used = 0;
  table->n_entries = 0;
}
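
/* Illustrative sketch only (not part of this file): a complete lifecycle
   using the string hasher/comparator sketched earlier, with `free' as the
   DATA_FREER so that `hash_free' releases the strdup'd entries.  Error
   handling is reduced to a minimum, and the function name is invented for
   the example.  */
#if 0
#include <string.h>

static void
lifecycle_example (void)
{
  Hash_table *table;
  char *word;

  table = hash_initialize (100, NULL, string_hasher, string_comparator, free);
  if (table == NULL)
    return;

  word = strdup ("example");
  if (word && hash_insert (table, word) != word)
    free (word);                /* not stored (failure or duplicate) */

  if (hash_lookup (table, "example"))
    puts ("found");

  /* hash_delete hands the user data back; it is not freed automatically.  */
  free (hash_delete (table, "example"));

  /* hash_free applies the DATA_FREER (here `free') to remaining entries.  */
  hash_free (table);
}
#endif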

/* Reclaim all storage associated with a hash table.  If a data_freer
   function has been supplied by the user when the hash table was created,
   this function applies it to the data of each entry before freeing that
   entry.  */

void
hash_free (Hash_table *table)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;

  /* Call the user data_freer function.  */
  if (table->data_freer && table->n_entries)
    {
      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
        {
          if (bucket->data)
            {
              for (cursor = bucket; cursor; cursor = cursor->next)
                table->data_freer (cursor->data);
            }
        }
    }

#if USE_OBSTACK

  obstack_free (&table->entry_stack, NULL);

#else

  /* Free all bucket overflowed entries.  */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      for (cursor = bucket->next; cursor; cursor = next)
        {
          next = cursor->next;
          free (cursor);
        }
    }

  /* Also reclaim the internal list of previously freed entries.  */
  for (cursor = table->free_entry_list; cursor; cursor = next)
    {
      next = cursor->next;
      free (cursor);
    }

#endif

  /* Free the remainder of the hash table structure.  */
  free (table->bucket);
  free (table);
}

/* Insertion and deletion.  */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one.  If this is not possible, allocate a new one.  */

static struct hash_entry *
allocate_entry (Hash_table *table)
{
  struct hash_entry *new;

  if (table->free_entry_list)
    {
      new = table->free_entry_list;
      table->free_entry_list = new->next;
    }
  else
    {
#if USE_OBSTACK
      new = obstack_alloc (&table->entry_stack, sizeof *new);
#else
      new = malloc (sizeof *new);
#endif
    }

  return new;
}

/* Free a hash entry which was part of some bucket overflow,
   saving it for later recycling.  */

static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.  When
   ENTRY matches an entry in the table, return a pointer to the corresponding
   user data and set *BUCKET_HEAD to the head of the selected bucket.
   Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
   the table, unlink the matching entry.  */

static void *
hash_find_entry (Hash_table *table, const void *entry,
                 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry *cursor;

  if (! (bucket < table->bucket_limit))
    abort ();

  *bucket_head = bucket;

  /* Test for empty bucket.  */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket.  */
  if (entry == bucket->data || table->comparator (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
        {
          if (bucket->next)
            {
              struct hash_entry *next = bucket->next;

              /* Bump the first overflow entry into the bucket head, then save
                 the previous first overflow entry for later recycling.  */
              *bucket = *next;
              free_entry (table, next);
            }
          else
            {
              bucket->data = NULL;
            }
        }

      return data;
    }

  /* Scan the bucket overflow.  */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if (entry == cursor->next->data
          || table->comparator (entry, cursor->next->data))
        {
          void *data = cursor->next->data;

          if (delete)
            {
              struct hash_entry *next = cursor->next;

              /* Unlink the entry to delete, then save the freed entry for
                 later recycling.  */
              cursor->next = next->next;
              free_entry (table, next);
            }

          return data;
        }
    }

  /* No entry found.  */
  return NULL;
}

/* Internal helper, to move entries from SRC to DST.  Both tables must
   share the same free entry list.  If SAFE, only move overflow
   entries, saving bucket heads for later, so that no allocations will
   occur.  Return false if the free entry list is exhausted and an
   allocation fails.  */

static bool
transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
    if (bucket->data)
      {
        void *data;
        struct hash_entry *new_bucket;

        /* Within each bucket, transfer overflow entries first and
           then the bucket head, to minimize memory pressure.  After
           all, the only time we might allocate is when moving the
           bucket head, but moving overflow entries first may create
           free entries that can be recycled by the time we finally
           get to the bucket head.  */
        for (cursor = bucket->next; cursor; cursor = next)
          {
            data = cursor->data;
            new_bucket = (dst->bucket + dst->hasher (data, dst->n_buckets));

            if (! (new_bucket < dst->bucket_limit))
              abort ();

            next = cursor->next;

            if (new_bucket->data)
              {
                /* Merely relink an existing entry, when moving from a
                   bucket overflow into a bucket overflow.  */
                cursor->next = new_bucket->next;
                new_bucket->next = cursor;
              }
            else
              {
                /* Free an existing entry, when moving from a bucket
                   overflow into a bucket header.  */
                new_bucket->data = data;
                dst->n_buckets_used++;
                free_entry (dst, cursor);
              }
          }
        /* Now move the bucket head.  Be sure that if we fail due to
           allocation failure that the src table is in a consistent
           state.  */
        data = bucket->data;
        bucket->next = NULL;
        if (safe)
          continue;
        new_bucket = (dst->bucket + dst->hasher (data, dst->n_buckets));

        if (! (new_bucket < dst->bucket_limit))
          abort ();

        if (new_bucket->data)
          {
            /* Allocate or recycle an entry, when moving from a bucket
               header into a bucket overflow.  */
            struct hash_entry *new_entry = allocate_entry (dst);

            if (new_entry == NULL)
              return false;

            new_entry->data = data;
            new_entry->next = new_bucket->next;
            new_bucket->next = new_entry;
          }
        else
          {
            /* Move from one bucket header to another.  */
            new_bucket->data = data;
            dst->n_buckets_used++;
          }
        bucket->data = NULL;
        src->n_buckets_used--;
      }
  return true;
}

/* For an already existing hash table, change the number of buckets through
   specifying CANDIDATE.  The contents of the hash table are preserved.  The
   new number of buckets is automatically selected so as to _guarantee_ that
   the table may receive at least CANDIDATE different user entries, including
   those already in the table, before any other growth of the hash table size
   occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
   exact number of buckets desired.  Return true iff the rehash succeeded.  */

bool
hash_rehash (Hash_table *table, size_t candidate)
{
  Hash_table storage;
  Hash_table *new_table;
  size_t new_size = compute_bucket_size (candidate, table->tuning);

  if (!new_size)
    return false;
  if (new_size == table->n_buckets)
    return true;
  new_table = &storage;
  new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
  if (new_table->bucket == NULL)
    return false;
  new_table->n_buckets = new_size;
  new_table->bucket_limit = new_table->bucket + new_size;
  new_table->n_buckets_used = 0;
  new_table->n_entries = 0;
  new_table->tuning = table->tuning;
  new_table->hasher = table->hasher;
  new_table->comparator = table->comparator;
  new_table->data_freer = table->data_freer;

  /* In order for the transfer to successfully complete, we need
     additional overflow entries when distinct buckets in the old
     table collide into a common bucket in the new table.  The worst
     case possible is a hasher that gives a good spread with the old
     size, but returns a constant with the new size; if we were to
     guarantee table->n_buckets_used-1 free entries in advance, then
     the transfer would be guaranteed to not allocate memory.
     However, for large tables, a guarantee of no further allocation
     introduces a lot of extra memory pressure, all for an unlikely
     corner case (most rehashes reduce, rather than increase, the
     number of overflow entries needed).  So, we instead ensure that
     the transfer process can be reversed if we hit a memory
     allocation failure mid-transfer.  */

  /* Merely reuse the extra old space into the new table.  */
#if USE_OBSTACK
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  if (transfer_entries (new_table, table, false))
    {
      /* Entries transferred successfully; tie up the loose ends.  */
      free (table->bucket);
      table->bucket = new_table->bucket;
      table->bucket_limit = new_table->bucket_limit;
      table->n_buckets = new_table->n_buckets;
      table->n_buckets_used = new_table->n_buckets_used;
      table->free_entry_list = new_table->free_entry_list;
      /* table->n_entries and table->entry_stack already hold their value.  */
      return true;
    }

  /* We've allocated new_table->bucket (and possibly some entries),
     exhausted the free list, and moved some but not all entries into
     new_table.  We must undo the partial move before returning
     failure.  The only way to get into this situation is if new_table
     uses fewer buckets than the old table, so we will reclaim some
     free entries as overflows in the new table are put back into
     distinct buckets in the old table.

     There are some pathological cases where a single pass through the
     table requires more intermediate overflow entries than using two
     passes.  Two passes give worse cache performance and take
     longer, but at this point, we're already out of memory, so slow
     and safe is better than failure.  */
  table->free_entry_list = new_table->free_entry_list;
  if (! (transfer_entries (table, new_table, true)
         && transfer_entries (table, new_table, false)))
    abort ();
  /* table->n_entries already holds its value.  */
  free (new_table->bucket);
  return false;
}
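
/* Illustrative sketch only (not part of this file): `hash_rehash' can be
   used to pre-size a table before a known burst of insertions, so that no
   growth happens while inserting.  The function name is invented for the
   example.  */
#if 0
static bool
reserve_for_burst (Hash_table *table, size_t upcoming_entries)
{
  /* CANDIDATE counts user entries (unless TUNING->IS_N_BUCKETS is true),
     so the current entry count plus the expected additions is enough.  */
  return hash_rehash (table, hash_get_n_entries (table) + upcoming_entries);
}
#endif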

/* If ENTRY matches an entry already in the hash table, return the pointer
   to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
   Return NULL if the storage required for insertion cannot be allocated.
   This implementation does not support duplicate entries or insertion of
   NULL.  */

void *
hash_insert (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  /* The caller cannot insert a NULL entry.  */
  if (! entry)
    abort ();

  /* If there's a matching entry already in the table, return that.  */
  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
    return data;

  /* If the growth threshold of the buckets in use has been reached, increase
     the table size and rehash.  There's no point in checking the number of
     entries:  if the hashing function is ill-conditioned, rehashing is not
     likely to improve it.  */

  if (table->n_buckets_used
      > table->tuning->growth_threshold * table->n_buckets)
    {
      /* Check more fully, before starting real work.  If tuning arguments
         became invalid, the second check will rely on proper defaults.  */
      check_tuning (table);
      if (table->n_buckets_used
          > table->tuning->growth_threshold * table->n_buckets)
        {
          const Hash_tuning *tuning = table->tuning;
          float candidate =
            (tuning->is_n_buckets
             ? (table->n_buckets * tuning->growth_factor)
             : (table->n_buckets * tuning->growth_factor
                * tuning->growth_threshold));

          if (SIZE_MAX <= candidate)
            return NULL;

          /* If the rehash fails, arrange to return NULL.  */
          if (!hash_rehash (table, candidate))
            return NULL;

          /* Update the bucket we are interested in.  */
          if (hash_find_entry (table, entry, &bucket, false) != NULL)
            abort ();
        }
    }

  /* ENTRY is not matched; it should be inserted.  */

  if (bucket->data)
    {
      struct hash_entry *new_entry = allocate_entry (table);

      if (new_entry == NULL)
        return NULL;

      /* Add ENTRY in the overflow of the bucket.  */

      new_entry->data = (void *) entry;
      new_entry->next = bucket->next;
      bucket->next = new_entry;
      table->n_entries++;
      return (void *) entry;
    }

  /* Add ENTRY right in the bucket head.  */

  bucket->data = (void *) entry;
  table->n_entries++;
  table->n_buckets_used++;

  return (void *) entry;
}

/* If ENTRY is already in the table, remove it and return the just-deleted
   data (the user may want to deallocate its storage).  If ENTRY is not in the
   table, don't modify the table and return NULL.  */

void *
hash_delete (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  data = hash_find_entry (table, entry, &bucket, true);
  if (!data)
    return NULL;

  table->n_entries--;
  if (!bucket->data)
    {
      table->n_buckets_used--;

      /* If the shrink threshold of the buckets in use has been reached,
         rehash into a smaller table.  */

      if (table->n_buckets_used
          < table->tuning->shrink_threshold * table->n_buckets)
        {
          /* Check more fully, before starting real work.  If tuning arguments
             became invalid, the second check will rely on proper defaults.  */
          check_tuning (table);
          if (table->n_buckets_used
              < table->tuning->shrink_threshold * table->n_buckets)
            {
              const Hash_tuning *tuning = table->tuning;
              size_t candidate =
                (tuning->is_n_buckets
                 ? table->n_buckets * tuning->shrink_factor
                 : (table->n_buckets * tuning->shrink_factor
                    * tuning->growth_threshold));

              if (!hash_rehash (table, candidate))
                {
                  /* Failure to allocate memory in an attempt to
                     shrink the table is not fatal.  But since memory
                     is low, we can at least be kind and free any
                     spare entries, rather than keeping them tied up
                     in the free entry list.  */
#if ! USE_OBSTACK
                  struct hash_entry *cursor = table->free_entry_list;
                  struct hash_entry *next;
                  while (cursor)
                    {
                      next = cursor->next;
                      free (cursor);
                      cursor = next;
                    }
                  table->free_entry_list = NULL;
#endif
                }
            }
        }
    }

  return data;
}

/* Testing.  */

#if TESTING

void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket = (struct hash_entry *) table->bucket;

  for ( ; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      if (bucket)
        printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char const *s = cursor->data;
          /* FIXME */
          if (s)
            printf ("  %s\n", s);
        }
    }
}

#endif /* TESTING */