xref: /dflybsd-src/contrib/gcc-8.0/libiberty/hashtab.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* An expandable hash table datatype.
2*38fd1498Szrj    Copyright (C) 1999-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Vladimir Makarov (vmakarov@cygnus.com).
4*38fd1498Szrj 
5*38fd1498Szrj This file is part of the libiberty library.
6*38fd1498Szrj Libiberty is free software; you can redistribute it and/or
7*38fd1498Szrj modify it under the terms of the GNU Library General Public
8*38fd1498Szrj License as published by the Free Software Foundation; either
9*38fd1498Szrj version 2 of the License, or (at your option) any later version.
10*38fd1498Szrj 
11*38fd1498Szrj Libiberty is distributed in the hope that it will be useful,
12*38fd1498Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14*38fd1498Szrj Library General Public License for more details.
15*38fd1498Szrj 
16*38fd1498Szrj You should have received a copy of the GNU Library General Public
17*38fd1498Szrj License along with libiberty; see the file COPYING.LIB.  If
18*38fd1498Szrj not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
19*38fd1498Szrj Boston, MA 02110-1301, USA.  */
20*38fd1498Szrj 
21*38fd1498Szrj /* This package implements basic hash table functionality.  It is possible
22*38fd1498Szrj    to search for an entry, create an entry and destroy an entry.
23*38fd1498Szrj 
24*38fd1498Szrj    Elements in the table are generic pointers.
25*38fd1498Szrj 
26*38fd1498Szrj    The size of the table is not fixed; if the occupancy of the table
27*38fd1498Szrj    grows too high the hash table will be expanded.
28*38fd1498Szrj 
29*38fd1498Szrj    The abstract data implementation is based on generalized Algorithm D
30*38fd1498Szrj    from Knuth's book "The art of computer programming".  Hash table is
31*38fd1498Szrj    expanded by creation of new hash table and transferring elements from
32*38fd1498Szrj    the old table to the new table. */
33*38fd1498Szrj 
34*38fd1498Szrj #ifdef HAVE_CONFIG_H
35*38fd1498Szrj #include "config.h"
36*38fd1498Szrj #endif
37*38fd1498Szrj 
38*38fd1498Szrj #include <sys/types.h>
39*38fd1498Szrj 
40*38fd1498Szrj #ifdef HAVE_STDLIB_H
41*38fd1498Szrj #include <stdlib.h>
42*38fd1498Szrj #endif
43*38fd1498Szrj #ifdef HAVE_STRING_H
44*38fd1498Szrj #include <string.h>
45*38fd1498Szrj #endif
46*38fd1498Szrj #ifdef HAVE_MALLOC_H
47*38fd1498Szrj #include <malloc.h>
48*38fd1498Szrj #endif
49*38fd1498Szrj #ifdef HAVE_LIMITS_H
50*38fd1498Szrj #include <limits.h>
51*38fd1498Szrj #endif
52*38fd1498Szrj #ifdef HAVE_INTTYPES_H
53*38fd1498Szrj #include <inttypes.h>
54*38fd1498Szrj #endif
55*38fd1498Szrj #ifdef HAVE_STDINT_H
56*38fd1498Szrj #include <stdint.h>
57*38fd1498Szrj #endif
58*38fd1498Szrj 
59*38fd1498Szrj #include <stdio.h>
60*38fd1498Szrj 
61*38fd1498Szrj #include "libiberty.h"
62*38fd1498Szrj #include "ansidecl.h"
63*38fd1498Szrj #include "hashtab.h"
64*38fd1498Szrj 
65*38fd1498Szrj #ifndef CHAR_BIT
66*38fd1498Szrj #define CHAR_BIT 8
67*38fd1498Szrj #endif
68*38fd1498Szrj 
69*38fd1498Szrj static unsigned int higher_prime_index (unsigned long);
70*38fd1498Szrj static hashval_t htab_mod_1 (hashval_t, hashval_t, hashval_t, int);
71*38fd1498Szrj static hashval_t htab_mod (hashval_t, htab_t);
72*38fd1498Szrj static hashval_t htab_mod_m2 (hashval_t, htab_t);
73*38fd1498Szrj static hashval_t hash_pointer (const void *);
74*38fd1498Szrj static int eq_pointer (const void *, const void *);
75*38fd1498Szrj static int htab_expand (htab_t);
76*38fd1498Szrj static PTR *find_empty_slot_for_expand (htab_t, hashval_t);
77*38fd1498Szrj 
78*38fd1498Szrj /* At some point, we could make these be NULL, and modify the
79*38fd1498Szrj    hash-table routines to handle NULL specially; that would avoid
80*38fd1498Szrj    function-call overhead for the common case of hashing pointers.  */
81*38fd1498Szrj htab_hash htab_hash_pointer = hash_pointer;
82*38fd1498Szrj htab_eq htab_eq_pointer = eq_pointer;
83*38fd1498Szrj 
84*38fd1498Szrj /* Table of primes and multiplicative inverses.
85*38fd1498Szrj 
86*38fd1498Szrj    Note that these are not minimally reduced inverses.  Unlike when generating
87*38fd1498Szrj    code to divide by a constant, we want to be able to use the same algorithm
88*38fd1498Szrj    all the time.  All of these inverses (are implied to) have bit 32 set.
89*38fd1498Szrj 
90*38fd1498Szrj    For the record, here's the function that computed the table; it's a
91*38fd1498Szrj    vastly simplified version of the function of the same name from gcc.  */
92*38fd1498Szrj 
93*38fd1498Szrj #if 0
/* Return the ceiling of the base-2 logarithm of X, i.e. the smallest E
   such that X <= 2**E.  X == 1 yields 0.  X == 0 has no logarithm and
   aborts.  */
unsigned int
ceil_log2 (unsigned int x)
{
  int i;

  /* 2**0 == 1, so one needs an exponent of zero.  The scan below only
     recognises values strictly greater than a power of two and would
     otherwise fall through to abort for this input.  */
  if (x == 1)
    return 0;

  /* Find the largest power of two strictly below X; the answer is one
     more than its exponent.  */
  for (i = 31; i >= 0; --i)
    if (x > (1u << i))
      return i + 1;

  /* Only X == 0 reaches this point.  */
  abort ();
}
103*38fd1498Szrj 
/* Compute the multiplier and shift count used to divide by D.
   The low 32 bits of the multiplier are stored in *MLP and the shift
   count (ceil_log2 (D) - 1) in *SHIFTP; the bits of the multiplier
   above bit 31 are returned.  */
unsigned int
choose_multiplier (unsigned int d, unsigned int *mlp, unsigned char *shiftp)
{
  const int word_bits = 32;
  const int prec_bits = 32;
  int log_d = ceil_log2 (d);
  int hi_exp = word_bits + log_d;
  int lo_exp = word_bits + log_d - prec_bits;
  /* (2**hi_exp + 2**lo_exp) / d, evaluated in double precision and then
     truncated to an integer.  */
  double numerator = ldexp (1.0, hi_exp) + ldexp (1.0, lo_exp);
  unsigned long long quotient = numerator / d;

  *shiftp = log_d - 1;
  *mlp = quotient;
  return quotient >> 32;
}
124*38fd1498Szrj #endif
125*38fd1498Szrj 
126*38fd1498Szrj struct prime_ent
127*38fd1498Szrj {
128*38fd1498Szrj   hashval_t prime;
129*38fd1498Szrj   hashval_t inv;
130*38fd1498Szrj   hashval_t inv_m2;	/* inverse of prime-2 */
131*38fd1498Szrj   hashval_t shift;
132*38fd1498Szrj };
133*38fd1498Szrj 
134*38fd1498Szrj static struct prime_ent const prime_tab[] = {
135*38fd1498Szrj   {          7, 0x24924925, 0x9999999b, 2 },
136*38fd1498Szrj   {         13, 0x3b13b13c, 0x745d1747, 3 },
137*38fd1498Szrj   {         31, 0x08421085, 0x1a7b9612, 4 },
138*38fd1498Szrj   {         61, 0x0c9714fc, 0x15b1e5f8, 5 },
139*38fd1498Szrj   {        127, 0x02040811, 0x0624dd30, 6 },
140*38fd1498Szrj   {        251, 0x05197f7e, 0x073260a5, 7 },
141*38fd1498Szrj   {        509, 0x01824366, 0x02864fc8, 8 },
142*38fd1498Szrj   {       1021, 0x00c0906d, 0x014191f7, 9 },
143*38fd1498Szrj   {       2039, 0x0121456f, 0x0161e69e, 10 },
144*38fd1498Szrj   {       4093, 0x00300902, 0x00501908, 11 },
145*38fd1498Szrj   {       8191, 0x00080041, 0x00180241, 12 },
146*38fd1498Szrj   {      16381, 0x000c0091, 0x00140191, 13 },
147*38fd1498Szrj   {      32749, 0x002605a5, 0x002a06e6, 14 },
148*38fd1498Szrj   {      65521, 0x000f00e2, 0x00110122, 15 },
149*38fd1498Szrj   {     131071, 0x00008001, 0x00018003, 16 },
150*38fd1498Szrj   {     262139, 0x00014002, 0x0001c004, 17 },
151*38fd1498Szrj   {     524287, 0x00002001, 0x00006001, 18 },
152*38fd1498Szrj   {    1048573, 0x00003001, 0x00005001, 19 },
153*38fd1498Szrj   {    2097143, 0x00004801, 0x00005801, 20 },
154*38fd1498Szrj   {    4194301, 0x00000c01, 0x00001401, 21 },
155*38fd1498Szrj   {    8388593, 0x00001e01, 0x00002201, 22 },
156*38fd1498Szrj   {   16777213, 0x00000301, 0x00000501, 23 },
157*38fd1498Szrj   {   33554393, 0x00001381, 0x00001481, 24 },
158*38fd1498Szrj   {   67108859, 0x00000141, 0x000001c1, 25 },
159*38fd1498Szrj   {  134217689, 0x000004e1, 0x00000521, 26 },
160*38fd1498Szrj   {  268435399, 0x00000391, 0x000003b1, 27 },
161*38fd1498Szrj   {  536870909, 0x00000019, 0x00000029, 28 },
162*38fd1498Szrj   { 1073741789, 0x0000008d, 0x00000095, 29 },
163*38fd1498Szrj   { 2147483647, 0x00000003, 0x00000007, 30 },
164*38fd1498Szrj   /* Avoid "decimal constant so large it is unsigned" for 4294967291.  */
165*38fd1498Szrj   { 0xfffffffb, 0x00000006, 0x00000008, 31 }
166*38fd1498Szrj };
167*38fd1498Szrj 
168*38fd1498Szrj /* The following function returns an index into the above table of the
169*38fd1498Szrj    nearest prime number which is greater than N, and near a power of two. */
170*38fd1498Szrj 
171*38fd1498Szrj static unsigned int
higher_prime_index(unsigned long n)172*38fd1498Szrj higher_prime_index (unsigned long n)
173*38fd1498Szrj {
174*38fd1498Szrj   unsigned int low = 0;
175*38fd1498Szrj   unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]);
176*38fd1498Szrj 
177*38fd1498Szrj   while (low != high)
178*38fd1498Szrj     {
179*38fd1498Szrj       unsigned int mid = low + (high - low) / 2;
180*38fd1498Szrj       if (n > prime_tab[mid].prime)
181*38fd1498Szrj 	low = mid + 1;
182*38fd1498Szrj       else
183*38fd1498Szrj 	high = mid;
184*38fd1498Szrj     }
185*38fd1498Szrj 
186*38fd1498Szrj   /* If we've run out of primes, abort.  */
187*38fd1498Szrj   if (n > prime_tab[low].prime)
188*38fd1498Szrj     {
189*38fd1498Szrj       fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
190*38fd1498Szrj       abort ();
191*38fd1498Szrj     }
192*38fd1498Szrj 
193*38fd1498Szrj   return low;
194*38fd1498Szrj }
195*38fd1498Szrj 
196*38fd1498Szrj /* Returns non-zero if P1 and P2 are equal.  */
197*38fd1498Szrj 
198*38fd1498Szrj static int
eq_pointer(const PTR p1,const PTR p2)199*38fd1498Szrj eq_pointer (const PTR p1, const PTR p2)
200*38fd1498Szrj {
201*38fd1498Szrj   return p1 == p2;
202*38fd1498Szrj }
203*38fd1498Szrj 
204*38fd1498Szrj 
205*38fd1498Szrj /* The parens around the function names in the next two definitions
206*38fd1498Szrj    are essential in order to prevent macro expansions of the name.
207*38fd1498Szrj    The bodies, however, are expanded as expected, so they are not
208*38fd1498Szrj    recursive definitions.  */
209*38fd1498Szrj 
210*38fd1498Szrj /* Return the current size of given hash table.  */
211*38fd1498Szrj 
212*38fd1498Szrj #define htab_size(htab)  ((htab)->size)
213*38fd1498Szrj 
size_t(htab_size)214*38fd1498Szrj size_t
215*38fd1498Szrj (htab_size) (htab_t htab)
216*38fd1498Szrj {
217*38fd1498Szrj   return htab_size (htab);
218*38fd1498Szrj }
219*38fd1498Szrj 
220*38fd1498Szrj /* Return the current number of elements in given hash table. */
221*38fd1498Szrj 
222*38fd1498Szrj #define htab_elements(htab)  ((htab)->n_elements - (htab)->n_deleted)
223*38fd1498Szrj 
size_t(htab_elements)224*38fd1498Szrj size_t
225*38fd1498Szrj (htab_elements) (htab_t htab)
226*38fd1498Szrj {
227*38fd1498Szrj   return htab_elements (htab);
228*38fd1498Szrj }
229*38fd1498Szrj 
230*38fd1498Szrj /* Return X % Y.  */
231*38fd1498Szrj 
232*38fd1498Szrj static inline hashval_t
htab_mod_1(hashval_t x,hashval_t y,hashval_t inv,int shift)233*38fd1498Szrj htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift)
234*38fd1498Szrj {
235*38fd1498Szrj   /* The multiplicative inverses computed above are for 32-bit types, and
236*38fd1498Szrj      requires that we be able to compute a highpart multiply.  */
237*38fd1498Szrj #ifdef UNSIGNED_64BIT_TYPE
238*38fd1498Szrj   __extension__ typedef UNSIGNED_64BIT_TYPE ull;
239*38fd1498Szrj   if (sizeof (hashval_t) * CHAR_BIT <= 32)
240*38fd1498Szrj     {
241*38fd1498Szrj       hashval_t t1, t2, t3, t4, q, r;
242*38fd1498Szrj 
243*38fd1498Szrj       t1 = ((ull)x * inv) >> 32;
244*38fd1498Szrj       t2 = x - t1;
245*38fd1498Szrj       t3 = t2 >> 1;
246*38fd1498Szrj       t4 = t1 + t3;
247*38fd1498Szrj       q  = t4 >> shift;
248*38fd1498Szrj       r  = x - (q * y);
249*38fd1498Szrj 
250*38fd1498Szrj       return r;
251*38fd1498Szrj     }
252*38fd1498Szrj #endif
253*38fd1498Szrj 
254*38fd1498Szrj   /* Otherwise just use the native division routines.  */
255*38fd1498Szrj   return x % y;
256*38fd1498Szrj }
257*38fd1498Szrj 
258*38fd1498Szrj /* Compute the primary hash for HASH given HTAB's current size.  */
259*38fd1498Szrj 
260*38fd1498Szrj static inline hashval_t
htab_mod(hashval_t hash,htab_t htab)261*38fd1498Szrj htab_mod (hashval_t hash, htab_t htab)
262*38fd1498Szrj {
263*38fd1498Szrj   const struct prime_ent *p = &prime_tab[htab->size_prime_index];
264*38fd1498Szrj   return htab_mod_1 (hash, p->prime, p->inv, p->shift);
265*38fd1498Szrj }
266*38fd1498Szrj 
267*38fd1498Szrj /* Compute the secondary hash for HASH given HTAB's current size.  */
268*38fd1498Szrj 
269*38fd1498Szrj static inline hashval_t
htab_mod_m2(hashval_t hash,htab_t htab)270*38fd1498Szrj htab_mod_m2 (hashval_t hash, htab_t htab)
271*38fd1498Szrj {
272*38fd1498Szrj   const struct prime_ent *p = &prime_tab[htab->size_prime_index];
273*38fd1498Szrj   return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift);
274*38fd1498Szrj }
275*38fd1498Szrj 
276*38fd1498Szrj /* This function creates table with length slightly longer than given
277*38fd1498Szrj    source length.  Created hash table is initiated as empty (all the
278*38fd1498Szrj    hash table entries are HTAB_EMPTY_ENTRY).  The function returns the
279*38fd1498Szrj    created hash table, or NULL if memory allocation fails.  */
280*38fd1498Szrj 
281*38fd1498Szrj htab_t
htab_create_alloc(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f,htab_alloc alloc_f,htab_free free_f)282*38fd1498Szrj htab_create_alloc (size_t size, htab_hash hash_f, htab_eq eq_f,
283*38fd1498Szrj                    htab_del del_f, htab_alloc alloc_f, htab_free free_f)
284*38fd1498Szrj {
285*38fd1498Szrj   return htab_create_typed_alloc (size, hash_f, eq_f, del_f, alloc_f, alloc_f,
286*38fd1498Szrj 				  free_f);
287*38fd1498Szrj }
288*38fd1498Szrj 
289*38fd1498Szrj /* As above, but uses the variants of ALLOC_F and FREE_F which accept
290*38fd1498Szrj    an extra argument.  */
291*38fd1498Szrj 
292*38fd1498Szrj htab_t
htab_create_alloc_ex(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f,void * alloc_arg,htab_alloc_with_arg alloc_f,htab_free_with_arg free_f)293*38fd1498Szrj htab_create_alloc_ex (size_t size, htab_hash hash_f, htab_eq eq_f,
294*38fd1498Szrj 		      htab_del del_f, void *alloc_arg,
295*38fd1498Szrj 		      htab_alloc_with_arg alloc_f,
296*38fd1498Szrj 		      htab_free_with_arg free_f)
297*38fd1498Szrj {
298*38fd1498Szrj   htab_t result;
299*38fd1498Szrj   unsigned int size_prime_index;
300*38fd1498Szrj 
301*38fd1498Szrj   size_prime_index = higher_prime_index (size);
302*38fd1498Szrj   size = prime_tab[size_prime_index].prime;
303*38fd1498Szrj 
304*38fd1498Szrj   result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab));
305*38fd1498Szrj   if (result == NULL)
306*38fd1498Szrj     return NULL;
307*38fd1498Szrj   result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR));
308*38fd1498Szrj   if (result->entries == NULL)
309*38fd1498Szrj     {
310*38fd1498Szrj       if (free_f != NULL)
311*38fd1498Szrj 	(*free_f) (alloc_arg, result);
312*38fd1498Szrj       return NULL;
313*38fd1498Szrj     }
314*38fd1498Szrj   result->size = size;
315*38fd1498Szrj   result->size_prime_index = size_prime_index;
316*38fd1498Szrj   result->hash_f = hash_f;
317*38fd1498Szrj   result->eq_f = eq_f;
318*38fd1498Szrj   result->del_f = del_f;
319*38fd1498Szrj   result->alloc_arg = alloc_arg;
320*38fd1498Szrj   result->alloc_with_arg_f = alloc_f;
321*38fd1498Szrj   result->free_with_arg_f = free_f;
322*38fd1498Szrj   return result;
323*38fd1498Szrj }
324*38fd1498Szrj 
325*38fd1498Szrj /*
326*38fd1498Szrj 
327*38fd1498Szrj @deftypefn Supplemental htab_t htab_create_typed_alloc (size_t @var{size}, @
328*38fd1498Szrj htab_hash @var{hash_f}, htab_eq @var{eq_f}, htab_del @var{del_f}, @
329*38fd1498Szrj htab_alloc @var{alloc_tab_f}, htab_alloc @var{alloc_f}, @
330*38fd1498Szrj htab_free @var{free_f})
331*38fd1498Szrj 
332*38fd1498Szrj This function creates a hash table that uses two different allocators
333*38fd1498Szrj @var{alloc_tab_f} and @var{alloc_f} to use for allocating the table itself
334*38fd1498Szrj and its entries respectively.  This is useful when variables of different
335*38fd1498Szrj types need to be allocated with different allocators.
336*38fd1498Szrj 
337*38fd1498Szrj The created hash table is slightly larger than @var{size} and it is
338*38fd1498Szrj initially empty (all the hash table entries are @code{HTAB_EMPTY_ENTRY}).
339*38fd1498Szrj The function returns the created hash table, or @code{NULL} if memory
340*38fd1498Szrj allocation fails.
341*38fd1498Szrj 
342*38fd1498Szrj @end deftypefn
343*38fd1498Szrj 
344*38fd1498Szrj */
345*38fd1498Szrj 
346*38fd1498Szrj htab_t
htab_create_typed_alloc(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f,htab_alloc alloc_tab_f,htab_alloc alloc_f,htab_free free_f)347*38fd1498Szrj htab_create_typed_alloc (size_t size, htab_hash hash_f, htab_eq eq_f,
348*38fd1498Szrj 			 htab_del del_f, htab_alloc alloc_tab_f,
349*38fd1498Szrj 			 htab_alloc alloc_f, htab_free free_f)
350*38fd1498Szrj {
351*38fd1498Szrj   htab_t result;
352*38fd1498Szrj   unsigned int size_prime_index;
353*38fd1498Szrj 
354*38fd1498Szrj   size_prime_index = higher_prime_index (size);
355*38fd1498Szrj   size = prime_tab[size_prime_index].prime;
356*38fd1498Szrj 
357*38fd1498Szrj   result = (htab_t) (*alloc_tab_f) (1, sizeof (struct htab));
358*38fd1498Szrj   if (result == NULL)
359*38fd1498Szrj     return NULL;
360*38fd1498Szrj   result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR));
361*38fd1498Szrj   if (result->entries == NULL)
362*38fd1498Szrj     {
363*38fd1498Szrj       if (free_f != NULL)
364*38fd1498Szrj 	(*free_f) (result);
365*38fd1498Szrj       return NULL;
366*38fd1498Szrj     }
367*38fd1498Szrj   result->size = size;
368*38fd1498Szrj   result->size_prime_index = size_prime_index;
369*38fd1498Szrj   result->hash_f = hash_f;
370*38fd1498Szrj   result->eq_f = eq_f;
371*38fd1498Szrj   result->del_f = del_f;
372*38fd1498Szrj   result->alloc_f = alloc_f;
373*38fd1498Szrj   result->free_f = free_f;
374*38fd1498Szrj   return result;
375*38fd1498Szrj }
376*38fd1498Szrj 
377*38fd1498Szrj 
378*38fd1498Szrj /* Update the function pointers and allocation parameter in the htab_t.  */
379*38fd1498Szrj 
380*38fd1498Szrj void
htab_set_functions_ex(htab_t htab,htab_hash hash_f,htab_eq eq_f,htab_del del_f,PTR alloc_arg,htab_alloc_with_arg alloc_f,htab_free_with_arg free_f)381*38fd1498Szrj htab_set_functions_ex (htab_t htab, htab_hash hash_f, htab_eq eq_f,
382*38fd1498Szrj                        htab_del del_f, PTR alloc_arg,
383*38fd1498Szrj                        htab_alloc_with_arg alloc_f, htab_free_with_arg free_f)
384*38fd1498Szrj {
385*38fd1498Szrj   htab->hash_f = hash_f;
386*38fd1498Szrj   htab->eq_f = eq_f;
387*38fd1498Szrj   htab->del_f = del_f;
388*38fd1498Szrj   htab->alloc_arg = alloc_arg;
389*38fd1498Szrj   htab->alloc_with_arg_f = alloc_f;
390*38fd1498Szrj   htab->free_with_arg_f = free_f;
391*38fd1498Szrj }
392*38fd1498Szrj 
393*38fd1498Szrj /* These functions exist solely for backward compatibility.  */
394*38fd1498Szrj 
395*38fd1498Szrj #undef htab_create
396*38fd1498Szrj htab_t
htab_create(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f)397*38fd1498Szrj htab_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f)
398*38fd1498Szrj {
399*38fd1498Szrj   return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free);
400*38fd1498Szrj }
401*38fd1498Szrj 
402*38fd1498Szrj htab_t
htab_try_create(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f)403*38fd1498Szrj htab_try_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f)
404*38fd1498Szrj {
405*38fd1498Szrj   return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free);
406*38fd1498Szrj }
407*38fd1498Szrj 
408*38fd1498Szrj /* This function frees all memory allocated for given hash table.
409*38fd1498Szrj    Naturally the hash table must already exist. */
410*38fd1498Szrj 
411*38fd1498Szrj void
htab_delete(htab_t htab)412*38fd1498Szrj htab_delete (htab_t htab)
413*38fd1498Szrj {
414*38fd1498Szrj   size_t size = htab_size (htab);
415*38fd1498Szrj   PTR *entries = htab->entries;
416*38fd1498Szrj   int i;
417*38fd1498Szrj 
418*38fd1498Szrj   if (htab->del_f)
419*38fd1498Szrj     for (i = size - 1; i >= 0; i--)
420*38fd1498Szrj       if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY)
421*38fd1498Szrj 	(*htab->del_f) (entries[i]);
422*38fd1498Szrj 
423*38fd1498Szrj   if (htab->free_f != NULL)
424*38fd1498Szrj     {
425*38fd1498Szrj       (*htab->free_f) (entries);
426*38fd1498Szrj       (*htab->free_f) (htab);
427*38fd1498Szrj     }
428*38fd1498Szrj   else if (htab->free_with_arg_f != NULL)
429*38fd1498Szrj     {
430*38fd1498Szrj       (*htab->free_with_arg_f) (htab->alloc_arg, entries);
431*38fd1498Szrj       (*htab->free_with_arg_f) (htab->alloc_arg, htab);
432*38fd1498Szrj     }
433*38fd1498Szrj }
434*38fd1498Szrj 
435*38fd1498Szrj /* This function clears all entries in the given hash table.  */
436*38fd1498Szrj 
437*38fd1498Szrj void
htab_empty(htab_t htab)438*38fd1498Szrj htab_empty (htab_t htab)
439*38fd1498Szrj {
440*38fd1498Szrj   size_t size = htab_size (htab);
441*38fd1498Szrj   PTR *entries = htab->entries;
442*38fd1498Szrj   int i;
443*38fd1498Szrj 
444*38fd1498Szrj   if (htab->del_f)
445*38fd1498Szrj     for (i = size - 1; i >= 0; i--)
446*38fd1498Szrj       if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY)
447*38fd1498Szrj 	(*htab->del_f) (entries[i]);
448*38fd1498Szrj 
449*38fd1498Szrj   /* Instead of clearing megabyte, downsize the table.  */
450*38fd1498Szrj   if (size > 1024*1024 / sizeof (PTR))
451*38fd1498Szrj     {
452*38fd1498Szrj       int nindex = higher_prime_index (1024 / sizeof (PTR));
453*38fd1498Szrj       int nsize = prime_tab[nindex].prime;
454*38fd1498Szrj 
455*38fd1498Szrj       if (htab->free_f != NULL)
456*38fd1498Szrj 	(*htab->free_f) (htab->entries);
457*38fd1498Szrj       else if (htab->free_with_arg_f != NULL)
458*38fd1498Szrj 	(*htab->free_with_arg_f) (htab->alloc_arg, htab->entries);
459*38fd1498Szrj       if (htab->alloc_with_arg_f != NULL)
460*38fd1498Szrj 	htab->entries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
461*38fd1498Szrj 						           sizeof (PTR *));
462*38fd1498Szrj       else
463*38fd1498Szrj 	htab->entries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
464*38fd1498Szrj      htab->size = nsize;
465*38fd1498Szrj      htab->size_prime_index = nindex;
466*38fd1498Szrj     }
467*38fd1498Szrj   else
468*38fd1498Szrj     memset (entries, 0, size * sizeof (PTR));
469*38fd1498Szrj   htab->n_deleted = 0;
470*38fd1498Szrj   htab->n_elements = 0;
471*38fd1498Szrj }
472*38fd1498Szrj 
473*38fd1498Szrj /* Similar to htab_find_slot, but without several unwanted side effects:
474*38fd1498Szrj     - Does not call htab->eq_f when it finds an existing entry.
475*38fd1498Szrj     - Does not change the count of elements/searches/collisions in the
476*38fd1498Szrj       hash table.
477*38fd1498Szrj    This function also assumes there are no deleted entries in the table.
478*38fd1498Szrj    HASH is the hash value for the element to be inserted.  */
479*38fd1498Szrj 
480*38fd1498Szrj static PTR *
find_empty_slot_for_expand(htab_t htab,hashval_t hash)481*38fd1498Szrj find_empty_slot_for_expand (htab_t htab, hashval_t hash)
482*38fd1498Szrj {
483*38fd1498Szrj   hashval_t index = htab_mod (hash, htab);
484*38fd1498Szrj   size_t size = htab_size (htab);
485*38fd1498Szrj   PTR *slot = htab->entries + index;
486*38fd1498Szrj   hashval_t hash2;
487*38fd1498Szrj 
488*38fd1498Szrj   if (*slot == HTAB_EMPTY_ENTRY)
489*38fd1498Szrj     return slot;
490*38fd1498Szrj   else if (*slot == HTAB_DELETED_ENTRY)
491*38fd1498Szrj     abort ();
492*38fd1498Szrj 
493*38fd1498Szrj   hash2 = htab_mod_m2 (hash, htab);
494*38fd1498Szrj   for (;;)
495*38fd1498Szrj     {
496*38fd1498Szrj       index += hash2;
497*38fd1498Szrj       if (index >= size)
498*38fd1498Szrj 	index -= size;
499*38fd1498Szrj 
500*38fd1498Szrj       slot = htab->entries + index;
501*38fd1498Szrj       if (*slot == HTAB_EMPTY_ENTRY)
502*38fd1498Szrj 	return slot;
503*38fd1498Szrj       else if (*slot == HTAB_DELETED_ENTRY)
504*38fd1498Szrj 	abort ();
505*38fd1498Szrj     }
506*38fd1498Szrj }
507*38fd1498Szrj 
508*38fd1498Szrj /* The following function changes size of memory allocated for the
509*38fd1498Szrj    entries and repeatedly inserts the table elements.  The occupancy
510*38fd1498Szrj    of the table after the call will be about 50%.  Naturally the hash
511*38fd1498Szrj    table must already exist.  Remember also that the place of the
512*38fd1498Szrj    table entries is changed.  If memory allocation failures are allowed,
513*38fd1498Szrj    this function will return zero, indicating that the table could not be
514*38fd1498Szrj    expanded.  If all goes well, it will return a non-zero value.  */
515*38fd1498Szrj 
516*38fd1498Szrj static int
htab_expand(htab_t htab)517*38fd1498Szrj htab_expand (htab_t htab)
518*38fd1498Szrj {
519*38fd1498Szrj   PTR *oentries;
520*38fd1498Szrj   PTR *olimit;
521*38fd1498Szrj   PTR *p;
522*38fd1498Szrj   PTR *nentries;
523*38fd1498Szrj   size_t nsize, osize, elts;
524*38fd1498Szrj   unsigned int oindex, nindex;
525*38fd1498Szrj 
526*38fd1498Szrj   oentries = htab->entries;
527*38fd1498Szrj   oindex = htab->size_prime_index;
528*38fd1498Szrj   osize = htab->size;
529*38fd1498Szrj   olimit = oentries + osize;
530*38fd1498Szrj   elts = htab_elements (htab);
531*38fd1498Szrj 
532*38fd1498Szrj   /* Resize only when table after removal of unused elements is either
533*38fd1498Szrj      too full or too empty.  */
534*38fd1498Szrj   if (elts * 2 > osize || (elts * 8 < osize && osize > 32))
535*38fd1498Szrj     {
536*38fd1498Szrj       nindex = higher_prime_index (elts * 2);
537*38fd1498Szrj       nsize = prime_tab[nindex].prime;
538*38fd1498Szrj     }
539*38fd1498Szrj   else
540*38fd1498Szrj     {
541*38fd1498Szrj       nindex = oindex;
542*38fd1498Szrj       nsize = osize;
543*38fd1498Szrj     }
544*38fd1498Szrj 
545*38fd1498Szrj   if (htab->alloc_with_arg_f != NULL)
546*38fd1498Szrj     nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
547*38fd1498Szrj 						  sizeof (PTR *));
548*38fd1498Szrj   else
549*38fd1498Szrj     nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
550*38fd1498Szrj   if (nentries == NULL)
551*38fd1498Szrj     return 0;
552*38fd1498Szrj   htab->entries = nentries;
553*38fd1498Szrj   htab->size = nsize;
554*38fd1498Szrj   htab->size_prime_index = nindex;
555*38fd1498Szrj   htab->n_elements -= htab->n_deleted;
556*38fd1498Szrj   htab->n_deleted = 0;
557*38fd1498Szrj 
558*38fd1498Szrj   p = oentries;
559*38fd1498Szrj   do
560*38fd1498Szrj     {
561*38fd1498Szrj       PTR x = *p;
562*38fd1498Szrj 
563*38fd1498Szrj       if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
564*38fd1498Szrj 	{
565*38fd1498Szrj 	  PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x));
566*38fd1498Szrj 
567*38fd1498Szrj 	  *q = x;
568*38fd1498Szrj 	}
569*38fd1498Szrj 
570*38fd1498Szrj       p++;
571*38fd1498Szrj     }
572*38fd1498Szrj   while (p < olimit);
573*38fd1498Szrj 
574*38fd1498Szrj   if (htab->free_f != NULL)
575*38fd1498Szrj     (*htab->free_f) (oentries);
576*38fd1498Szrj   else if (htab->free_with_arg_f != NULL)
577*38fd1498Szrj     (*htab->free_with_arg_f) (htab->alloc_arg, oentries);
578*38fd1498Szrj   return 1;
579*38fd1498Szrj }
580*38fd1498Szrj 
581*38fd1498Szrj /* This function searches for a hash table entry equal to the given
582*38fd1498Szrj    element.  It cannot be used to insert or delete an element.  */
583*38fd1498Szrj 
584*38fd1498Szrj PTR
htab_find_with_hash(htab_t htab,const PTR element,hashval_t hash)585*38fd1498Szrj htab_find_with_hash (htab_t htab, const PTR element, hashval_t hash)
586*38fd1498Szrj {
587*38fd1498Szrj   hashval_t index, hash2;
588*38fd1498Szrj   size_t size;
589*38fd1498Szrj   PTR entry;
590*38fd1498Szrj 
591*38fd1498Szrj   htab->searches++;
592*38fd1498Szrj   size = htab_size (htab);
593*38fd1498Szrj   index = htab_mod (hash, htab);
594*38fd1498Szrj 
595*38fd1498Szrj   entry = htab->entries[index];
596*38fd1498Szrj   if (entry == HTAB_EMPTY_ENTRY
597*38fd1498Szrj       || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element)))
598*38fd1498Szrj     return entry;
599*38fd1498Szrj 
600*38fd1498Szrj   hash2 = htab_mod_m2 (hash, htab);
601*38fd1498Szrj   for (;;)
602*38fd1498Szrj     {
603*38fd1498Szrj       htab->collisions++;
604*38fd1498Szrj       index += hash2;
605*38fd1498Szrj       if (index >= size)
606*38fd1498Szrj 	index -= size;
607*38fd1498Szrj 
608*38fd1498Szrj       entry = htab->entries[index];
609*38fd1498Szrj       if (entry == HTAB_EMPTY_ENTRY
610*38fd1498Szrj 	  || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element)))
611*38fd1498Szrj 	return entry;
612*38fd1498Szrj     }
613*38fd1498Szrj }
614*38fd1498Szrj 
615*38fd1498Szrj /* Like htab_find_slot_with_hash, but compute the hash value from the
616*38fd1498Szrj    element.  */
617*38fd1498Szrj 
618*38fd1498Szrj PTR
htab_find(htab_t htab,const PTR element)619*38fd1498Szrj htab_find (htab_t htab, const PTR element)
620*38fd1498Szrj {
621*38fd1498Szrj   return htab_find_with_hash (htab, element, (*htab->hash_f) (element));
622*38fd1498Szrj }
623*38fd1498Szrj 
624*38fd1498Szrj /* This function searches for a hash table slot containing an entry
625*38fd1498Szrj    equal to the given element.  To delete an entry, call this with
626*38fd1498Szrj    insert=NO_INSERT, then call htab_clear_slot on the slot returned
627*38fd1498Szrj    (possibly after doing some checks).  To insert an entry, call this
628*38fd1498Szrj    with insert=INSERT, then write the value you want into the returned
629*38fd1498Szrj    slot.  When inserting an entry, NULL may be returned if memory
630*38fd1498Szrj    allocation fails.  */
631*38fd1498Szrj 
632*38fd1498Szrj PTR *
htab_find_slot_with_hash(htab_t htab,const PTR element,hashval_t hash,enum insert_option insert)633*38fd1498Szrj htab_find_slot_with_hash (htab_t htab, const PTR element,
634*38fd1498Szrj                           hashval_t hash, enum insert_option insert)
635*38fd1498Szrj {
636*38fd1498Szrj   PTR *first_deleted_slot;
637*38fd1498Szrj   hashval_t index, hash2;
638*38fd1498Szrj   size_t size;
639*38fd1498Szrj   PTR entry;
640*38fd1498Szrj 
641*38fd1498Szrj   size = htab_size (htab);
642*38fd1498Szrj   if (insert == INSERT && size * 3 <= htab->n_elements * 4)
643*38fd1498Szrj     {
644*38fd1498Szrj       if (htab_expand (htab) == 0)
645*38fd1498Szrj 	return NULL;
646*38fd1498Szrj       size = htab_size (htab);
647*38fd1498Szrj     }
648*38fd1498Szrj 
649*38fd1498Szrj   index = htab_mod (hash, htab);
650*38fd1498Szrj 
651*38fd1498Szrj   htab->searches++;
652*38fd1498Szrj   first_deleted_slot = NULL;
653*38fd1498Szrj 
654*38fd1498Szrj   entry = htab->entries[index];
655*38fd1498Szrj   if (entry == HTAB_EMPTY_ENTRY)
656*38fd1498Szrj     goto empty_entry;
657*38fd1498Szrj   else if (entry == HTAB_DELETED_ENTRY)
658*38fd1498Szrj     first_deleted_slot = &htab->entries[index];
659*38fd1498Szrj   else if ((*htab->eq_f) (entry, element))
660*38fd1498Szrj     return &htab->entries[index];
661*38fd1498Szrj 
662*38fd1498Szrj   hash2 = htab_mod_m2 (hash, htab);
663*38fd1498Szrj   for (;;)
664*38fd1498Szrj     {
665*38fd1498Szrj       htab->collisions++;
666*38fd1498Szrj       index += hash2;
667*38fd1498Szrj       if (index >= size)
668*38fd1498Szrj 	index -= size;
669*38fd1498Szrj 
670*38fd1498Szrj       entry = htab->entries[index];
671*38fd1498Szrj       if (entry == HTAB_EMPTY_ENTRY)
672*38fd1498Szrj 	goto empty_entry;
673*38fd1498Szrj       else if (entry == HTAB_DELETED_ENTRY)
674*38fd1498Szrj 	{
675*38fd1498Szrj 	  if (!first_deleted_slot)
676*38fd1498Szrj 	    first_deleted_slot = &htab->entries[index];
677*38fd1498Szrj 	}
678*38fd1498Szrj       else if ((*htab->eq_f) (entry, element))
679*38fd1498Szrj 	return &htab->entries[index];
680*38fd1498Szrj     }
681*38fd1498Szrj 
682*38fd1498Szrj  empty_entry:
683*38fd1498Szrj   if (insert == NO_INSERT)
684*38fd1498Szrj     return NULL;
685*38fd1498Szrj 
686*38fd1498Szrj   if (first_deleted_slot)
687*38fd1498Szrj     {
688*38fd1498Szrj       htab->n_deleted--;
689*38fd1498Szrj       *first_deleted_slot = HTAB_EMPTY_ENTRY;
690*38fd1498Szrj       return first_deleted_slot;
691*38fd1498Szrj     }
692*38fd1498Szrj 
693*38fd1498Szrj   htab->n_elements++;
694*38fd1498Szrj   return &htab->entries[index];
695*38fd1498Szrj }
696*38fd1498Szrj 
697*38fd1498Szrj /* Like htab_find_slot_with_hash, but compute the hash value from the
698*38fd1498Szrj    element.  */
699*38fd1498Szrj 
700*38fd1498Szrj PTR *
htab_find_slot(htab_t htab,const PTR element,enum insert_option insert)701*38fd1498Szrj htab_find_slot (htab_t htab, const PTR element, enum insert_option insert)
702*38fd1498Szrj {
703*38fd1498Szrj   return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element),
704*38fd1498Szrj 				   insert);
705*38fd1498Szrj }
706*38fd1498Szrj 
707*38fd1498Szrj /* This function deletes an element with the given value from hash
708*38fd1498Szrj    table (the hash is computed from the element).  If there is no matching
709*38fd1498Szrj    element in the hash table, this function does nothing.  */
710*38fd1498Szrj 
711*38fd1498Szrj void
htab_remove_elt(htab_t htab,PTR element)712*38fd1498Szrj htab_remove_elt (htab_t htab, PTR element)
713*38fd1498Szrj {
714*38fd1498Szrj   htab_remove_elt_with_hash (htab, element, (*htab->hash_f) (element));
715*38fd1498Szrj }
716*38fd1498Szrj 
717*38fd1498Szrj 
718*38fd1498Szrj /* This function deletes an element with the given value from hash
719*38fd1498Szrj    table.  If there is no matching element in the hash table, this
720*38fd1498Szrj    function does nothing.  */
721*38fd1498Szrj 
722*38fd1498Szrj void
htab_remove_elt_with_hash(htab_t htab,PTR element,hashval_t hash)723*38fd1498Szrj htab_remove_elt_with_hash (htab_t htab, PTR element, hashval_t hash)
724*38fd1498Szrj {
725*38fd1498Szrj   PTR *slot;
726*38fd1498Szrj 
727*38fd1498Szrj   slot = htab_find_slot_with_hash (htab, element, hash, NO_INSERT);
728*38fd1498Szrj   if (*slot == HTAB_EMPTY_ENTRY)
729*38fd1498Szrj     return;
730*38fd1498Szrj 
731*38fd1498Szrj   if (htab->del_f)
732*38fd1498Szrj     (*htab->del_f) (*slot);
733*38fd1498Szrj 
734*38fd1498Szrj   *slot = HTAB_DELETED_ENTRY;
735*38fd1498Szrj   htab->n_deleted++;
736*38fd1498Szrj }
737*38fd1498Szrj 
738*38fd1498Szrj /* This function clears a specified slot in a hash table.  It is
739*38fd1498Szrj    useful when you've already done the lookup and don't want to do it
740*38fd1498Szrj    again.  */
741*38fd1498Szrj 
742*38fd1498Szrj void
htab_clear_slot(htab_t htab,PTR * slot)743*38fd1498Szrj htab_clear_slot (htab_t htab, PTR *slot)
744*38fd1498Szrj {
745*38fd1498Szrj   if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
746*38fd1498Szrj       || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
747*38fd1498Szrj     abort ();
748*38fd1498Szrj 
749*38fd1498Szrj   if (htab->del_f)
750*38fd1498Szrj     (*htab->del_f) (*slot);
751*38fd1498Szrj 
752*38fd1498Szrj   *slot = HTAB_DELETED_ENTRY;
753*38fd1498Szrj   htab->n_deleted++;
754*38fd1498Szrj }
755*38fd1498Szrj 
756*38fd1498Szrj /* This function scans over the entire hash table calling
757*38fd1498Szrj    CALLBACK for each live entry.  If CALLBACK returns false,
758*38fd1498Szrj    the iteration stops.  INFO is passed as CALLBACK's second
759*38fd1498Szrj    argument.  */
760*38fd1498Szrj 
761*38fd1498Szrj void
htab_traverse_noresize(htab_t htab,htab_trav callback,PTR info)762*38fd1498Szrj htab_traverse_noresize (htab_t htab, htab_trav callback, PTR info)
763*38fd1498Szrj {
764*38fd1498Szrj   PTR *slot;
765*38fd1498Szrj   PTR *limit;
766*38fd1498Szrj 
767*38fd1498Szrj   slot = htab->entries;
768*38fd1498Szrj   limit = slot + htab_size (htab);
769*38fd1498Szrj 
770*38fd1498Szrj   do
771*38fd1498Szrj     {
772*38fd1498Szrj       PTR x = *slot;
773*38fd1498Szrj 
774*38fd1498Szrj       if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
775*38fd1498Szrj 	if (!(*callback) (slot, info))
776*38fd1498Szrj 	  break;
777*38fd1498Szrj     }
778*38fd1498Szrj   while (++slot < limit);
779*38fd1498Szrj }
780*38fd1498Szrj 
781*38fd1498Szrj /* Like htab_traverse_noresize, but does resize the table when it is
782*38fd1498Szrj    too empty to improve effectivity of subsequent calls.  */
783*38fd1498Szrj 
784*38fd1498Szrj void
htab_traverse(htab_t htab,htab_trav callback,PTR info)785*38fd1498Szrj htab_traverse (htab_t htab, htab_trav callback, PTR info)
786*38fd1498Szrj {
787*38fd1498Szrj   size_t size = htab_size (htab);
788*38fd1498Szrj   if (htab_elements (htab) * 8 < size && size > 32)
789*38fd1498Szrj     htab_expand (htab);
790*38fd1498Szrj 
791*38fd1498Szrj   htab_traverse_noresize (htab, callback, info);
792*38fd1498Szrj }
793*38fd1498Szrj 
794*38fd1498Szrj /* Return the fraction of fixed collisions during all work with given
795*38fd1498Szrj    hash table. */
796*38fd1498Szrj 
797*38fd1498Szrj double
htab_collisions(htab_t htab)798*38fd1498Szrj htab_collisions (htab_t htab)
799*38fd1498Szrj {
800*38fd1498Szrj   if (htab->searches == 0)
801*38fd1498Szrj     return 0.0;
802*38fd1498Szrj 
803*38fd1498Szrj   return (double) htab->collisions / (double) htab->searches;
804*38fd1498Szrj }
805*38fd1498Szrj 
/* Hash P as a null-terminated string.

   Copied from gcc/hashtable.c.  Zack had the following to say with respect
   to applicability, though note that unlike hashtable.c, this hash table
   implementation re-hashes rather than chaining buckets.

   http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html
   From: Zack Weinberg <zackw@panix.com>
   Date: Fri, 17 Aug 2001 02:15:56 -0400

   I got it by extracting all the identifiers from all the source code
   I had lying around in mid-1999, and testing many recurrences of
   the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either
   prime numbers or the appropriate identity.  This was the best one.
   I don't remember exactly what constituted "best", except I was
   looking at bucket-length distributions mostly.

   So it should be very good at hashing identifiers, but might not be
   as good at arbitrary strings.

   I'll add that it thoroughly trounces the hash functions recommended
   for this use at http://burtleburtle.net/bob/hash/index.html, both
   on speed and bucket distribution.  I haven't tried it against the
   function they just started using for Perl's hashes.  */
830*38fd1498Szrj 
831*38fd1498Szrj hashval_t
htab_hash_string(const PTR p)832*38fd1498Szrj htab_hash_string (const PTR p)
833*38fd1498Szrj {
834*38fd1498Szrj   const unsigned char *str = (const unsigned char *) p;
835*38fd1498Szrj   hashval_t r = 0;
836*38fd1498Szrj   unsigned char c;
837*38fd1498Szrj 
838*38fd1498Szrj   while ((c = *str++) != 0)
839*38fd1498Szrj     r = r * 67 + c - 113;
840*38fd1498Szrj 
841*38fd1498Szrj   return r;
842*38fd1498Szrj }
843*38fd1498Szrj 
/* DERIVED FROM:
--------------------------------------------------------------------
lookup2.c, by Bob Jenkins, December 1996, Public Domain.
hash(), hash2(), hash3, and mix() are externally useful functions.
Routines to test the hash are included if SELF_TEST is defined.
You can use this free for any purpose.  It has no warranty.
--------------------------------------------------------------------
*/
852*38fd1498Szrj 
/*
--------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.
For every delta with one or two bits set, and the deltas of all three
  high bits or all three low bits, whether the original value of a,b,c
  is almost all zero or is uniformly distributed,
* If mix() is run forward or backward, at least 32 bits in a,b,c
  have at least 1/4 probability of changing.
* If mix() is run forward, every bit of c will change between 1/3 and
  2/3 of the time.  (Well, 22/100 and 78/100 for some 2-bit deltas.)
mix() was built out of 36 single-cycle latency instructions in a
  structure that could support 2x parallelism, like so:
      a -= b;
      a -= c; x = (c>>13);
      b -= c; a ^= x;
      b -= a; x = (a<<8);
      c -= a; b ^= x;
      c -= b; x = (b>>13);
      ...
  Unfortunately, superscalar Pentiums and Sparcs can't take advantage
  of that parallelism.  They've also turned some of those single-cycle
  latency instructions into multi-cycle latency instructions.  Still,
  this is the fastest good hash I could find.  There were about 2^^68
  to choose from.  I only looked at a billion or so.
--------------------------------------------------------------------
*/
/* Same as the mix() described above, but slower; works on systems that
   might have 8 byte hashval_t's: from the third step on, shifted values
   and results are masked to 32 bits.

   A, B and C must be modifiable lvalues; each is evaluated many times,
   so the arguments must not have side effects.  The body is wrapped in
   do { ... } while (0) so that `mix (a, b, c);' behaves as a single
   statement, including as the body of an if that has an else.  */
#define mix(a,b,c) \
do { \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<< 8); \
  c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
  a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
  b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
  c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
  a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
  b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
  c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
} while (0)
892*38fd1498Szrj 
/*
--------------------------------------------------------------------
hash() -- hash a variable-length key into a 32-bit value
  k     : the key (the unaligned variable-length array of bytes)
  len   : the length of the key, counting by bytes
  level : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Every 1-bit and 2-bit delta achieves avalanche.
About 36+6len instructions.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (ub1 **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);

By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.

See http://burtleburtle.net/bob/hash/evahash.html
Use for hash table lookup, or anything where one collision in 2^32 is
acceptable.  Do NOT use for cryptographic purposes.
--------------------------------------------------------------------
*/
920*38fd1498Szrj 
921*38fd1498Szrj hashval_t
iterative_hash(const PTR k_in,register size_t length,register hashval_t initval)922*38fd1498Szrj iterative_hash (const PTR k_in /* the key */,
923*38fd1498Szrj                 register size_t  length /* the length of the key */,
924*38fd1498Szrj                 register hashval_t initval /* the previous hash, or
925*38fd1498Szrj                                               an arbitrary value */)
926*38fd1498Szrj {
927*38fd1498Szrj   register const unsigned char *k = (const unsigned char *)k_in;
928*38fd1498Szrj   register hashval_t a,b,c,len;
929*38fd1498Szrj 
930*38fd1498Szrj   /* Set up the internal state */
931*38fd1498Szrj   len = length;
932*38fd1498Szrj   a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
933*38fd1498Szrj   c = initval;           /* the previous hash value */
934*38fd1498Szrj 
935*38fd1498Szrj   /*---------------------------------------- handle most of the key */
936*38fd1498Szrj #ifndef WORDS_BIGENDIAN
937*38fd1498Szrj   /* On a little-endian machine, if the data is 4-byte aligned we can hash
938*38fd1498Szrj      by word for better speed.  This gives nondeterministic results on
939*38fd1498Szrj      big-endian machines.  */
940*38fd1498Szrj   if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
941*38fd1498Szrj     while (len >= 12)    /* aligned */
942*38fd1498Szrj       {
943*38fd1498Szrj 	a += *(hashval_t *)(k+0);
944*38fd1498Szrj 	b += *(hashval_t *)(k+4);
945*38fd1498Szrj 	c += *(hashval_t *)(k+8);
946*38fd1498Szrj 	mix(a,b,c);
947*38fd1498Szrj 	k += 12; len -= 12;
948*38fd1498Szrj       }
949*38fd1498Szrj   else /* unaligned */
950*38fd1498Szrj #endif
951*38fd1498Szrj     while (len >= 12)
952*38fd1498Szrj       {
953*38fd1498Szrj 	a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
954*38fd1498Szrj 	b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
955*38fd1498Szrj 	c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
956*38fd1498Szrj 	mix(a,b,c);
957*38fd1498Szrj 	k += 12; len -= 12;
958*38fd1498Szrj       }
959*38fd1498Szrj 
960*38fd1498Szrj   /*------------------------------------- handle the last 11 bytes */
961*38fd1498Szrj   c += length;
962*38fd1498Szrj   switch(len)              /* all the case statements fall through */
963*38fd1498Szrj     {
964*38fd1498Szrj     case 11: c+=((hashval_t)k[10]<<24);	/* fall through */
965*38fd1498Szrj     case 10: c+=((hashval_t)k[9]<<16);	/* fall through */
966*38fd1498Szrj     case 9 : c+=((hashval_t)k[8]<<8);	/* fall through */
967*38fd1498Szrj       /* the first byte of c is reserved for the length */
968*38fd1498Szrj     case 8 : b+=((hashval_t)k[7]<<24);	/* fall through */
969*38fd1498Szrj     case 7 : b+=((hashval_t)k[6]<<16);	/* fall through */
970*38fd1498Szrj     case 6 : b+=((hashval_t)k[5]<<8);	/* fall through */
971*38fd1498Szrj     case 5 : b+=k[4];			/* fall through */
972*38fd1498Szrj     case 4 : a+=((hashval_t)k[3]<<24);	/* fall through */
973*38fd1498Szrj     case 3 : a+=((hashval_t)k[2]<<16);	/* fall through */
974*38fd1498Szrj     case 2 : a+=((hashval_t)k[1]<<8);	/* fall through */
975*38fd1498Szrj     case 1 : a+=k[0];
976*38fd1498Szrj       /* case 0: nothing left to add */
977*38fd1498Szrj     }
978*38fd1498Szrj   mix(a,b,c);
979*38fd1498Szrj   /*-------------------------------------------- report the result */
980*38fd1498Szrj   return c;
981*38fd1498Szrj }
982*38fd1498Szrj 
983*38fd1498Szrj /* Returns a hash code for pointer P. Simplified version of evahash */
984*38fd1498Szrj 
985*38fd1498Szrj static hashval_t
hash_pointer(const PTR p)986*38fd1498Szrj hash_pointer (const PTR p)
987*38fd1498Szrj {
988*38fd1498Szrj   intptr_t v = (intptr_t) p;
989*38fd1498Szrj   unsigned a, b, c;
990*38fd1498Szrj 
991*38fd1498Szrj   a = b = 0x9e3779b9;
992*38fd1498Szrj   a += v >> (sizeof (intptr_t) * CHAR_BIT / 2);
993*38fd1498Szrj   b += v & (((intptr_t) 1 << (sizeof (intptr_t) * CHAR_BIT / 2)) - 1);
994*38fd1498Szrj   c = 0x42135234;
995*38fd1498Szrj   mix (a, b, c);
996*38fd1498Szrj   return c;
997*38fd1498Szrj }
998