1*e4b17023SJohn Marino /* An expandable hash tables datatype.
2*e4b17023SJohn Marino Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2009, 2010
3*e4b17023SJohn Marino Free Software Foundation, Inc.
4*e4b17023SJohn Marino Contributed by Vladimir Makarov (vmakarov@cygnus.com).
5*e4b17023SJohn Marino
6*e4b17023SJohn Marino This file is part of the libiberty library.
7*e4b17023SJohn Marino Libiberty is free software; you can redistribute it and/or
8*e4b17023SJohn Marino modify it under the terms of the GNU Library General Public
9*e4b17023SJohn Marino License as published by the Free Software Foundation; either
10*e4b17023SJohn Marino version 2 of the License, or (at your option) any later version.
11*e4b17023SJohn Marino
12*e4b17023SJohn Marino Libiberty is distributed in the hope that it will be useful,
13*e4b17023SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of
14*e4b17023SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15*e4b17023SJohn Marino Library General Public License for more details.
16*e4b17023SJohn Marino
17*e4b17023SJohn Marino You should have received a copy of the GNU Library General Public
18*e4b17023SJohn Marino License along with libiberty; see the file COPYING.LIB. If
19*e4b17023SJohn Marino not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
20*e4b17023SJohn Marino Boston, MA 02110-1301, USA. */
21*e4b17023SJohn Marino
22*e4b17023SJohn Marino /* This package implements basic hash table functionality. It is possible
23*e4b17023SJohn Marino to search for an entry, create an entry and destroy an entry.
24*e4b17023SJohn Marino
25*e4b17023SJohn Marino Elements in the table are generic pointers.
26*e4b17023SJohn Marino
27*e4b17023SJohn Marino The size of the table is not fixed; if the occupancy of the table
28*e4b17023SJohn Marino grows too high the hash table will be expanded.
29*e4b17023SJohn Marino
30*e4b17023SJohn Marino The abstract data implementation is based on generalized Algorithm D
31*e4b17023SJohn Marino from Knuth's book "The art of computer programming". Hash table is
32*e4b17023SJohn Marino expanded by creation of new hash table and transferring elements from
33*e4b17023SJohn Marino the old table to the new table. */
34*e4b17023SJohn Marino
35*e4b17023SJohn Marino #ifdef HAVE_CONFIG_H
36*e4b17023SJohn Marino #include "config.h"
37*e4b17023SJohn Marino #endif
38*e4b17023SJohn Marino
39*e4b17023SJohn Marino #include <sys/types.h>
40*e4b17023SJohn Marino
41*e4b17023SJohn Marino #ifdef HAVE_STDLIB_H
42*e4b17023SJohn Marino #include <stdlib.h>
43*e4b17023SJohn Marino #endif
44*e4b17023SJohn Marino #ifdef HAVE_STRING_H
45*e4b17023SJohn Marino #include <string.h>
46*e4b17023SJohn Marino #endif
47*e4b17023SJohn Marino #ifdef HAVE_MALLOC_H
48*e4b17023SJohn Marino #include <malloc.h>
49*e4b17023SJohn Marino #endif
50*e4b17023SJohn Marino #ifdef HAVE_LIMITS_H
51*e4b17023SJohn Marino #include <limits.h>
52*e4b17023SJohn Marino #endif
53*e4b17023SJohn Marino #ifdef HAVE_INTTYPES_H
54*e4b17023SJohn Marino #include <inttypes.h>
55*e4b17023SJohn Marino #endif
56*e4b17023SJohn Marino #ifdef HAVE_STDINT_H
57*e4b17023SJohn Marino #include <stdint.h>
58*e4b17023SJohn Marino #endif
59*e4b17023SJohn Marino
60*e4b17023SJohn Marino #include <stdio.h>
61*e4b17023SJohn Marino
62*e4b17023SJohn Marino #include "libiberty.h"
63*e4b17023SJohn Marino #include "ansidecl.h"
64*e4b17023SJohn Marino #include "hashtab.h"
65*e4b17023SJohn Marino
66*e4b17023SJohn Marino #ifndef CHAR_BIT
67*e4b17023SJohn Marino #define CHAR_BIT 8
68*e4b17023SJohn Marino #endif
69*e4b17023SJohn Marino
70*e4b17023SJohn Marino static unsigned int higher_prime_index (unsigned long);
71*e4b17023SJohn Marino static hashval_t htab_mod_1 (hashval_t, hashval_t, hashval_t, int);
72*e4b17023SJohn Marino static hashval_t htab_mod (hashval_t, htab_t);
73*e4b17023SJohn Marino static hashval_t htab_mod_m2 (hashval_t, htab_t);
74*e4b17023SJohn Marino static hashval_t hash_pointer (const void *);
75*e4b17023SJohn Marino static int eq_pointer (const void *, const void *);
76*e4b17023SJohn Marino static int htab_expand (htab_t);
77*e4b17023SJohn Marino static PTR *find_empty_slot_for_expand (htab_t, hashval_t);
78*e4b17023SJohn Marino
79*e4b17023SJohn Marino /* At some point, we could make these be NULL, and modify the
80*e4b17023SJohn Marino hash-table routines to handle NULL specially; that would avoid
81*e4b17023SJohn Marino function-call overhead for the common case of hashing pointers. */
82*e4b17023SJohn Marino htab_hash htab_hash_pointer = hash_pointer;
83*e4b17023SJohn Marino htab_eq htab_eq_pointer = eq_pointer;
84*e4b17023SJohn Marino
85*e4b17023SJohn Marino /* Table of primes and multiplicative inverses.
86*e4b17023SJohn Marino
87*e4b17023SJohn Marino Note that these are not minimally reduced inverses. Unlike when generating
88*e4b17023SJohn Marino code to divide by a constant, we want to be able to use the same algorithm
89*e4b17023SJohn Marino all the time. All of these inverses (are implied to) have bit 32 set.
90*e4b17023SJohn Marino
91*e4b17023SJohn Marino For the record, here's the function that computed the table; it's a
92*e4b17023SJohn Marino vastly simplified version of the function of the same name from gcc. */
93*e4b17023SJohn Marino
94*e4b17023SJohn Marino #if 0
95*e4b17023SJohn Marino unsigned int
96*e4b17023SJohn Marino ceil_log2 (unsigned int x)
97*e4b17023SJohn Marino {
98*e4b17023SJohn Marino int i;
99*e4b17023SJohn Marino for (i = 31; i >= 0 ; --i)
100*e4b17023SJohn Marino if (x > (1u << i))
101*e4b17023SJohn Marino return i+1;
102*e4b17023SJohn Marino abort ();
103*e4b17023SJohn Marino }
104*e4b17023SJohn Marino
105*e4b17023SJohn Marino unsigned int
106*e4b17023SJohn Marino choose_multiplier (unsigned int d, unsigned int *mlp, unsigned char *shiftp)
107*e4b17023SJohn Marino {
108*e4b17023SJohn Marino unsigned long long mhigh;
109*e4b17023SJohn Marino double nx;
110*e4b17023SJohn Marino int lgup, post_shift;
111*e4b17023SJohn Marino int pow, pow2;
112*e4b17023SJohn Marino int n = 32, precision = 32;
113*e4b17023SJohn Marino
114*e4b17023SJohn Marino lgup = ceil_log2 (d);
115*e4b17023SJohn Marino pow = n + lgup;
116*e4b17023SJohn Marino pow2 = n + lgup - precision;
117*e4b17023SJohn Marino
118*e4b17023SJohn Marino nx = ldexp (1.0, pow) + ldexp (1.0, pow2);
119*e4b17023SJohn Marino mhigh = nx / d;
120*e4b17023SJohn Marino
121*e4b17023SJohn Marino *shiftp = lgup - 1;
122*e4b17023SJohn Marino *mlp = mhigh;
123*e4b17023SJohn Marino return mhigh >> 32;
124*e4b17023SJohn Marino }
125*e4b17023SJohn Marino #endif
126*e4b17023SJohn Marino
127*e4b17023SJohn Marino struct prime_ent
128*e4b17023SJohn Marino {
129*e4b17023SJohn Marino hashval_t prime;
130*e4b17023SJohn Marino hashval_t inv;
131*e4b17023SJohn Marino hashval_t inv_m2; /* inverse of prime-2 */
132*e4b17023SJohn Marino hashval_t shift;
133*e4b17023SJohn Marino };
134*e4b17023SJohn Marino
135*e4b17023SJohn Marino static struct prime_ent const prime_tab[] = {
136*e4b17023SJohn Marino { 7, 0x24924925, 0x9999999b, 2 },
137*e4b17023SJohn Marino { 13, 0x3b13b13c, 0x745d1747, 3 },
138*e4b17023SJohn Marino { 31, 0x08421085, 0x1a7b9612, 4 },
139*e4b17023SJohn Marino { 61, 0x0c9714fc, 0x15b1e5f8, 5 },
140*e4b17023SJohn Marino { 127, 0x02040811, 0x0624dd30, 6 },
141*e4b17023SJohn Marino { 251, 0x05197f7e, 0x073260a5, 7 },
142*e4b17023SJohn Marino { 509, 0x01824366, 0x02864fc8, 8 },
143*e4b17023SJohn Marino { 1021, 0x00c0906d, 0x014191f7, 9 },
144*e4b17023SJohn Marino { 2039, 0x0121456f, 0x0161e69e, 10 },
145*e4b17023SJohn Marino { 4093, 0x00300902, 0x00501908, 11 },
146*e4b17023SJohn Marino { 8191, 0x00080041, 0x00180241, 12 },
147*e4b17023SJohn Marino { 16381, 0x000c0091, 0x00140191, 13 },
148*e4b17023SJohn Marino { 32749, 0x002605a5, 0x002a06e6, 14 },
149*e4b17023SJohn Marino { 65521, 0x000f00e2, 0x00110122, 15 },
150*e4b17023SJohn Marino { 131071, 0x00008001, 0x00018003, 16 },
151*e4b17023SJohn Marino { 262139, 0x00014002, 0x0001c004, 17 },
152*e4b17023SJohn Marino { 524287, 0x00002001, 0x00006001, 18 },
153*e4b17023SJohn Marino { 1048573, 0x00003001, 0x00005001, 19 },
154*e4b17023SJohn Marino { 2097143, 0x00004801, 0x00005801, 20 },
155*e4b17023SJohn Marino { 4194301, 0x00000c01, 0x00001401, 21 },
156*e4b17023SJohn Marino { 8388593, 0x00001e01, 0x00002201, 22 },
157*e4b17023SJohn Marino { 16777213, 0x00000301, 0x00000501, 23 },
158*e4b17023SJohn Marino { 33554393, 0x00001381, 0x00001481, 24 },
159*e4b17023SJohn Marino { 67108859, 0x00000141, 0x000001c1, 25 },
160*e4b17023SJohn Marino { 134217689, 0x000004e1, 0x00000521, 26 },
161*e4b17023SJohn Marino { 268435399, 0x00000391, 0x000003b1, 27 },
162*e4b17023SJohn Marino { 536870909, 0x00000019, 0x00000029, 28 },
163*e4b17023SJohn Marino { 1073741789, 0x0000008d, 0x00000095, 29 },
164*e4b17023SJohn Marino { 2147483647, 0x00000003, 0x00000007, 30 },
165*e4b17023SJohn Marino /* Avoid "decimal constant so large it is unsigned" for 4294967291. */
166*e4b17023SJohn Marino { 0xfffffffb, 0x00000006, 0x00000008, 31 }
167*e4b17023SJohn Marino };
168*e4b17023SJohn Marino
169*e4b17023SJohn Marino /* The following function returns an index into the above table of the
170*e4b17023SJohn Marino nearest prime number which is greater than N, and near a power of two. */
171*e4b17023SJohn Marino
172*e4b17023SJohn Marino static unsigned int
higher_prime_index(unsigned long n)173*e4b17023SJohn Marino higher_prime_index (unsigned long n)
174*e4b17023SJohn Marino {
175*e4b17023SJohn Marino unsigned int low = 0;
176*e4b17023SJohn Marino unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]);
177*e4b17023SJohn Marino
178*e4b17023SJohn Marino while (low != high)
179*e4b17023SJohn Marino {
180*e4b17023SJohn Marino unsigned int mid = low + (high - low) / 2;
181*e4b17023SJohn Marino if (n > prime_tab[mid].prime)
182*e4b17023SJohn Marino low = mid + 1;
183*e4b17023SJohn Marino else
184*e4b17023SJohn Marino high = mid;
185*e4b17023SJohn Marino }
186*e4b17023SJohn Marino
187*e4b17023SJohn Marino /* If we've run out of primes, abort. */
188*e4b17023SJohn Marino if (n > prime_tab[low].prime)
189*e4b17023SJohn Marino {
190*e4b17023SJohn Marino fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
191*e4b17023SJohn Marino abort ();
192*e4b17023SJohn Marino }
193*e4b17023SJohn Marino
194*e4b17023SJohn Marino return low;
195*e4b17023SJohn Marino }
196*e4b17023SJohn Marino
197*e4b17023SJohn Marino /* Returns a hash code for P. */
198*e4b17023SJohn Marino
199*e4b17023SJohn Marino static hashval_t
hash_pointer(const PTR p)200*e4b17023SJohn Marino hash_pointer (const PTR p)
201*e4b17023SJohn Marino {
202*e4b17023SJohn Marino return (hashval_t) ((intptr_t)p >> 3);
203*e4b17023SJohn Marino }
204*e4b17023SJohn Marino
205*e4b17023SJohn Marino /* Returns non-zero if P1 and P2 are equal. */
206*e4b17023SJohn Marino
207*e4b17023SJohn Marino static int
eq_pointer(const PTR p1,const PTR p2)208*e4b17023SJohn Marino eq_pointer (const PTR p1, const PTR p2)
209*e4b17023SJohn Marino {
210*e4b17023SJohn Marino return p1 == p2;
211*e4b17023SJohn Marino }
212*e4b17023SJohn Marino
213*e4b17023SJohn Marino
214*e4b17023SJohn Marino /* The parens around the function names in the next two definitions
215*e4b17023SJohn Marino are essential in order to prevent macro expansions of the name.
216*e4b17023SJohn Marino The bodies, however, are expanded as expected, so they are not
217*e4b17023SJohn Marino recursive definitions. */
218*e4b17023SJohn Marino
219*e4b17023SJohn Marino /* Return the current size of given hash table. */
220*e4b17023SJohn Marino
221*e4b17023SJohn Marino #define htab_size(htab) ((htab)->size)
222*e4b17023SJohn Marino
size_t(htab_size)223*e4b17023SJohn Marino size_t
224*e4b17023SJohn Marino (htab_size) (htab_t htab)
225*e4b17023SJohn Marino {
226*e4b17023SJohn Marino return htab_size (htab);
227*e4b17023SJohn Marino }
228*e4b17023SJohn Marino
229*e4b17023SJohn Marino /* Return the current number of elements in given hash table. */
230*e4b17023SJohn Marino
231*e4b17023SJohn Marino #define htab_elements(htab) ((htab)->n_elements - (htab)->n_deleted)
232*e4b17023SJohn Marino
size_t(htab_elements)233*e4b17023SJohn Marino size_t
234*e4b17023SJohn Marino (htab_elements) (htab_t htab)
235*e4b17023SJohn Marino {
236*e4b17023SJohn Marino return htab_elements (htab);
237*e4b17023SJohn Marino }
238*e4b17023SJohn Marino
239*e4b17023SJohn Marino /* Return X % Y. */
240*e4b17023SJohn Marino
241*e4b17023SJohn Marino static inline hashval_t
htab_mod_1(hashval_t x,hashval_t y,hashval_t inv,int shift)242*e4b17023SJohn Marino htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift)
243*e4b17023SJohn Marino {
244*e4b17023SJohn Marino /* The multiplicative inverses computed above are for 32-bit types, and
245*e4b17023SJohn Marino requires that we be able to compute a highpart multiply. */
246*e4b17023SJohn Marino #ifdef UNSIGNED_64BIT_TYPE
247*e4b17023SJohn Marino __extension__ typedef UNSIGNED_64BIT_TYPE ull;
248*e4b17023SJohn Marino if (sizeof (hashval_t) * CHAR_BIT <= 32)
249*e4b17023SJohn Marino {
250*e4b17023SJohn Marino hashval_t t1, t2, t3, t4, q, r;
251*e4b17023SJohn Marino
252*e4b17023SJohn Marino t1 = ((ull)x * inv) >> 32;
253*e4b17023SJohn Marino t2 = x - t1;
254*e4b17023SJohn Marino t3 = t2 >> 1;
255*e4b17023SJohn Marino t4 = t1 + t3;
256*e4b17023SJohn Marino q = t4 >> shift;
257*e4b17023SJohn Marino r = x - (q * y);
258*e4b17023SJohn Marino
259*e4b17023SJohn Marino return r;
260*e4b17023SJohn Marino }
261*e4b17023SJohn Marino #endif
262*e4b17023SJohn Marino
263*e4b17023SJohn Marino /* Otherwise just use the native division routines. */
264*e4b17023SJohn Marino return x % y;
265*e4b17023SJohn Marino }
266*e4b17023SJohn Marino
267*e4b17023SJohn Marino /* Compute the primary hash for HASH given HTAB's current size. */
268*e4b17023SJohn Marino
269*e4b17023SJohn Marino static inline hashval_t
htab_mod(hashval_t hash,htab_t htab)270*e4b17023SJohn Marino htab_mod (hashval_t hash, htab_t htab)
271*e4b17023SJohn Marino {
272*e4b17023SJohn Marino const struct prime_ent *p = &prime_tab[htab->size_prime_index];
273*e4b17023SJohn Marino return htab_mod_1 (hash, p->prime, p->inv, p->shift);
274*e4b17023SJohn Marino }
275*e4b17023SJohn Marino
276*e4b17023SJohn Marino /* Compute the secondary hash for HASH given HTAB's current size. */
277*e4b17023SJohn Marino
278*e4b17023SJohn Marino static inline hashval_t
htab_mod_m2(hashval_t hash,htab_t htab)279*e4b17023SJohn Marino htab_mod_m2 (hashval_t hash, htab_t htab)
280*e4b17023SJohn Marino {
281*e4b17023SJohn Marino const struct prime_ent *p = &prime_tab[htab->size_prime_index];
282*e4b17023SJohn Marino return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift);
283*e4b17023SJohn Marino }
284*e4b17023SJohn Marino
285*e4b17023SJohn Marino /* This function creates table with length slightly longer than given
286*e4b17023SJohn Marino source length. Created hash table is initiated as empty (all the
287*e4b17023SJohn Marino hash table entries are HTAB_EMPTY_ENTRY). The function returns the
288*e4b17023SJohn Marino created hash table, or NULL if memory allocation fails. */
289*e4b17023SJohn Marino
290*e4b17023SJohn Marino htab_t
htab_create_alloc(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f,htab_alloc alloc_f,htab_free free_f)291*e4b17023SJohn Marino htab_create_alloc (size_t size, htab_hash hash_f, htab_eq eq_f,
292*e4b17023SJohn Marino htab_del del_f, htab_alloc alloc_f, htab_free free_f)
293*e4b17023SJohn Marino {
294*e4b17023SJohn Marino return htab_create_typed_alloc (size, hash_f, eq_f, del_f, alloc_f, alloc_f,
295*e4b17023SJohn Marino free_f);
296*e4b17023SJohn Marino }
297*e4b17023SJohn Marino
298*e4b17023SJohn Marino /* As above, but uses the variants of ALLOC_F and FREE_F which accept
299*e4b17023SJohn Marino an extra argument. */
300*e4b17023SJohn Marino
301*e4b17023SJohn Marino htab_t
htab_create_alloc_ex(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f,void * alloc_arg,htab_alloc_with_arg alloc_f,htab_free_with_arg free_f)302*e4b17023SJohn Marino htab_create_alloc_ex (size_t size, htab_hash hash_f, htab_eq eq_f,
303*e4b17023SJohn Marino htab_del del_f, void *alloc_arg,
304*e4b17023SJohn Marino htab_alloc_with_arg alloc_f,
305*e4b17023SJohn Marino htab_free_with_arg free_f)
306*e4b17023SJohn Marino {
307*e4b17023SJohn Marino htab_t result;
308*e4b17023SJohn Marino unsigned int size_prime_index;
309*e4b17023SJohn Marino
310*e4b17023SJohn Marino size_prime_index = higher_prime_index (size);
311*e4b17023SJohn Marino size = prime_tab[size_prime_index].prime;
312*e4b17023SJohn Marino
313*e4b17023SJohn Marino result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab));
314*e4b17023SJohn Marino if (result == NULL)
315*e4b17023SJohn Marino return NULL;
316*e4b17023SJohn Marino result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR));
317*e4b17023SJohn Marino if (result->entries == NULL)
318*e4b17023SJohn Marino {
319*e4b17023SJohn Marino if (free_f != NULL)
320*e4b17023SJohn Marino (*free_f) (alloc_arg, result);
321*e4b17023SJohn Marino return NULL;
322*e4b17023SJohn Marino }
323*e4b17023SJohn Marino result->size = size;
324*e4b17023SJohn Marino result->size_prime_index = size_prime_index;
325*e4b17023SJohn Marino result->hash_f = hash_f;
326*e4b17023SJohn Marino result->eq_f = eq_f;
327*e4b17023SJohn Marino result->del_f = del_f;
328*e4b17023SJohn Marino result->alloc_arg = alloc_arg;
329*e4b17023SJohn Marino result->alloc_with_arg_f = alloc_f;
330*e4b17023SJohn Marino result->free_with_arg_f = free_f;
331*e4b17023SJohn Marino return result;
332*e4b17023SJohn Marino }
333*e4b17023SJohn Marino
334*e4b17023SJohn Marino /*
335*e4b17023SJohn Marino
336*e4b17023SJohn Marino @deftypefn Supplemental htab_t htab_create_typed_alloc (size_t @var{size}, @
337*e4b17023SJohn Marino htab_hash @var{hash_f}, htab_eq @var{eq_f}, htab_del @var{del_f}, @
338*e4b17023SJohn Marino htab_alloc @var{alloc_tab_f}, htab_alloc @var{alloc_f}, @
339*e4b17023SJohn Marino htab_free @var{free_f})
340*e4b17023SJohn Marino
341*e4b17023SJohn Marino This function creates a hash table that uses two different allocators
342*e4b17023SJohn Marino @var{alloc_tab_f} and @var{alloc_f} to use for allocating the table itself
343*e4b17023SJohn Marino and its entries respectively. This is useful when variables of different
344*e4b17023SJohn Marino types need to be allocated with different allocators.
345*e4b17023SJohn Marino
346*e4b17023SJohn Marino The created hash table is slightly larger than @var{size} and it is
347*e4b17023SJohn Marino initially empty (all the hash table entries are @code{HTAB_EMPTY_ENTRY}).
348*e4b17023SJohn Marino The function returns the created hash table, or @code{NULL} if memory
349*e4b17023SJohn Marino allocation fails.
350*e4b17023SJohn Marino
351*e4b17023SJohn Marino @end deftypefn
352*e4b17023SJohn Marino
353*e4b17023SJohn Marino */
354*e4b17023SJohn Marino
355*e4b17023SJohn Marino htab_t
htab_create_typed_alloc(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f,htab_alloc alloc_tab_f,htab_alloc alloc_f,htab_free free_f)356*e4b17023SJohn Marino htab_create_typed_alloc (size_t size, htab_hash hash_f, htab_eq eq_f,
357*e4b17023SJohn Marino htab_del del_f, htab_alloc alloc_tab_f,
358*e4b17023SJohn Marino htab_alloc alloc_f, htab_free free_f)
359*e4b17023SJohn Marino {
360*e4b17023SJohn Marino htab_t result;
361*e4b17023SJohn Marino unsigned int size_prime_index;
362*e4b17023SJohn Marino
363*e4b17023SJohn Marino size_prime_index = higher_prime_index (size);
364*e4b17023SJohn Marino size = prime_tab[size_prime_index].prime;
365*e4b17023SJohn Marino
366*e4b17023SJohn Marino result = (htab_t) (*alloc_tab_f) (1, sizeof (struct htab));
367*e4b17023SJohn Marino if (result == NULL)
368*e4b17023SJohn Marino return NULL;
369*e4b17023SJohn Marino result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR));
370*e4b17023SJohn Marino if (result->entries == NULL)
371*e4b17023SJohn Marino {
372*e4b17023SJohn Marino if (free_f != NULL)
373*e4b17023SJohn Marino (*free_f) (result);
374*e4b17023SJohn Marino return NULL;
375*e4b17023SJohn Marino }
376*e4b17023SJohn Marino result->size = size;
377*e4b17023SJohn Marino result->size_prime_index = size_prime_index;
378*e4b17023SJohn Marino result->hash_f = hash_f;
379*e4b17023SJohn Marino result->eq_f = eq_f;
380*e4b17023SJohn Marino result->del_f = del_f;
381*e4b17023SJohn Marino result->alloc_f = alloc_f;
382*e4b17023SJohn Marino result->free_f = free_f;
383*e4b17023SJohn Marino return result;
384*e4b17023SJohn Marino }
385*e4b17023SJohn Marino
386*e4b17023SJohn Marino
387*e4b17023SJohn Marino /* Update the function pointers and allocation parameter in the htab_t. */
388*e4b17023SJohn Marino
389*e4b17023SJohn Marino void
htab_set_functions_ex(htab_t htab,htab_hash hash_f,htab_eq eq_f,htab_del del_f,PTR alloc_arg,htab_alloc_with_arg alloc_f,htab_free_with_arg free_f)390*e4b17023SJohn Marino htab_set_functions_ex (htab_t htab, htab_hash hash_f, htab_eq eq_f,
391*e4b17023SJohn Marino htab_del del_f, PTR alloc_arg,
392*e4b17023SJohn Marino htab_alloc_with_arg alloc_f, htab_free_with_arg free_f)
393*e4b17023SJohn Marino {
394*e4b17023SJohn Marino htab->hash_f = hash_f;
395*e4b17023SJohn Marino htab->eq_f = eq_f;
396*e4b17023SJohn Marino htab->del_f = del_f;
397*e4b17023SJohn Marino htab->alloc_arg = alloc_arg;
398*e4b17023SJohn Marino htab->alloc_with_arg_f = alloc_f;
399*e4b17023SJohn Marino htab->free_with_arg_f = free_f;
400*e4b17023SJohn Marino }
401*e4b17023SJohn Marino
402*e4b17023SJohn Marino /* These functions exist solely for backward compatibility. */
403*e4b17023SJohn Marino
404*e4b17023SJohn Marino #undef htab_create
405*e4b17023SJohn Marino htab_t
htab_create(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f)406*e4b17023SJohn Marino htab_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f)
407*e4b17023SJohn Marino {
408*e4b17023SJohn Marino return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free);
409*e4b17023SJohn Marino }
410*e4b17023SJohn Marino
411*e4b17023SJohn Marino htab_t
htab_try_create(size_t size,htab_hash hash_f,htab_eq eq_f,htab_del del_f)412*e4b17023SJohn Marino htab_try_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f)
413*e4b17023SJohn Marino {
414*e4b17023SJohn Marino return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free);
415*e4b17023SJohn Marino }
416*e4b17023SJohn Marino
417*e4b17023SJohn Marino /* This function frees all memory allocated for given hash table.
418*e4b17023SJohn Marino Naturally the hash table must already exist. */
419*e4b17023SJohn Marino
420*e4b17023SJohn Marino void
htab_delete(htab_t htab)421*e4b17023SJohn Marino htab_delete (htab_t htab)
422*e4b17023SJohn Marino {
423*e4b17023SJohn Marino size_t size = htab_size (htab);
424*e4b17023SJohn Marino PTR *entries = htab->entries;
425*e4b17023SJohn Marino int i;
426*e4b17023SJohn Marino
427*e4b17023SJohn Marino if (htab->del_f)
428*e4b17023SJohn Marino for (i = size - 1; i >= 0; i--)
429*e4b17023SJohn Marino if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY)
430*e4b17023SJohn Marino (*htab->del_f) (entries[i]);
431*e4b17023SJohn Marino
432*e4b17023SJohn Marino if (htab->free_f != NULL)
433*e4b17023SJohn Marino {
434*e4b17023SJohn Marino (*htab->free_f) (entries);
435*e4b17023SJohn Marino (*htab->free_f) (htab);
436*e4b17023SJohn Marino }
437*e4b17023SJohn Marino else if (htab->free_with_arg_f != NULL)
438*e4b17023SJohn Marino {
439*e4b17023SJohn Marino (*htab->free_with_arg_f) (htab->alloc_arg, entries);
440*e4b17023SJohn Marino (*htab->free_with_arg_f) (htab->alloc_arg, htab);
441*e4b17023SJohn Marino }
442*e4b17023SJohn Marino }
443*e4b17023SJohn Marino
444*e4b17023SJohn Marino /* This function clears all entries in the given hash table. */
445*e4b17023SJohn Marino
446*e4b17023SJohn Marino void
htab_empty(htab_t htab)447*e4b17023SJohn Marino htab_empty (htab_t htab)
448*e4b17023SJohn Marino {
449*e4b17023SJohn Marino size_t size = htab_size (htab);
450*e4b17023SJohn Marino PTR *entries = htab->entries;
451*e4b17023SJohn Marino int i;
452*e4b17023SJohn Marino
453*e4b17023SJohn Marino if (htab->del_f)
454*e4b17023SJohn Marino for (i = size - 1; i >= 0; i--)
455*e4b17023SJohn Marino if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY)
456*e4b17023SJohn Marino (*htab->del_f) (entries[i]);
457*e4b17023SJohn Marino
458*e4b17023SJohn Marino /* Instead of clearing megabyte, downsize the table. */
459*e4b17023SJohn Marino if (size > 1024*1024 / sizeof (PTR))
460*e4b17023SJohn Marino {
461*e4b17023SJohn Marino int nindex = higher_prime_index (1024 / sizeof (PTR));
462*e4b17023SJohn Marino int nsize = prime_tab[nindex].prime;
463*e4b17023SJohn Marino
464*e4b17023SJohn Marino if (htab->free_f != NULL)
465*e4b17023SJohn Marino (*htab->free_f) (htab->entries);
466*e4b17023SJohn Marino else if (htab->free_with_arg_f != NULL)
467*e4b17023SJohn Marino (*htab->free_with_arg_f) (htab->alloc_arg, htab->entries);
468*e4b17023SJohn Marino if (htab->alloc_with_arg_f != NULL)
469*e4b17023SJohn Marino htab->entries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
470*e4b17023SJohn Marino sizeof (PTR *));
471*e4b17023SJohn Marino else
472*e4b17023SJohn Marino htab->entries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
473*e4b17023SJohn Marino htab->size = nsize;
474*e4b17023SJohn Marino htab->size_prime_index = nindex;
475*e4b17023SJohn Marino }
476*e4b17023SJohn Marino else
477*e4b17023SJohn Marino memset (entries, 0, size * sizeof (PTR));
478*e4b17023SJohn Marino htab->n_deleted = 0;
479*e4b17023SJohn Marino htab->n_elements = 0;
480*e4b17023SJohn Marino }
481*e4b17023SJohn Marino
482*e4b17023SJohn Marino /* Similar to htab_find_slot, but without several unwanted side effects:
483*e4b17023SJohn Marino - Does not call htab->eq_f when it finds an existing entry.
484*e4b17023SJohn Marino - Does not change the count of elements/searches/collisions in the
485*e4b17023SJohn Marino hash table.
486*e4b17023SJohn Marino This function also assumes there are no deleted entries in the table.
487*e4b17023SJohn Marino HASH is the hash value for the element to be inserted. */
488*e4b17023SJohn Marino
489*e4b17023SJohn Marino static PTR *
find_empty_slot_for_expand(htab_t htab,hashval_t hash)490*e4b17023SJohn Marino find_empty_slot_for_expand (htab_t htab, hashval_t hash)
491*e4b17023SJohn Marino {
492*e4b17023SJohn Marino hashval_t index = htab_mod (hash, htab);
493*e4b17023SJohn Marino size_t size = htab_size (htab);
494*e4b17023SJohn Marino PTR *slot = htab->entries + index;
495*e4b17023SJohn Marino hashval_t hash2;
496*e4b17023SJohn Marino
497*e4b17023SJohn Marino if (*slot == HTAB_EMPTY_ENTRY)
498*e4b17023SJohn Marino return slot;
499*e4b17023SJohn Marino else if (*slot == HTAB_DELETED_ENTRY)
500*e4b17023SJohn Marino abort ();
501*e4b17023SJohn Marino
502*e4b17023SJohn Marino hash2 = htab_mod_m2 (hash, htab);
503*e4b17023SJohn Marino for (;;)
504*e4b17023SJohn Marino {
505*e4b17023SJohn Marino index += hash2;
506*e4b17023SJohn Marino if (index >= size)
507*e4b17023SJohn Marino index -= size;
508*e4b17023SJohn Marino
509*e4b17023SJohn Marino slot = htab->entries + index;
510*e4b17023SJohn Marino if (*slot == HTAB_EMPTY_ENTRY)
511*e4b17023SJohn Marino return slot;
512*e4b17023SJohn Marino else if (*slot == HTAB_DELETED_ENTRY)
513*e4b17023SJohn Marino abort ();
514*e4b17023SJohn Marino }
515*e4b17023SJohn Marino }
516*e4b17023SJohn Marino
517*e4b17023SJohn Marino /* The following function changes size of memory allocated for the
518*e4b17023SJohn Marino entries and repeatedly inserts the table elements. The occupancy
519*e4b17023SJohn Marino of the table after the call will be about 50%. Naturally the hash
520*e4b17023SJohn Marino table must already exist. Remember also that the place of the
521*e4b17023SJohn Marino table entries is changed. If memory allocation failures are allowed,
522*e4b17023SJohn Marino this function will return zero, indicating that the table could not be
523*e4b17023SJohn Marino expanded. If all goes well, it will return a non-zero value. */
524*e4b17023SJohn Marino
525*e4b17023SJohn Marino static int
htab_expand(htab_t htab)526*e4b17023SJohn Marino htab_expand (htab_t htab)
527*e4b17023SJohn Marino {
528*e4b17023SJohn Marino PTR *oentries;
529*e4b17023SJohn Marino PTR *olimit;
530*e4b17023SJohn Marino PTR *p;
531*e4b17023SJohn Marino PTR *nentries;
532*e4b17023SJohn Marino size_t nsize, osize, elts;
533*e4b17023SJohn Marino unsigned int oindex, nindex;
534*e4b17023SJohn Marino
535*e4b17023SJohn Marino oentries = htab->entries;
536*e4b17023SJohn Marino oindex = htab->size_prime_index;
537*e4b17023SJohn Marino osize = htab->size;
538*e4b17023SJohn Marino olimit = oentries + osize;
539*e4b17023SJohn Marino elts = htab_elements (htab);
540*e4b17023SJohn Marino
541*e4b17023SJohn Marino /* Resize only when table after removal of unused elements is either
542*e4b17023SJohn Marino too full or too empty. */
543*e4b17023SJohn Marino if (elts * 2 > osize || (elts * 8 < osize && osize > 32))
544*e4b17023SJohn Marino {
545*e4b17023SJohn Marino nindex = higher_prime_index (elts * 2);
546*e4b17023SJohn Marino nsize = prime_tab[nindex].prime;
547*e4b17023SJohn Marino }
548*e4b17023SJohn Marino else
549*e4b17023SJohn Marino {
550*e4b17023SJohn Marino nindex = oindex;
551*e4b17023SJohn Marino nsize = osize;
552*e4b17023SJohn Marino }
553*e4b17023SJohn Marino
554*e4b17023SJohn Marino if (htab->alloc_with_arg_f != NULL)
555*e4b17023SJohn Marino nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
556*e4b17023SJohn Marino sizeof (PTR *));
557*e4b17023SJohn Marino else
558*e4b17023SJohn Marino nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
559*e4b17023SJohn Marino if (nentries == NULL)
560*e4b17023SJohn Marino return 0;
561*e4b17023SJohn Marino htab->entries = nentries;
562*e4b17023SJohn Marino htab->size = nsize;
563*e4b17023SJohn Marino htab->size_prime_index = nindex;
564*e4b17023SJohn Marino htab->n_elements -= htab->n_deleted;
565*e4b17023SJohn Marino htab->n_deleted = 0;
566*e4b17023SJohn Marino
567*e4b17023SJohn Marino p = oentries;
568*e4b17023SJohn Marino do
569*e4b17023SJohn Marino {
570*e4b17023SJohn Marino PTR x = *p;
571*e4b17023SJohn Marino
572*e4b17023SJohn Marino if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
573*e4b17023SJohn Marino {
574*e4b17023SJohn Marino PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x));
575*e4b17023SJohn Marino
576*e4b17023SJohn Marino *q = x;
577*e4b17023SJohn Marino }
578*e4b17023SJohn Marino
579*e4b17023SJohn Marino p++;
580*e4b17023SJohn Marino }
581*e4b17023SJohn Marino while (p < olimit);
582*e4b17023SJohn Marino
583*e4b17023SJohn Marino if (htab->free_f != NULL)
584*e4b17023SJohn Marino (*htab->free_f) (oentries);
585*e4b17023SJohn Marino else if (htab->free_with_arg_f != NULL)
586*e4b17023SJohn Marino (*htab->free_with_arg_f) (htab->alloc_arg, oentries);
587*e4b17023SJohn Marino return 1;
588*e4b17023SJohn Marino }
589*e4b17023SJohn Marino
590*e4b17023SJohn Marino /* This function searches for a hash table entry equal to the given
591*e4b17023SJohn Marino element. It cannot be used to insert or delete an element. */
592*e4b17023SJohn Marino
593*e4b17023SJohn Marino PTR
htab_find_with_hash(htab_t htab,const PTR element,hashval_t hash)594*e4b17023SJohn Marino htab_find_with_hash (htab_t htab, const PTR element, hashval_t hash)
595*e4b17023SJohn Marino {
596*e4b17023SJohn Marino hashval_t index, hash2;
597*e4b17023SJohn Marino size_t size;
598*e4b17023SJohn Marino PTR entry;
599*e4b17023SJohn Marino
600*e4b17023SJohn Marino htab->searches++;
601*e4b17023SJohn Marino size = htab_size (htab);
602*e4b17023SJohn Marino index = htab_mod (hash, htab);
603*e4b17023SJohn Marino
604*e4b17023SJohn Marino entry = htab->entries[index];
605*e4b17023SJohn Marino if (entry == HTAB_EMPTY_ENTRY
606*e4b17023SJohn Marino || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element)))
607*e4b17023SJohn Marino return entry;
608*e4b17023SJohn Marino
609*e4b17023SJohn Marino hash2 = htab_mod_m2 (hash, htab);
610*e4b17023SJohn Marino for (;;)
611*e4b17023SJohn Marino {
612*e4b17023SJohn Marino htab->collisions++;
613*e4b17023SJohn Marino index += hash2;
614*e4b17023SJohn Marino if (index >= size)
615*e4b17023SJohn Marino index -= size;
616*e4b17023SJohn Marino
617*e4b17023SJohn Marino entry = htab->entries[index];
618*e4b17023SJohn Marino if (entry == HTAB_EMPTY_ENTRY
619*e4b17023SJohn Marino || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element)))
620*e4b17023SJohn Marino return entry;
621*e4b17023SJohn Marino }
622*e4b17023SJohn Marino }
623*e4b17023SJohn Marino
624*e4b17023SJohn Marino /* Like htab_find_slot_with_hash, but compute the hash value from the
625*e4b17023SJohn Marino element. */
626*e4b17023SJohn Marino
627*e4b17023SJohn Marino PTR
htab_find(htab_t htab,const PTR element)628*e4b17023SJohn Marino htab_find (htab_t htab, const PTR element)
629*e4b17023SJohn Marino {
630*e4b17023SJohn Marino return htab_find_with_hash (htab, element, (*htab->hash_f) (element));
631*e4b17023SJohn Marino }
632*e4b17023SJohn Marino
633*e4b17023SJohn Marino /* This function searches for a hash table slot containing an entry
634*e4b17023SJohn Marino equal to the given element. To delete an entry, call this with
635*e4b17023SJohn Marino insert=NO_INSERT, then call htab_clear_slot on the slot returned
636*e4b17023SJohn Marino (possibly after doing some checks). To insert an entry, call this
637*e4b17023SJohn Marino with insert=INSERT, then write the value you want into the returned
638*e4b17023SJohn Marino slot. When inserting an entry, NULL may be returned if memory
639*e4b17023SJohn Marino allocation fails. */
640*e4b17023SJohn Marino
641*e4b17023SJohn Marino PTR *
htab_find_slot_with_hash(htab_t htab,const PTR element,hashval_t hash,enum insert_option insert)642*e4b17023SJohn Marino htab_find_slot_with_hash (htab_t htab, const PTR element,
643*e4b17023SJohn Marino hashval_t hash, enum insert_option insert)
644*e4b17023SJohn Marino {
645*e4b17023SJohn Marino PTR *first_deleted_slot;
646*e4b17023SJohn Marino hashval_t index, hash2;
647*e4b17023SJohn Marino size_t size;
648*e4b17023SJohn Marino PTR entry;
649*e4b17023SJohn Marino
650*e4b17023SJohn Marino size = htab_size (htab);
651*e4b17023SJohn Marino if (insert == INSERT && size * 3 <= htab->n_elements * 4)
652*e4b17023SJohn Marino {
653*e4b17023SJohn Marino if (htab_expand (htab) == 0)
654*e4b17023SJohn Marino return NULL;
655*e4b17023SJohn Marino size = htab_size (htab);
656*e4b17023SJohn Marino }
657*e4b17023SJohn Marino
658*e4b17023SJohn Marino index = htab_mod (hash, htab);
659*e4b17023SJohn Marino
660*e4b17023SJohn Marino htab->searches++;
661*e4b17023SJohn Marino first_deleted_slot = NULL;
662*e4b17023SJohn Marino
663*e4b17023SJohn Marino entry = htab->entries[index];
664*e4b17023SJohn Marino if (entry == HTAB_EMPTY_ENTRY)
665*e4b17023SJohn Marino goto empty_entry;
666*e4b17023SJohn Marino else if (entry == HTAB_DELETED_ENTRY)
667*e4b17023SJohn Marino first_deleted_slot = &htab->entries[index];
668*e4b17023SJohn Marino else if ((*htab->eq_f) (entry, element))
669*e4b17023SJohn Marino return &htab->entries[index];
670*e4b17023SJohn Marino
671*e4b17023SJohn Marino hash2 = htab_mod_m2 (hash, htab);
672*e4b17023SJohn Marino for (;;)
673*e4b17023SJohn Marino {
674*e4b17023SJohn Marino htab->collisions++;
675*e4b17023SJohn Marino index += hash2;
676*e4b17023SJohn Marino if (index >= size)
677*e4b17023SJohn Marino index -= size;
678*e4b17023SJohn Marino
679*e4b17023SJohn Marino entry = htab->entries[index];
680*e4b17023SJohn Marino if (entry == HTAB_EMPTY_ENTRY)
681*e4b17023SJohn Marino goto empty_entry;
682*e4b17023SJohn Marino else if (entry == HTAB_DELETED_ENTRY)
683*e4b17023SJohn Marino {
684*e4b17023SJohn Marino if (!first_deleted_slot)
685*e4b17023SJohn Marino first_deleted_slot = &htab->entries[index];
686*e4b17023SJohn Marino }
687*e4b17023SJohn Marino else if ((*htab->eq_f) (entry, element))
688*e4b17023SJohn Marino return &htab->entries[index];
689*e4b17023SJohn Marino }
690*e4b17023SJohn Marino
691*e4b17023SJohn Marino empty_entry:
692*e4b17023SJohn Marino if (insert == NO_INSERT)
693*e4b17023SJohn Marino return NULL;
694*e4b17023SJohn Marino
695*e4b17023SJohn Marino if (first_deleted_slot)
696*e4b17023SJohn Marino {
697*e4b17023SJohn Marino htab->n_deleted--;
698*e4b17023SJohn Marino *first_deleted_slot = HTAB_EMPTY_ENTRY;
699*e4b17023SJohn Marino return first_deleted_slot;
700*e4b17023SJohn Marino }
701*e4b17023SJohn Marino
702*e4b17023SJohn Marino htab->n_elements++;
703*e4b17023SJohn Marino return &htab->entries[index];
704*e4b17023SJohn Marino }
705*e4b17023SJohn Marino
706*e4b17023SJohn Marino /* Like htab_find_slot_with_hash, but compute the hash value from the
707*e4b17023SJohn Marino element. */
708*e4b17023SJohn Marino
709*e4b17023SJohn Marino PTR *
htab_find_slot(htab_t htab,const PTR element,enum insert_option insert)710*e4b17023SJohn Marino htab_find_slot (htab_t htab, const PTR element, enum insert_option insert)
711*e4b17023SJohn Marino {
712*e4b17023SJohn Marino return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element),
713*e4b17023SJohn Marino insert);
714*e4b17023SJohn Marino }
715*e4b17023SJohn Marino
716*e4b17023SJohn Marino /* This function deletes an element with the given value from hash
717*e4b17023SJohn Marino table (the hash is computed from the element). If there is no matching
718*e4b17023SJohn Marino element in the hash table, this function does nothing. */
719*e4b17023SJohn Marino
720*e4b17023SJohn Marino void
htab_remove_elt(htab_t htab,PTR element)721*e4b17023SJohn Marino htab_remove_elt (htab_t htab, PTR element)
722*e4b17023SJohn Marino {
723*e4b17023SJohn Marino htab_remove_elt_with_hash (htab, element, (*htab->hash_f) (element));
724*e4b17023SJohn Marino }
725*e4b17023SJohn Marino
726*e4b17023SJohn Marino
727*e4b17023SJohn Marino /* This function deletes an element with the given value from hash
728*e4b17023SJohn Marino table. If there is no matching element in the hash table, this
729*e4b17023SJohn Marino function does nothing. */
730*e4b17023SJohn Marino
731*e4b17023SJohn Marino void
htab_remove_elt_with_hash(htab_t htab,PTR element,hashval_t hash)732*e4b17023SJohn Marino htab_remove_elt_with_hash (htab_t htab, PTR element, hashval_t hash)
733*e4b17023SJohn Marino {
734*e4b17023SJohn Marino PTR *slot;
735*e4b17023SJohn Marino
736*e4b17023SJohn Marino slot = htab_find_slot_with_hash (htab, element, hash, NO_INSERT);
737*e4b17023SJohn Marino if (*slot == HTAB_EMPTY_ENTRY)
738*e4b17023SJohn Marino return;
739*e4b17023SJohn Marino
740*e4b17023SJohn Marino if (htab->del_f)
741*e4b17023SJohn Marino (*htab->del_f) (*slot);
742*e4b17023SJohn Marino
743*e4b17023SJohn Marino *slot = HTAB_DELETED_ENTRY;
744*e4b17023SJohn Marino htab->n_deleted++;
745*e4b17023SJohn Marino }
746*e4b17023SJohn Marino
747*e4b17023SJohn Marino /* This function clears a specified slot in a hash table. It is
748*e4b17023SJohn Marino useful when you've already done the lookup and don't want to do it
749*e4b17023SJohn Marino again. */
750*e4b17023SJohn Marino
751*e4b17023SJohn Marino void
htab_clear_slot(htab_t htab,PTR * slot)752*e4b17023SJohn Marino htab_clear_slot (htab_t htab, PTR *slot)
753*e4b17023SJohn Marino {
754*e4b17023SJohn Marino if (slot < htab->entries || slot >= htab->entries + htab_size (htab)
755*e4b17023SJohn Marino || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY)
756*e4b17023SJohn Marino abort ();
757*e4b17023SJohn Marino
758*e4b17023SJohn Marino if (htab->del_f)
759*e4b17023SJohn Marino (*htab->del_f) (*slot);
760*e4b17023SJohn Marino
761*e4b17023SJohn Marino *slot = HTAB_DELETED_ENTRY;
762*e4b17023SJohn Marino htab->n_deleted++;
763*e4b17023SJohn Marino }
764*e4b17023SJohn Marino
765*e4b17023SJohn Marino /* This function scans over the entire hash table calling
766*e4b17023SJohn Marino CALLBACK for each live entry. If CALLBACK returns false,
767*e4b17023SJohn Marino the iteration stops. INFO is passed as CALLBACK's second
768*e4b17023SJohn Marino argument. */
769*e4b17023SJohn Marino
770*e4b17023SJohn Marino void
htab_traverse_noresize(htab_t htab,htab_trav callback,PTR info)771*e4b17023SJohn Marino htab_traverse_noresize (htab_t htab, htab_trav callback, PTR info)
772*e4b17023SJohn Marino {
773*e4b17023SJohn Marino PTR *slot;
774*e4b17023SJohn Marino PTR *limit;
775*e4b17023SJohn Marino
776*e4b17023SJohn Marino slot = htab->entries;
777*e4b17023SJohn Marino limit = slot + htab_size (htab);
778*e4b17023SJohn Marino
779*e4b17023SJohn Marino do
780*e4b17023SJohn Marino {
781*e4b17023SJohn Marino PTR x = *slot;
782*e4b17023SJohn Marino
783*e4b17023SJohn Marino if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY)
784*e4b17023SJohn Marino if (!(*callback) (slot, info))
785*e4b17023SJohn Marino break;
786*e4b17023SJohn Marino }
787*e4b17023SJohn Marino while (++slot < limit);
788*e4b17023SJohn Marino }
789*e4b17023SJohn Marino
790*e4b17023SJohn Marino /* Like htab_traverse_noresize, but does resize the table when it is
791*e4b17023SJohn Marino too empty to improve effectivity of subsequent calls. */
792*e4b17023SJohn Marino
793*e4b17023SJohn Marino void
htab_traverse(htab_t htab,htab_trav callback,PTR info)794*e4b17023SJohn Marino htab_traverse (htab_t htab, htab_trav callback, PTR info)
795*e4b17023SJohn Marino {
796*e4b17023SJohn Marino size_t size = htab_size (htab);
797*e4b17023SJohn Marino if (htab_elements (htab) * 8 < size && size > 32)
798*e4b17023SJohn Marino htab_expand (htab);
799*e4b17023SJohn Marino
800*e4b17023SJohn Marino htab_traverse_noresize (htab, callback, info);
801*e4b17023SJohn Marino }
802*e4b17023SJohn Marino
803*e4b17023SJohn Marino /* Return the fraction of fixed collisions during all work with given
804*e4b17023SJohn Marino hash table. */
805*e4b17023SJohn Marino
806*e4b17023SJohn Marino double
htab_collisions(htab_t htab)807*e4b17023SJohn Marino htab_collisions (htab_t htab)
808*e4b17023SJohn Marino {
809*e4b17023SJohn Marino if (htab->searches == 0)
810*e4b17023SJohn Marino return 0.0;
811*e4b17023SJohn Marino
812*e4b17023SJohn Marino return (double) htab->collisions / (double) htab->searches;
813*e4b17023SJohn Marino }
814*e4b17023SJohn Marino
815*e4b17023SJohn Marino /* Hash P as a null-terminated string.
816*e4b17023SJohn Marino
817*e4b17023SJohn Marino Copied from gcc/hashtable.c. Zack had the following to say with respect
818*e4b17023SJohn Marino to applicability, though note that unlike hashtable.c, this hash table
819*e4b17023SJohn Marino implementation re-hashes rather than chain buckets.
820*e4b17023SJohn Marino
821*e4b17023SJohn Marino http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html
822*e4b17023SJohn Marino From: Zack Weinberg <zackw@panix.com>
823*e4b17023SJohn Marino Date: Fri, 17 Aug 2001 02:15:56 -0400
824*e4b17023SJohn Marino
825*e4b17023SJohn Marino I got it by extracting all the identifiers from all the source code
826*e4b17023SJohn Marino I had lying around in mid-1999, and testing many recurrences of
827*e4b17023SJohn Marino the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either
828*e4b17023SJohn Marino prime numbers or the appropriate identity. This was the best one.
829*e4b17023SJohn Marino I don't remember exactly what constituted "best", except I was
830*e4b17023SJohn Marino looking at bucket-length distributions mostly.
831*e4b17023SJohn Marino
832*e4b17023SJohn Marino So it should be very good at hashing identifiers, but might not be
833*e4b17023SJohn Marino as good at arbitrary strings.
834*e4b17023SJohn Marino
835*e4b17023SJohn Marino I'll add that it thoroughly trounces the hash functions recommended
836*e4b17023SJohn Marino for this use at http://burtleburtle.net/bob/hash/index.html, both
837*e4b17023SJohn Marino on speed and bucket distribution. I haven't tried it against the
838*e4b17023SJohn Marino function they just started using for Perl's hashes. */
839*e4b17023SJohn Marino
840*e4b17023SJohn Marino hashval_t
htab_hash_string(const PTR p)841*e4b17023SJohn Marino htab_hash_string (const PTR p)
842*e4b17023SJohn Marino {
843*e4b17023SJohn Marino const unsigned char *str = (const unsigned char *) p;
844*e4b17023SJohn Marino hashval_t r = 0;
845*e4b17023SJohn Marino unsigned char c;
846*e4b17023SJohn Marino
847*e4b17023SJohn Marino while ((c = *str++) != 0)
848*e4b17023SJohn Marino r = r * 67 + c - 113;
849*e4b17023SJohn Marino
850*e4b17023SJohn Marino return r;
851*e4b17023SJohn Marino }
852*e4b17023SJohn Marino
853*e4b17023SJohn Marino /* DERIVED FROM:
854*e4b17023SJohn Marino --------------------------------------------------------------------
855*e4b17023SJohn Marino lookup2.c, by Bob Jenkins, December 1996, Public Domain.
856*e4b17023SJohn Marino hash(), hash2(), hash3, and mix() are externally useful functions.
857*e4b17023SJohn Marino Routines to test the hash are included if SELF_TEST is defined.
858*e4b17023SJohn Marino You can use this free for any purpose. It has no warranty.
859*e4b17023SJohn Marino --------------------------------------------------------------------
860*e4b17023SJohn Marino */
861*e4b17023SJohn Marino
862*e4b17023SJohn Marino /*
863*e4b17023SJohn Marino --------------------------------------------------------------------
864*e4b17023SJohn Marino mix -- mix 3 32-bit values reversibly.
865*e4b17023SJohn Marino For every delta with one or two bit set, and the deltas of all three
866*e4b17023SJohn Marino high bits or all three low bits, whether the original value of a,b,c
867*e4b17023SJohn Marino is almost all zero or is uniformly distributed,
868*e4b17023SJohn Marino * If mix() is run forward or backward, at least 32 bits in a,b,c
869*e4b17023SJohn Marino have at least 1/4 probability of changing.
870*e4b17023SJohn Marino * If mix() is run forward, every bit of c will change between 1/3 and
871*e4b17023SJohn Marino 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
872*e4b17023SJohn Marino mix() was built out of 36 single-cycle latency instructions in a
873*e4b17023SJohn Marino structure that could supported 2x parallelism, like so:
874*e4b17023SJohn Marino a -= b;
875*e4b17023SJohn Marino a -= c; x = (c>>13);
876*e4b17023SJohn Marino b -= c; a ^= x;
877*e4b17023SJohn Marino b -= a; x = (a<<8);
878*e4b17023SJohn Marino c -= a; b ^= x;
879*e4b17023SJohn Marino c -= b; x = (b>>13);
880*e4b17023SJohn Marino ...
881*e4b17023SJohn Marino Unfortunately, superscalar Pentiums and Sparcs can't take advantage
882*e4b17023SJohn Marino of that parallelism. They've also turned some of those single-cycle
883*e4b17023SJohn Marino latency instructions into multi-cycle latency instructions. Still,
884*e4b17023SJohn Marino this is the fastest good hash I could find. There were about 2^^68
885*e4b17023SJohn Marino to choose from. I only looked at a billion or so.
886*e4b17023SJohn Marino --------------------------------------------------------------------
887*e4b17023SJohn Marino */
888*e4b17023SJohn Marino /* same, but slower, works on systems that might have 8 byte hashval_t's */
889*e4b17023SJohn Marino #define mix(a,b,c) \
890*e4b17023SJohn Marino { \
891*e4b17023SJohn Marino a -= b; a -= c; a ^= (c>>13); \
892*e4b17023SJohn Marino b -= c; b -= a; b ^= (a<< 8); \
893*e4b17023SJohn Marino c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
894*e4b17023SJohn Marino a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
895*e4b17023SJohn Marino b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
896*e4b17023SJohn Marino c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
897*e4b17023SJohn Marino a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
898*e4b17023SJohn Marino b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
899*e4b17023SJohn Marino c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
900*e4b17023SJohn Marino }
901*e4b17023SJohn Marino
902*e4b17023SJohn Marino /*
903*e4b17023SJohn Marino --------------------------------------------------------------------
904*e4b17023SJohn Marino hash() -- hash a variable-length key into a 32-bit value
905*e4b17023SJohn Marino k : the key (the unaligned variable-length array of bytes)
906*e4b17023SJohn Marino len : the length of the key, counting by bytes
907*e4b17023SJohn Marino level : can be any 4-byte value
908*e4b17023SJohn Marino Returns a 32-bit value. Every bit of the key affects every bit of
909*e4b17023SJohn Marino the return value. Every 1-bit and 2-bit delta achieves avalanche.
910*e4b17023SJohn Marino About 36+6len instructions.
911*e4b17023SJohn Marino
912*e4b17023SJohn Marino The best hash table sizes are powers of 2. There is no need to do
913*e4b17023SJohn Marino mod a prime (mod is sooo slow!). If you need less than 32 bits,
914*e4b17023SJohn Marino use a bitmask. For example, if you need only 10 bits, do
915*e4b17023SJohn Marino h = (h & hashmask(10));
916*e4b17023SJohn Marino In which case, the hash table should have hashsize(10) elements.
917*e4b17023SJohn Marino
918*e4b17023SJohn Marino If you are hashing n strings (ub1 **)k, do it like this:
919*e4b17023SJohn Marino for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
920*e4b17023SJohn Marino
921*e4b17023SJohn Marino By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
922*e4b17023SJohn Marino code any way you wish, private, educational, or commercial. It's free.
923*e4b17023SJohn Marino
924*e4b17023SJohn Marino See http://burtleburtle.net/bob/hash/evahash.html
925*e4b17023SJohn Marino Use for hash table lookup, or anything where one collision in 2^32 is
926*e4b17023SJohn Marino acceptable. Do NOT use for cryptographic purposes.
927*e4b17023SJohn Marino --------------------------------------------------------------------
928*e4b17023SJohn Marino */
929*e4b17023SJohn Marino
930*e4b17023SJohn Marino hashval_t
iterative_hash(const PTR k_in,register size_t length,register hashval_t initval)931*e4b17023SJohn Marino iterative_hash (const PTR k_in /* the key */,
932*e4b17023SJohn Marino register size_t length /* the length of the key */,
933*e4b17023SJohn Marino register hashval_t initval /* the previous hash, or
934*e4b17023SJohn Marino an arbitrary value */)
935*e4b17023SJohn Marino {
936*e4b17023SJohn Marino register const unsigned char *k = (const unsigned char *)k_in;
937*e4b17023SJohn Marino register hashval_t a,b,c,len;
938*e4b17023SJohn Marino
939*e4b17023SJohn Marino /* Set up the internal state */
940*e4b17023SJohn Marino len = length;
941*e4b17023SJohn Marino a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
942*e4b17023SJohn Marino c = initval; /* the previous hash value */
943*e4b17023SJohn Marino
944*e4b17023SJohn Marino /*---------------------------------------- handle most of the key */
945*e4b17023SJohn Marino #ifndef WORDS_BIGENDIAN
946*e4b17023SJohn Marino /* On a little-endian machine, if the data is 4-byte aligned we can hash
947*e4b17023SJohn Marino by word for better speed. This gives nondeterministic results on
948*e4b17023SJohn Marino big-endian machines. */
949*e4b17023SJohn Marino if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
950*e4b17023SJohn Marino while (len >= 12) /* aligned */
951*e4b17023SJohn Marino {
952*e4b17023SJohn Marino a += *(hashval_t *)(k+0);
953*e4b17023SJohn Marino b += *(hashval_t *)(k+4);
954*e4b17023SJohn Marino c += *(hashval_t *)(k+8);
955*e4b17023SJohn Marino mix(a,b,c);
956*e4b17023SJohn Marino k += 12; len -= 12;
957*e4b17023SJohn Marino }
958*e4b17023SJohn Marino else /* unaligned */
959*e4b17023SJohn Marino #endif
960*e4b17023SJohn Marino while (len >= 12)
961*e4b17023SJohn Marino {
962*e4b17023SJohn Marino a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
963*e4b17023SJohn Marino b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
964*e4b17023SJohn Marino c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
965*e4b17023SJohn Marino mix(a,b,c);
966*e4b17023SJohn Marino k += 12; len -= 12;
967*e4b17023SJohn Marino }
968*e4b17023SJohn Marino
969*e4b17023SJohn Marino /*------------------------------------- handle the last 11 bytes */
970*e4b17023SJohn Marino c += length;
971*e4b17023SJohn Marino switch(len) /* all the case statements fall through */
972*e4b17023SJohn Marino {
973*e4b17023SJohn Marino case 11: c+=((hashval_t)k[10]<<24);
974*e4b17023SJohn Marino case 10: c+=((hashval_t)k[9]<<16);
975*e4b17023SJohn Marino case 9 : c+=((hashval_t)k[8]<<8);
976*e4b17023SJohn Marino /* the first byte of c is reserved for the length */
977*e4b17023SJohn Marino case 8 : b+=((hashval_t)k[7]<<24);
978*e4b17023SJohn Marino case 7 : b+=((hashval_t)k[6]<<16);
979*e4b17023SJohn Marino case 6 : b+=((hashval_t)k[5]<<8);
980*e4b17023SJohn Marino case 5 : b+=k[4];
981*e4b17023SJohn Marino case 4 : a+=((hashval_t)k[3]<<24);
982*e4b17023SJohn Marino case 3 : a+=((hashval_t)k[2]<<16);
983*e4b17023SJohn Marino case 2 : a+=((hashval_t)k[1]<<8);
984*e4b17023SJohn Marino case 1 : a+=k[0];
985*e4b17023SJohn Marino /* case 0: nothing left to add */
986*e4b17023SJohn Marino }
987*e4b17023SJohn Marino mix(a,b,c);
988*e4b17023SJohn Marino /*-------------------------------------------- report the result */
989*e4b17023SJohn Marino return c;
990*e4b17023SJohn Marino }
991