xref: /netbsd-src/external/gpl2/gettext/dist/gnulib-local/lib/hash.c (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1 /* hash - implement simple hashing table with string based keys.
2    Copyright (C) 1994-1995, 2000-2006 Free Software Foundation, Inc.
3    Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software Foundation,
17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
18 
19 #include <config.h>
20 
21 /* Specification.  */
22 #include "hash.h"
23 
24 #include <stdlib.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <limits.h>
28 #include <sys/types.h>
29 
30 /* Since this simple implementation of hash tables allows only insertion, no
31    removal of entries, the right data structure for the memory holding all keys
32    is an obstack.  */
33 #include "obstack.h"
34 
35 /* Use checked memory allocation.  */
36 #include "xalloc.h"
37 
38 #define obstack_chunk_alloc xmalloc
39 #define obstack_chunk_free free
40 
41 
42 typedef struct hash_entry
43 {
44   unsigned long used;  /* Hash code of the key, or 0 for an unused entry.  */
45   const void *key;     /* Key.  */
46   size_t keylen;
47   void *data;          /* Value.  */
48   struct hash_entry *next;
49 }
50 hash_entry;
51 
52 
53 /* Given an odd CANDIDATE > 1, return true if it is a prime number.  */
54 static int
is_prime(unsigned long int candidate)55 is_prime (unsigned long int candidate)
56 {
57   /* No even number and none less than 10 will be passed here.  */
58   unsigned long int divn = 3;
59   unsigned long int sq = divn * divn;
60 
61   while (sq < candidate && candidate % divn != 0)
62     {
63       ++divn;
64       sq += 4 * divn;
65       ++divn;
66     }
67 
68   return candidate % divn != 0;
69 }
70 
71 
72 /* Given SEED > 1, return the smallest odd prime number >= SEED.  */
73 unsigned long
next_prime(unsigned long int seed)74 next_prime (unsigned long int seed)
75 {
76   /* Make it definitely odd.  */
77   seed |= 1;
78 
79   while (!is_prime (seed))
80     seed += 2;
81 
82   return seed;
83 }
84 
85 
86 /* Initialize a hash table.  INIT_SIZE > 1 is the initial number of available
87    entries.
88    Return 0 upon successful completion, -1 upon memory allocation error.  */
89 int
hash_init(hash_table * htab,unsigned long int init_size)90 hash_init (hash_table *htab, unsigned long int init_size)
91 {
92   /* We need the size to be a prime.  */
93   init_size = next_prime (init_size);
94 
95   /* Initialize the data structure.  */
96   htab->size = init_size;
97   htab->filled = 0;
98   htab->first = NULL;
99   htab->table = (hash_entry *) xcalloc (init_size + 1, sizeof (hash_entry));
100 
101   obstack_init (&htab->mem_pool);
102 
103   return 0;
104 }
105 
106 
107 /* Delete a hash table's contents.
108    Return 0 always.  */
109 int
hash_destroy(hash_table * htab)110 hash_destroy (hash_table *htab)
111 {
112   free (htab->table);
113   obstack_free (&htab->mem_pool, NULL);
114   return 0;
115 }
116 
117 
118 /* Compute a hash code for a key consisting of KEYLEN bytes starting at KEY
119    in memory.  */
120 static unsigned long
compute_hashval(const void * key,size_t keylen)121 compute_hashval (const void *key, size_t keylen)
122 {
123   size_t cnt;
124   unsigned long int hval;
125 
126   /* Compute the hash value for the given string.  The algorithm
127      is taken from [Aho,Sethi,Ullman], fixed according to
128      http://www.haible.de/bruno/hashfunc.html.  */
129   cnt = 0;
130   hval = keylen;
131   while (cnt < keylen)
132     {
133       hval = (hval << 9) | (hval >> (sizeof (unsigned long) * CHAR_BIT - 9));
134       hval += (unsigned long int) *(((const char *) key) + cnt++);
135     }
136   return hval != 0 ? hval : ~((unsigned long) 0);
137 }
138 
139 
140 /* References:
141    [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
142    [Knuth]	      The Art of Computer Programming, part3 (6.4) */
143 
144 /* Look up a given key in the hash table.
145    Return the index of the entry, if present, or otherwise the index a free
146    entry where it could be inserted.  */
147 static size_t
lookup(hash_table * htab,const void * key,size_t keylen,unsigned long int hval)148 lookup (hash_table *htab,
149 	const void *key, size_t keylen,
150 	unsigned long int hval)
151 {
152   unsigned long int hash;
153   size_t idx;
154   hash_entry *table = htab->table;
155 
156   /* First hash function: simply take the modul but prevent zero.  */
157   hash = 1 + hval % htab->size;
158 
159   idx = hash;
160 
161   if (table[idx].used)
162     {
163       if (table[idx].used == hval && table[idx].keylen == keylen
164 	  && memcmp (table[idx].key, key, keylen) == 0)
165 	return idx;
166 
167       /* Second hash function as suggested in [Knuth].  */
168       hash = 1 + hval % (htab->size - 2);
169 
170       do
171 	{
172 	  if (idx <= hash)
173 	    idx = htab->size + idx - hash;
174 	  else
175 	    idx -= hash;
176 
177 	  /* If entry is found use it.  */
178 	  if (table[idx].used == hval && table[idx].keylen == keylen
179 	      && memcmp (table[idx].key, key, keylen) == 0)
180 	    return idx;
181 	}
182       while (table[idx].used);
183     }
184   return idx;
185 }
186 
187 
188 /* Look up the value of a key in the given table.
189    If found, return 0 and set *RESULT to it.  Otherwise return -1.  */
190 int
hash_find_entry(hash_table * htab,const void * key,size_t keylen,void ** result)191 hash_find_entry (hash_table *htab, const void *key, size_t keylen,
192 		 void **result)
193 {
194   hash_entry *table = htab->table;
195   size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen));
196 
197   if (table[idx].used == 0)
198     return -1;
199 
200   *result = table[idx].data;
201   return 0;
202 }
203 
204 
205 /* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table at index IDX.
206    HVAL is the key's hash code.  IDX depends on it.  The table entry at index
207    IDX is known to be unused.  */
208 static void
insert_entry_2(hash_table * htab,const void * key,size_t keylen,unsigned long int hval,size_t idx,void * data)209 insert_entry_2 (hash_table *htab,
210 		const void *key, size_t keylen,
211 		unsigned long int hval, size_t idx, void *data)
212 {
213   hash_entry *table = htab->table;
214 
215   table[idx].used = hval;
216   table[idx].key = key;
217   table[idx].keylen = keylen;
218   table[idx].data = data;
219 
220   /* List the new value in the list.  */
221   if (htab->first == NULL)
222     {
223       table[idx].next = &table[idx];
224       htab->first = &table[idx];
225     }
226   else
227     {
228       table[idx].next = htab->first->next;
229       htab->first->next = &table[idx];
230       htab->first = &table[idx];
231     }
232 
233   ++htab->filled;
234 }
235 
236 
237 /* Grow the hash table.  */
238 static void
resize(hash_table * htab)239 resize (hash_table *htab)
240 {
241   unsigned long int old_size = htab->size;
242   hash_entry *table = htab->table;
243   size_t idx;
244 
245   htab->size = next_prime (htab->size * 2);
246   htab->filled = 0;
247   htab->first = NULL;
248   htab->table = (hash_entry *) xcalloc (1 + htab->size, sizeof (hash_entry));
249 
250   for (idx = 1; idx <= old_size; ++idx)
251     if (table[idx].used)
252       insert_entry_2 (htab, table[idx].key, table[idx].keylen,
253 		      table[idx].used,
254 		      lookup (htab, table[idx].key, table[idx].keylen,
255 			      table[idx].used),
256 		      table[idx].data);
257 
258   free (table);
259 }
260 
261 
262 /* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
263    Return non-NULL (more precisely, the address of the KEY inside the table's
264    memory pool) if successful, or NULL if there is already an entry with the
265    given key.  */
266 const void *
hash_insert_entry(hash_table * htab,const void * key,size_t keylen,void * data)267 hash_insert_entry (hash_table *htab,
268 		   const void *key, size_t keylen,
269 		   void *data)
270 {
271   unsigned long int hval = compute_hashval (key, keylen);
272   hash_entry *table = htab->table;
273   size_t idx = lookup (htab, key, keylen, hval);
274 
275   if (table[idx].used)
276     /* We don't want to overwrite the old value.  */
277     return NULL;
278   else
279     {
280       /* An empty bucket has been found.  */
281       void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
282       insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
283       if (100 * htab->filled > 75 * htab->size)
284 	/* Table is filled more than 75%.  Resize the table.  */
285 	resize (htab);
286       return keycopy;
287     }
288 }
289 
290 
291 /* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
292    Return 0.  */
293 int
hash_set_value(hash_table * htab,const void * key,size_t keylen,void * data)294 hash_set_value (hash_table *htab,
295 		const void *key, size_t keylen,
296 		void *data)
297 {
298   unsigned long int hval = compute_hashval (key, keylen);
299   hash_entry *table = htab->table;
300   size_t idx = lookup (htab, key, keylen, hval);
301 
302   if (table[idx].used)
303     {
304       /* Overwrite the old value.  */
305       table[idx].data = data;
306       return 0;
307     }
308   else
309     {
310       /* An empty bucket has been found.  */
311       void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
312       insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
313       if (100 * htab->filled > 75 * htab->size)
314 	/* Table is filled more than 75%.  Resize the table.  */
315 	resize (htab);
316       return 0;
317     }
318 }
319 
320 
321 /* Steps *PTR forward to the next used entry in the given hash table.  *PTR
322    should be initially set to NULL.  Store information about the next entry
323    in *KEY, *KEYLEN, *DATA.
324    Return 0 normally, -1 when the whole hash table has been traversed.  */
325 int
hash_iterate(hash_table * htab,void ** ptr,const void ** key,size_t * keylen,void ** data)326 hash_iterate (hash_table *htab, void **ptr, const void **key, size_t *keylen,
327 	      void **data)
328 {
329   hash_entry *curr;
330 
331   if (*ptr == NULL)
332     {
333       if (htab->first == NULL)
334 	return -1;
335       curr = htab->first;
336     }
337   else
338     {
339       if (*ptr == htab->first)
340 	return -1;
341       curr = (hash_entry *) *ptr;
342     }
343   curr = curr->next;
344   *ptr = (void *) curr;
345 
346   *key = curr->key;
347   *keylen = curr->keylen;
348   *data = curr->data;
349   return 0;
350 }
351 
352 
353 /* Steps *PTR forward to the next used entry in the given hash table.  *PTR
354    should be initially set to NULL.  Store information about the next entry
355    in *KEY, *KEYLEN, *DATAP.  *DATAP is set to point to the storage of the
356    value; modifying **DATAP will modify the value of the entry.
357    Return 0 normally, -1 when the whole hash table has been traversed.  */
358 int
hash_iterate_modify(hash_table * htab,void ** ptr,const void ** key,size_t * keylen,void *** datap)359 hash_iterate_modify (hash_table *htab, void **ptr,
360 		     const void **key, size_t *keylen,
361 		     void ***datap)
362 {
363   hash_entry *curr;
364 
365   if (*ptr == NULL)
366     {
367       if (htab->first == NULL)
368 	return -1;
369       curr = htab->first;
370     }
371   else
372     {
373       if (*ptr == htab->first)
374 	return -1;
375       curr = (hash_entry *) *ptr;
376     }
377   curr = curr->next;
378   *ptr = (void *) curr;
379 
380   *key = curr->key;
381   *keylen = curr->keylen;
382   *datap = &curr->data;
383   return 0;
384 }
385