1 /* $NetBSD: binhash.c,v 1.3 2022/10/08 16:12:50 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* binhash 3
6 /* SUMMARY
7 /* hash table manager
8 /* SYNOPSIS
9 /* #include <binhash.h>
10 /*
11 /* typedef struct {
12 /* .in +4
13 /* void *key;
14 /* ssize_t key_len;
15 /* void *value;
16 /* /* private fields... */
17 /* .in -4
18 /* } BINHASH_INFO;
19 /*
20 /* BINHASH *binhash_create(size)
21 /* ssize_t size;
22 /*
23 /* BINHASH_INFO *binhash_enter(table, key, key_len, value)
24 /* BINHASH *table;
25 /* const void *key;
26 /* ssize_t key_len;
27 /* void *value;
28 /*
29 /* char *binhash_find(table, key, key_len)
30 /* BINHASH *table;
31 /* const void *key;
32 /* ssize_t key_len;
33 /*
34 /* BINHASH_INFO *binhash_locate(table, key, key_len)
35 /* BINHASH *table;
36 /* const void *key;
37 /* ssize_t key_len;
38 /*
39 /* void binhash_delete(table, key, key_len, free_fn)
40 /* BINHASH *table;
41 /* const void *key;
42 /* ssize_t key_len;
43 /* void (*free_fn)(void *);
44 /*
45 /* void binhash_free(table, free_fn)
46 /* BINHASH *table;
47 /* void (*free_fn)(void *);
48 /*
49 /* void binhash_walk(table, action, ptr)
50 /* BINHASH *table;
51 /* void (*action)(BINHASH_INFO *info, void *ptr);
52 /* void *ptr;
53 /*
54 /* BINHASH_INFO **binhash_list(table)
55 /* BINHASH *table;
56 /*
57 /* BINHASH_INFO *binhash_sequence(table, how)
58 /* BINHASH *table;
59 /* int how;
60 /* DESCRIPTION
61 /* This module maintains one or more hash tables. Each table entry
62 /* consists of a unique binary-valued lookup key and a generic
63 /* character-pointer value.
64 /* The tables are automatically resized when they fill up. When the
65 /* values to be remembered are not character pointers, proper casts
66 /* should be used or the code will not be portable.
67 /*
68 /* binhash_create() creates a table of the specified size and returns a
69 /* pointer to the result. The lookup keys are saved with mymemdup().
70 /*
71 /* binhash_enter() stores a (key, value) pair into the specified table
72 /* and returns a pointer to the resulting entry. The code does not
73 /* check if an entry with that key already exists: use binhash_locate()
74 /* for updating an existing entry. The key is copied; the value is not.
75 /*
76 /* binhash_find() returns the value that was stored under the given key,
77 /* or a null pointer if it was not found. In order to distinguish
78 /* a null value from a non-existent value, use binhash_locate().
79 /*
80 /* binhash_locate() returns a pointer to the entry that was stored
81 /* for the given key, or a null pointer if it was not found.
82 /*
83 /* binhash_delete() removes one entry that was stored under the given key.
84 /* If the free_fn argument is not a null pointer, the corresponding
85 /* function is called with as argument the value that was stored under
86 /* the key.
87 /*
88 /* binhash_free() destroys a hash table, including contents. If the free_fn
89 /* argument is not a null pointer, the corresponding function is called
90 /* for each table entry, with as argument the value that was stored
91 /* with the entry.
92 /*
93 /* binhash_walk() invokes the action function for each table entry, with
94 /* a pointer to the entry as its argument. The ptr argument is passed
95 /* on to the action function.
96 /*
97 /* binhash_list() returns a null-terminated list of pointers to
98 /* all elements in the named table. The list should be passed to
99 /* myfree().
100 /*
101 /* binhash_sequence() returns the first or next element
102 /* depending on the value of the "how" argument. Specify
103 /* BINHASH_SEQ_FIRST to start a new sequence, BINHASH_SEQ_NEXT
104 /* to continue, and BINHASH_SEQ_STOP to terminate a sequence
105 /* early. The caller must not delete an element before it is
106 /* visited.
107 /* RESTRICTIONS
108 /* A callback function should not modify the hash table that is
109 /* specified to its caller.
110 /* DIAGNOSTICS
111 /* The following conditions are reported and cause the program to
112 /* terminate immediately: memory allocation failure; an attempt
113 /* to delete a non-existent entry.
114 /* SEE ALSO
115 /* mymalloc(3) memory management wrapper
116 /* hash_fnv(3) Fowler/Noll/Vo hash function
117 /* LICENSE
118 /* .ad
119 /* .fi
120 /* The Secure Mailer license must be distributed with this software.
121 /* AUTHOR(S)
122 /* Wietse Venema
123 /* IBM T.J. Watson Research
124 /* P.O. Box 704
125 /* Yorktown Heights, NY 10598, USA
126 /*
127 /* Wietse Venema
128 /* Google, Inc.
129 /* 111 8th Avenue
130 /* New York, NY 10011, USA
131 /*--*/
132
133 /* C library */
134
135 #include <sys_defs.h>
136 #include <string.h>
137
138 /* Local stuff */
139
140 #include "mymalloc.h"
141 #include "msg.h"
142 #include "binhash.h"
143
144 /* binhash_hash - hash a string */
145
146 #ifndef NO_HASH_FNV
147 #include "hash_fnv.h"
148
149 #define binhash_hash(key, len, size) (hash_fnv((key), (len)) % (size))
150
151 #else
152
binhash_hash(const void * key,ssize_t len,size_t size)153 static size_t binhash_hash(const void *key, ssize_t len, size_t size)
154 {
155 size_t h = 0;
156 size_t g;
157
158 /*
159 * From the "Dragon" book by Aho, Sethi and Ullman.
160 */
161
162 while (len-- > 0) {
163 h = (h << 4U) + *(unsigned const char *) key++;
164 if ((g = (h & 0xf0000000)) != 0) {
165 h ^= (g >> 24U);
166 h ^= g;
167 }
168 }
169 return (h % size);
170 }
171
172 #endif
173
174 /* binhash_link - insert element into table */
175
176 #define binhash_link(table, elm) { \
177 BINHASH_INFO **_h = table->data + binhash_hash(elm->key, elm->key_len, table->size);\
178 elm->prev = 0; \
179 if ((elm->next = *_h) != 0) \
180 (*_h)->prev = elm; \
181 *_h = elm; \
182 table->used++; \
183 }
184
185 /* binhash_size - allocate and initialize hash table */
186
binhash_size(BINHASH * table,size_t size)187 static void binhash_size(BINHASH *table, size_t size)
188 {
189 BINHASH_INFO **h;
190
191 size |= 1;
192
193 table->data = h = (BINHASH_INFO **) mymalloc(size * sizeof(BINHASH_INFO *));
194 table->size = size;
195 table->used = 0;
196
197 while (size-- > 0)
198 *h++ = 0;
199 }
200
201 /* binhash_create - create initial hash table */
202
binhash_create(ssize_t size)203 BINHASH *binhash_create(ssize_t size)
204 {
205 BINHASH *table;
206
207 table = (BINHASH *) mymalloc(sizeof(BINHASH));
208 binhash_size(table, size < 13 ? 13 : size);
209 table->seq_bucket = table->seq_element = 0;
210 return (table);
211 }
212
213 /* binhash_grow - extend existing table */
214
binhash_grow(BINHASH * table)215 static void binhash_grow(BINHASH *table)
216 {
217 BINHASH_INFO *ht;
218 BINHASH_INFO *next;
219 ssize_t old_size = table->size;
220 BINHASH_INFO **h = table->data;
221 BINHASH_INFO **old_entries = h;
222
223 binhash_size(table, 2 * old_size);
224
225 while (old_size-- > 0) {
226 for (ht = *h++; ht; ht = next) {
227 next = ht->next;
228 binhash_link(table, ht);
229 }
230 }
231 myfree((void *) old_entries);
232 }
233
234 /* binhash_enter - enter (key, value) pair */
235
binhash_enter(BINHASH * table,const void * key,ssize_t key_len,void * value)236 BINHASH_INFO *binhash_enter(BINHASH *table, const void *key, ssize_t key_len, void *value)
237 {
238 BINHASH_INFO *ht;
239
240 if (table->used >= table->size)
241 binhash_grow(table);
242 ht = (BINHASH_INFO *) mymalloc(sizeof(BINHASH_INFO));
243 ht->key = mymemdup(key, key_len);
244 ht->key_len = key_len;
245 ht->value = value;
246 binhash_link(table, ht);
247 return (ht);
248 }
249
250 /* binhash_find - lookup value */
251
binhash_find(BINHASH * table,const void * key,ssize_t key_len)252 void *binhash_find(BINHASH *table, const void *key, ssize_t key_len)
253 {
254 BINHASH_INFO *ht;
255
256 #define KEY_EQ(x,y,l) (((unsigned char *) x)[0] == ((unsigned char *) y)[0] && memcmp(x,y,l) == 0)
257
258 if (table != 0)
259 for (ht = table->data[binhash_hash(key, key_len, table->size)]; ht; ht = ht->next)
260 if (key_len == ht->key_len && KEY_EQ(key, ht->key, key_len))
261 return (ht->value);
262 return (0);
263 }
264
265 /* binhash_locate - lookup entry */
266
binhash_locate(BINHASH * table,const void * key,ssize_t key_len)267 BINHASH_INFO *binhash_locate(BINHASH *table, const void *key, ssize_t key_len)
268 {
269 BINHASH_INFO *ht;
270
271 if (table != 0)
272 for (ht = table->data[binhash_hash(key, key_len, table->size)]; ht; ht = ht->next)
273 if (key_len == ht->key_len && KEY_EQ(key, ht->key, key_len))
274 return (ht);
275 return (0);
276 }
277
278 /* binhash_delete - delete one entry */
279
binhash_delete(BINHASH * table,const void * key,ssize_t key_len,void (* free_fn)(void *))280 void binhash_delete(BINHASH *table, const void *key, ssize_t key_len, void (*free_fn) (void *))
281 {
282 if (table != 0) {
283 BINHASH_INFO *ht;
284 BINHASH_INFO **h = table->data + binhash_hash(key, key_len, table->size);
285
286 for (ht = *h; ht; ht = ht->next) {
287 if (key_len == ht->key_len && KEY_EQ(key, ht->key, key_len)) {
288 if (ht->next)
289 ht->next->prev = ht->prev;
290 if (ht->prev)
291 ht->prev->next = ht->next;
292 else
293 *h = ht->next;
294 table->used--;
295 myfree(ht->key);
296 if (free_fn)
297 (*free_fn) (ht->value);
298 myfree((void *) ht);
299 return;
300 }
301 }
302 msg_panic("binhash_delete: unknown_key: \"%s\"", (char *) key);
303 }
304 }
305
306 /* binhash_free - destroy hash table */
307
binhash_free(BINHASH * table,void (* free_fn)(void *))308 void binhash_free(BINHASH *table, void (*free_fn) (void *))
309 {
310 if (table != 0) {
311 ssize_t i = table->size;
312 BINHASH_INFO *ht;
313 BINHASH_INFO *next;
314 BINHASH_INFO **h = table->data;
315
316 while (i-- > 0) {
317 for (ht = *h++; ht; ht = next) {
318 next = ht->next;
319 myfree(ht->key);
320 if (free_fn)
321 (*free_fn) (ht->value);
322 myfree((void *) ht);
323 }
324 }
325 myfree((void *) table->data);
326 table->data = 0;
327 if (table->seq_bucket)
328 myfree((void *) table->seq_bucket);
329 table->seq_bucket = 0;
330 myfree((void *) table);
331 }
332 }
333
334 /* binhash_walk - iterate over hash table */
335
binhash_walk(BINHASH * table,void (* action)(BINHASH_INFO *,void *),void * ptr)336 void binhash_walk(BINHASH *table, void (*action) (BINHASH_INFO *, void *),
337 void *ptr) {
338 if (table != 0) {
339 ssize_t i = table->size;
340 BINHASH_INFO **h = table->data;
341 BINHASH_INFO *ht;
342
343 while (i-- > 0)
344 for (ht = *h++; ht; ht = ht->next)
345 (*action) (ht, ptr);
346 }
347 }
348
349 /* binhash_list - list all table members */
350
binhash_list(table)351 BINHASH_INFO **binhash_list(table)
352 BINHASH *table;
353 {
354 BINHASH_INFO **list;
355 BINHASH_INFO *member;
356 ssize_t count = 0;
357 ssize_t i;
358
359 if (table != 0) {
360 list = (BINHASH_INFO **) mymalloc(sizeof(*list) * (table->used + 1));
361 for (i = 0; i < table->size; i++)
362 for (member = table->data[i]; member != 0; member = member->next)
363 list[count++] = member;
364 } else {
365 list = (BINHASH_INFO **) mymalloc(sizeof(*list));
366 }
367 list[count] = 0;
368 return (list);
369 }
370
371 /* binhash_sequence - dict(3) compatibility iterator */
372
binhash_sequence(BINHASH * table,int how)373 BINHASH_INFO *binhash_sequence(BINHASH *table, int how)
374 {
375 if (table == 0)
376 return (0);
377
378 switch (how) {
379 case BINHASH_SEQ_FIRST: /* start new sequence */
380 if (table->seq_bucket)
381 myfree((void *) table->seq_bucket);
382 table->seq_bucket = binhash_list(table);
383 table->seq_element = table->seq_bucket;
384 return (*(table->seq_element)++);
385 case BINHASH_SEQ_NEXT: /* next element */
386 if (table->seq_element && *table->seq_element)
387 return (*(table->seq_element)++);
388 /* FALLTHROUGH */
389 default: /* terminate sequence */
390 if (table->seq_bucket) {
391 myfree((void *) table->seq_bucket);
392 table->seq_bucket = table->seq_element = 0;
393 }
394 return (0);
395 }
396 }
397
398 #ifdef TEST
399 #include <vstring_vstream.h>
400 #include <myrand.h>
401
main(int unused_argc,char ** unused_argv)402 int main(int unused_argc, char **unused_argv)
403 {
404 VSTRING *buf = vstring_alloc(10);
405 ssize_t count = 0;
406 BINHASH *hash;
407 BINHASH_INFO **ht_info;
408 BINHASH_INFO **ht;
409 BINHASH_INFO *info;
410 ssize_t i;
411 ssize_t r;
412 int op;
413
414 /*
415 * Load a large number of strings including terminator, and delete them
416 * in a random order.
417 */
418 hash = binhash_create(10);
419 while (vstring_get(buf, VSTREAM_IN) != VSTREAM_EOF)
420 binhash_enter(hash, vstring_str(buf), VSTRING_LEN(buf) + 1,
421 CAST_INT_TO_VOID_PTR(count++));
422 if (count != hash->used)
423 msg_panic("%ld entries stored, but %lu entries exist",
424 (long) count, (unsigned long) hash->used);
425 for (i = 0, op = BINHASH_SEQ_FIRST; (info = binhash_sequence(hash, op)) != 0;
426 i++, op = BINHASH_SEQ_NEXT)
427 if (memchr(info->key, 0, info->key_len) == 0)
428 msg_panic("no null byte in lookup key");
429 if (i != hash->used)
430 msg_panic("%ld entries found, but %lu entries exist",
431 (long) i, (unsigned long) hash->used);
432 ht_info = binhash_list(hash);
433 for (i = 0; i < hash->used; i++) {
434 r = myrand() % hash->used;
435 info = ht_info[i];
436 ht_info[i] = ht_info[r];
437 ht_info[r] = info;
438 }
439 for (ht = ht_info; *ht; ht++)
440 binhash_delete(hash, ht[0]->key, ht[0]->key_len, (void (*) (void *)) 0);
441 if (hash->used > 0)
442 msg_panic("%ld entries not deleted", (long) hash->used);
443 myfree((void *) ht_info);
444 binhash_free(hash, (void (*) (void *)) 0);
445 vstring_free(buf);
446 return (0);
447 }
448
449 #endif
450