xref: /openbsd-src/usr.bin/ctfconv/hash.c (revision 0687c322e07315b8b2d5d0eb4cd12f6106989d1c)
1*0687c322Sjasper /*	$OpenBSD: hash.c,v 1.2 2017/08/11 14:58:56 jasper Exp $ */
2*0687c322Sjasper 
3192095f7Smpi /* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org>
4192095f7Smpi  *
5192095f7Smpi  * Permission to use, copy, modify, and distribute this software for any
6192095f7Smpi  * purpose with or without fee is hereby granted, provided that the above
7192095f7Smpi  * copyright notice and this permission notice appear in all copies.
8192095f7Smpi  *
9192095f7Smpi  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10192095f7Smpi  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11192095f7Smpi  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12192095f7Smpi  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13192095f7Smpi  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14192095f7Smpi  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15192095f7Smpi  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16192095f7Smpi  */
17192095f7Smpi 
18192095f7Smpi #include <stddef.h>
19192095f7Smpi #include <stdint.h>
20192095f7Smpi #include <stdlib.h>
21192095f7Smpi #include <string.h>
22192095f7Smpi #include <limits.h>
23192095f7Smpi 
24192095f7Smpi #include "hash.h"
25192095f7Smpi 
26192095f7Smpi struct _hash_record {
27192095f7Smpi 	uint32_t	hv;
28192095f7Smpi 	struct hash_entry	*p;
29192095f7Smpi };
30192095f7Smpi 
31192095f7Smpi struct hash {
32192095f7Smpi 	struct _hash_record 	*t;
33192095f7Smpi 	unsigned int 		size;
34192095f7Smpi 	unsigned int 		total;
35192095f7Smpi 	unsigned int 		deleted;
36192095f7Smpi };
37192095f7Smpi 
38192095f7Smpi #define DELETED		((struct hash_entry *)h)
39192095f7Smpi #define NONE		(h->size)
40192095f7Smpi 
41192095f7Smpi /* Don't bother changing the hash table if the change is small enough.  */
42192095f7Smpi #define MINSIZE		(1UL << 4)
43192095f7Smpi #define MINDELETED	4
44192095f7Smpi 
45192095f7Smpi static void hash_resize(struct hash *);
46192095f7Smpi static uint32_t hash_interval(const char *, const char **);
47192095f7Smpi static unsigned int hash_qlookup(struct hash *, const char *);
48192095f7Smpi 
49192095f7Smpi 
50192095f7Smpi /* hash_delete only frees the hash structure. Use hash_first/hash_next
51192095f7Smpi  * to free entries as well.  */
52192095f7Smpi void
hash_delete(struct hash * h)53192095f7Smpi hash_delete(struct hash *h)
54192095f7Smpi {
55192095f7Smpi 	free(h->t);
56192095f7Smpi 	h->t = NULL;
57192095f7Smpi }
58192095f7Smpi 
59192095f7Smpi static void
hash_resize(struct hash * h)60192095f7Smpi hash_resize(struct hash *h)
61192095f7Smpi {
62192095f7Smpi 	struct _hash_record *n;
63192095f7Smpi 	size_t ns;
64192095f7Smpi 	unsigned int	j;
65192095f7Smpi 	unsigned int	i, incr;
66192095f7Smpi 
67192095f7Smpi 	if (4 * h->deleted < h->total) {
68192095f7Smpi 		if (h->size >= (UINT_MAX >> 1U))
69192095f7Smpi 			ns = UINT_MAX;
70192095f7Smpi 		else
71192095f7Smpi 			ns = h->size << 1U;
72192095f7Smpi 	} else if (3 * h->deleted > 2 * h->total)
73192095f7Smpi 		ns = h->size >> 1U;
74192095f7Smpi 	else
75192095f7Smpi 		ns = h->size;
76192095f7Smpi 	if (ns < MINSIZE)
77192095f7Smpi 		ns = MINSIZE;
78192095f7Smpi 
79192095f7Smpi 	n = calloc(ns, sizeof(struct _hash_record));
80192095f7Smpi 	if (!n)
81192095f7Smpi 		return;
82192095f7Smpi 
83192095f7Smpi 	for (j = 0; j < h->size; j++) {
84192095f7Smpi 		if (h->t[j].p != NULL && h->t[j].p != DELETED) {
85192095f7Smpi 			i = h->t[j].hv % ns;
86192095f7Smpi 			incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1;
87192095f7Smpi 			while (n[i].p != NULL) {
88192095f7Smpi 				i += incr;
89192095f7Smpi 				if (i >= ns)
90192095f7Smpi 					i -= ns;
91192095f7Smpi 			}
92192095f7Smpi 			n[i].hv = h->t[j].hv;
93192095f7Smpi 			n[i].p = h->t[j].p;
94192095f7Smpi 		}
95192095f7Smpi 	}
96192095f7Smpi 	free(h->t);
97192095f7Smpi 	h->t = n;
98192095f7Smpi 	h->size = ns;
99192095f7Smpi 	h->total -= h->deleted;
100192095f7Smpi 	h->deleted = 0;
101192095f7Smpi }
102192095f7Smpi 
103192095f7Smpi void *
hash_remove(struct hash * h,unsigned int i)104192095f7Smpi hash_remove(struct hash *h, unsigned int i)
105192095f7Smpi {
106192095f7Smpi 	void		*result = (void *)h->t[i].p;
107192095f7Smpi 
108192095f7Smpi 	if (result == NULL || result == DELETED)
109192095f7Smpi 		return NULL;
110192095f7Smpi 
111192095f7Smpi 	h->t[i].p = DELETED;
112192095f7Smpi 	h->deleted++;
113192095f7Smpi 	if (h->deleted >= MINDELETED && 4 * h->deleted > h->total)
114192095f7Smpi 		hash_resize(h);
115192095f7Smpi 	return result;
116192095f7Smpi }
117192095f7Smpi 
118192095f7Smpi void
hash_insert(struct hash * h,unsigned int i,struct hash_entry * p,const char * key)119192095f7Smpi hash_insert(struct hash *h, unsigned int i, struct hash_entry *p,
120192095f7Smpi     const char *key)
121192095f7Smpi {
122192095f7Smpi 	p->hkey = key;
123192095f7Smpi 
124192095f7Smpi 	if (h->t[i].p == DELETED) {
125192095f7Smpi 		h->deleted--;
126192095f7Smpi 		h->t[i].p = p;
127192095f7Smpi 	} else {
128192095f7Smpi 		h->t[i].p = p;
129192095f7Smpi 		/* Arbitrary resize boundary.  Tweak if not efficient enough. */
130192095f7Smpi 		if (++h->total * 4 > h->size * 3)
131192095f7Smpi 			hash_resize(h);
132192095f7Smpi 	}
133192095f7Smpi }
134192095f7Smpi 
135192095f7Smpi void *
hash_first(struct hash * h,unsigned int * pos)136192095f7Smpi hash_first(struct hash *h, unsigned int *pos)
137192095f7Smpi {
138192095f7Smpi 	*pos = 0;
139192095f7Smpi 	return hash_next(h, pos);
140192095f7Smpi }
141192095f7Smpi 
142192095f7Smpi void *
hash_next(struct hash * h,unsigned int * pos)143192095f7Smpi hash_next(struct hash *h, unsigned int *pos)
144192095f7Smpi {
145192095f7Smpi 	for (; *pos < h->size; (*pos)++)
146192095f7Smpi 		if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL)
147192095f7Smpi 			return (void *)h->t[(*pos)++].p;
148192095f7Smpi 	return NULL;
149192095f7Smpi }
150192095f7Smpi 
151192095f7Smpi struct hash *
hash_init(unsigned int size)152192095f7Smpi hash_init(unsigned int size)
153192095f7Smpi {
154192095f7Smpi 	struct hash *h;
155192095f7Smpi 
156192095f7Smpi 	h = calloc(1, sizeof(*h));
157192095f7Smpi 	if (h == NULL)
158192095f7Smpi 		return NULL;
159192095f7Smpi 
160192095f7Smpi 	h->size = 1UL << size;
161192095f7Smpi 	if (h->size < MINSIZE)
162192095f7Smpi 		h->size = MINSIZE;
163192095f7Smpi 	/* Copy info so that caller may free it.  */
164192095f7Smpi 	h->total = h->deleted = 0;
165192095f7Smpi 	h->t = calloc(h->size, sizeof(struct _hash_record));
166192095f7Smpi 	if (h->t == NULL) {
167192095f7Smpi 		free(h);
168192095f7Smpi 		return NULL;
169192095f7Smpi 	}
170192095f7Smpi 
171192095f7Smpi 	return h;
172192095f7Smpi }
173192095f7Smpi 
174192095f7Smpi static uint32_t
hash_interval(const char * s,const char ** e)175192095f7Smpi hash_interval(const char *s, const char **e)
176192095f7Smpi {
177192095f7Smpi 	uint32_t k;
178192095f7Smpi 
179192095f7Smpi 	if (!*e)
180192095f7Smpi 		*e = s + strlen(s);
181192095f7Smpi 	if (s == *e)
182192095f7Smpi 		k = 0;
183192095f7Smpi 	else
184192095f7Smpi 		k = *s++;
185192095f7Smpi 	while (s != *e)
186192095f7Smpi 		k =  ((k << 2) | (k >> 30)) ^ *s++;
187192095f7Smpi 	return k;
188192095f7Smpi }
189192095f7Smpi 
190192095f7Smpi static unsigned int
hash_qlookup(struct hash * h,const char * start)191192095f7Smpi hash_qlookup(struct hash *h, const char *start)
192192095f7Smpi {
193192095f7Smpi 	const char *end = NULL;
194192095f7Smpi 	unsigned int i, incr;
195192095f7Smpi 	unsigned int empty;
196192095f7Smpi 	uint32_t hv;
197192095f7Smpi 
198192095f7Smpi 	hv = hash_interval(start, &end);
199192095f7Smpi 
200192095f7Smpi 	empty = NONE;
201192095f7Smpi 	i = hv % h->size;
202192095f7Smpi 	incr = ((hv % (h->size-2)) & ~1) + 1;
203192095f7Smpi 	while (h->t[i].p != NULL) {
204192095f7Smpi 		if (h->t[i].p == DELETED) {
205192095f7Smpi 			if (empty == NONE)
206192095f7Smpi 				empty = i;
207192095f7Smpi 		} else if (h->t[i].hv == hv &&
208192095f7Smpi 		    strncmp(h->t[i].p->hkey, start, end - start) == 0 &&
209192095f7Smpi 		    (h->t[i].p->hkey)[end-start] == '\0') {
210192095f7Smpi 			if (empty != NONE) {
211192095f7Smpi 				h->t[empty].hv = hv;
212192095f7Smpi 				h->t[empty].p = h->t[i].p;
213192095f7Smpi 				h->t[i].p = DELETED;
214192095f7Smpi 				return empty;
215192095f7Smpi 			} else {
216192095f7Smpi 				return i;
217192095f7Smpi 			}
218192095f7Smpi 		}
219192095f7Smpi 		i += incr;
220192095f7Smpi 		if (i >= h->size)
221192095f7Smpi 			i -= h->size;
222192095f7Smpi 	}
223192095f7Smpi 
224192095f7Smpi 	/* Found an empty position.  */
225192095f7Smpi 	if (empty != NONE)
226192095f7Smpi 		i = empty;
227192095f7Smpi 	h->t[i].hv = hv;
228192095f7Smpi 	return i;
229192095f7Smpi }
230192095f7Smpi 
231192095f7Smpi struct hash_entry *
hash_find(struct hash * h,const char * start,unsigned int * slot)232192095f7Smpi hash_find(struct hash *h, const char *start, unsigned int *slot)
233192095f7Smpi {
234192095f7Smpi 	unsigned int i;
235192095f7Smpi 
236192095f7Smpi 	i = hash_qlookup(h, start);
237192095f7Smpi 	if (slot != NULL)
238192095f7Smpi 		*slot = i;
239192095f7Smpi 
240192095f7Smpi 	if (h->t[i].p == DELETED)
241192095f7Smpi 		return NULL;
242192095f7Smpi 
243192095f7Smpi 	return h->t[i].p;
244192095f7Smpi }
245