157489Storek /*
2*61818Sbostic * Copyright (c) 1992, 1993
3*61818Sbostic * The Regents of the University of California. All rights reserved.
457489Storek *
557489Storek * This software was developed by the Computer Systems Engineering group
657489Storek * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
757489Storek * contributed to Berkeley.
857489Storek *
957489Storek * All advertising materials mentioning features or use of this software
1057489Storek * must display the following acknowledgement:
1157489Storek * This product includes software developed by the University of
1257489Storek * California, Lawrence Berkeley Laboratories.
1357489Storek *
1457489Storek * %sccs.include.redist.c%
1557489Storek *
16*61818Sbostic * @(#)hash.c 8.1 (Berkeley) 06/06/93
1757489Storek */
1857489Storek
1957489Storek #include <sys/param.h>
2057489Storek #include <stdlib.h>
2157489Storek #include <string.h>
2257489Storek #include "config.h"
2357489Storek
2457489Storek /*
2557489Storek * Interned strings are kept in a hash table. By making each string
2657489Storek * unique, the program can compare strings by comparing pointers.
2757489Storek */
2857489Storek struct hashent {
2957489Storek struct hashent *h_next; /* hash buckets are chained */
3057489Storek const char *h_name; /* the string */
3157489Storek u_int h_hash; /* its hash value */
3257489Storek void *h_value; /* other values (for name=value) */
3357489Storek };
3457489Storek struct hashtab {
3557489Storek size_t ht_size; /* size (power of 2) */
3657489Storek u_int ht_mask; /* == ht_size - 1 */
3757489Storek u_int ht_used; /* number of entries used */
3857489Storek u_int ht_lim; /* when to expand */
3957489Storek struct hashent **ht_tab; /* base of table */
4057489Storek };
4157489Storek static struct hashtab strings;
4257489Storek
4357489Storek /*
4457489Storek * HASHFRACTION controls ht_lim, which in turn controls the average chain
4557489Storek * length. We allow a few entries, on average, as comparing them is usually
4657489Storek * cheap (the h_hash values prevent a strcmp).
4757489Storek */
4857489Storek #define HASHFRACTION(sz) ((sz) * 3 / 2)
4957489Storek
5057489Storek /* round up to next multiple of y, where y is a power of 2 */
5157489Storek #define ROUND(x, y) (((x) + (y) - 1) & ~((y) - 1))
5257489Storek
5357489Storek /*
5457489Storek * Allocate space that will never be freed.
5557489Storek */
5657489Storek static void *
poolalloc(size)5757489Storek poolalloc(size)
5857489Storek size_t size;
5957489Storek {
6057489Storek register char *p;
6157489Storek register size_t alloc;
6257489Storek static char *pool;
6357489Storek static size_t nleft;
6457489Storek
6557489Storek if (nleft < size) {
6657489Storek /*
6757489Storek * Compute a `good' size to allocate via malloc.
6857489Storek * 16384 is a guess at a good page size for malloc;
6957489Storek * 32 is a guess at malloc's overhead.
7057489Storek */
7157489Storek alloc = ROUND(size + 32, 16384) - 32;
7257489Storek p = emalloc(alloc);
7357489Storek nleft = alloc - size;
7457489Storek } else {
7557489Storek p = pool;
7657489Storek nleft -= size;
7757489Storek }
7857489Storek pool = p + size;
7957489Storek return (p);
8057489Storek }
8157489Storek
8257489Storek /*
8357489Storek * Initialize a new hash table. The size must be a power of 2.
8457489Storek */
8557489Storek static void
ht_init(ht,sz)8657489Storek ht_init(ht, sz)
8757489Storek register struct hashtab *ht;
8857489Storek size_t sz;
8957489Storek {
9057489Storek register struct hashent **h;
9157489Storek register u_int n;
9257489Storek
9357489Storek h = emalloc(sz * sizeof *h);
9457489Storek ht->ht_tab = h;
9557489Storek ht->ht_size = sz;
9657489Storek ht->ht_mask = sz - 1;
9757489Storek for (n = 0; n < sz; n++)
9857489Storek *h++ = NULL;
9957489Storek ht->ht_used = 0;
10057489Storek ht->ht_lim = HASHFRACTION(sz);
10157489Storek }
10257489Storek
10357489Storek /*
10457489Storek * Expand an existing hash table.
10557489Storek */
10657489Storek static void
ht_expand(ht)10757489Storek ht_expand(ht)
10857489Storek register struct hashtab *ht;
10957489Storek {
11057489Storek register struct hashent *p, **h, **oldh, *q;
11157489Storek register u_int n, i;
11257489Storek
11357489Storek n = ht->ht_size * 2;
11457489Storek h = emalloc(n * sizeof *h);
11557489Storek for (i = 0; i < n; i++)
11657489Storek h[i] = NULL;
11757489Storek oldh = ht->ht_tab;
11857489Storek n--;
11957489Storek for (i = ht->ht_size; i != 0; i--) {
12057489Storek for (p = *oldh++; p != NULL; p = q) {
12157489Storek q = p->h_next;
12257489Storek p->h_next = h[p->h_hash & n];
12357489Storek h[p->h_hash & n] = p;
12457489Storek }
12557489Storek }
12657489Storek free(ht->ht_tab);
12757489Storek ht->ht_tab = h;
12857489Storek ht->ht_mask = n;
12957489Storek ht->ht_size = ++n;
13057489Storek ht->ht_lim = HASHFRACTION(n);
13157489Storek }
13257489Storek
13357489Storek /*
13457489Storek * Make a new hash entry, setting its h_next to NULL.
13557489Storek */
13657489Storek static inline struct hashent *
newhashent(name,h)13757489Storek newhashent(name, h)
13857489Storek const char *name;
13957489Storek u_int h;
14057489Storek {
14157489Storek register struct hashent *hp;
14257489Storek register char *m;
14357489Storek
14457489Storek m = poolalloc(sizeof(*hp) + ALIGNBYTES);
14557489Storek hp = (struct hashent *)ALIGN(m);
14657489Storek hp->h_name = name;
14757489Storek hp->h_hash = h;
14857489Storek hp->h_next = NULL;
14957489Storek return (hp);
15057489Storek }
15157489Storek
15257489Storek /*
15357489Storek * Hash a string.
15457489Storek */
15557489Storek static inline u_int
hash(str)15657489Storek hash(str)
15757489Storek register const char *str;
15857489Storek {
15957489Storek register u_int h;
16057489Storek
16157489Storek for (h = 0; *str;)
16257489Storek h = (h << 5) + h + *str++;
16357489Storek return (h);
16457489Storek }
16557489Storek
16657489Storek void
initintern()16757489Storek initintern()
16857489Storek {
16957489Storek
17057489Storek ht_init(&strings, 128);
17157489Storek }
17257489Storek
17357489Storek /*
17457489Storek * Generate a single unique copy of the given string. We expect this
17557489Storek * function to be used frequently, so it should be fast.
17657489Storek */
17757489Storek const char *
intern(s)17857489Storek intern(s)
17957489Storek register const char *s;
18057489Storek {
18157489Storek register struct hashtab *ht;
18257489Storek register struct hashent *hp, **hpp;
18357489Storek register u_int h;
18457489Storek register char *p;
18557489Storek register size_t l;
18657489Storek
18757489Storek ht = &strings;
18857489Storek h = hash(s);
18957489Storek hpp = &ht->ht_tab[h & ht->ht_mask];
19057489Storek for (; (hp = *hpp) != NULL; hpp = &hp->h_next)
19157489Storek if (hp->h_hash == h && strcmp(hp->h_name, s) == 0)
19257489Storek return (hp->h_name);
19357489Storek l = strlen(s) + 1;
19457489Storek p = poolalloc(l);
19557489Storek bcopy(s, p, l);
19657489Storek *hpp = newhashent(p, h);
19757489Storek if (++ht->ht_used > ht->ht_lim)
19857489Storek ht_expand(ht);
19957489Storek return (p);
20057489Storek }
20157489Storek
20257489Storek struct hashtab *
ht_new()20357489Storek ht_new()
20457489Storek {
20557489Storek register struct hashtab *ht;
20657489Storek
20757489Storek ht = emalloc(sizeof *ht);
20857489Storek ht_init(ht, 8);
20957489Storek return (ht);
21057489Storek }
21157489Storek
21257489Storek /*
21357489Storek * Insert and/or replace.
21457489Storek */
21557489Storek int
ht_insrep(ht,nam,val,replace)21657489Storek ht_insrep(ht, nam, val, replace)
21757489Storek register struct hashtab *ht;
21857489Storek register const char *nam;
21957489Storek void *val;
22057489Storek int replace;
22157489Storek {
22257489Storek register struct hashent *hp, **hpp;
22357489Storek register u_int h;
22457489Storek
22557489Storek h = hash(nam);
22657489Storek hpp = &ht->ht_tab[h & ht->ht_mask];
22757489Storek for (; (hp = *hpp) != NULL; hpp = &hp->h_next) {
22857489Storek if (hp->h_name == nam) {
22957489Storek if (replace)
23057489Storek hp->h_value = val;
23157489Storek return (1);
23257489Storek }
23357489Storek }
23457489Storek *hpp = hp = newhashent(nam, h);
23557489Storek hp->h_value = val;
23657489Storek return (0);
23757489Storek }
23857489Storek
23957489Storek void *
ht_lookup(ht,nam)24057489Storek ht_lookup(ht, nam)
24157489Storek register struct hashtab *ht;
24257489Storek register const char *nam;
24357489Storek {
24457489Storek register struct hashent *hp, **hpp;
24557489Storek register u_int h;
24657489Storek
24757489Storek h = hash(nam);
24857489Storek hpp = &ht->ht_tab[h & ht->ht_mask];
24957489Storek for (; (hp = *hpp) != NULL; hpp = &hp->h_next)
25057489Storek if (hp->h_name == nam)
25157489Storek return (hp->h_value);
25257489Storek return (NULL);
25357489Storek }
254