15e9d01f2Sgwr /************************************************************************
25e9d01f2Sgwr Copyright 1988, 1991 by Carnegie Mellon University
35e9d01f2Sgwr
45e9d01f2Sgwr All Rights Reserved
55e9d01f2Sgwr
65e9d01f2Sgwr Permission to use, copy, modify, and distribute this software and its
75e9d01f2Sgwr documentation for any purpose and without fee is hereby granted, provided
85e9d01f2Sgwr that the above copyright notice appear in all copies and that both that
95e9d01f2Sgwr copyright notice and this permission notice appear in supporting
105e9d01f2Sgwr documentation, and that the name of Carnegie Mellon University not be used
115e9d01f2Sgwr in advertising or publicity pertaining to distribution of the software
125e9d01f2Sgwr without specific, written prior permission.
135e9d01f2Sgwr
145e9d01f2Sgwr CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
155e9d01f2Sgwr SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
165e9d01f2Sgwr IN NO EVENT SHALL CMU BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
175e9d01f2Sgwr DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
185e9d01f2Sgwr PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
195e9d01f2Sgwr ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
205e9d01f2Sgwr SOFTWARE.
215e9d01f2Sgwr ************************************************************************/
225e9d01f2Sgwr
239493bb79Slukem #include <sys/cdefs.h>
245e9d01f2Sgwr #ifndef lint
25*32cded6cSdholland __RCSID("$NetBSD: hash.c,v 1.8 2018/02/08 09:05:21 dholland Exp $");
265e9d01f2Sgwr #endif
275e9d01f2Sgwr
285e9d01f2Sgwr
295e9d01f2Sgwr /*
305e9d01f2Sgwr * Generalized hash table ADT
315e9d01f2Sgwr *
325e9d01f2Sgwr * Provides multiple, dynamically-allocated, variable-sized hash tables on
335e9d01f2Sgwr * various data and keys.
345e9d01f2Sgwr *
355e9d01f2Sgwr * This package attempts to follow some of the coding conventions suggested
365e9d01f2Sgwr * by Bob Sidebotham and the AFS Clean Code Committee of the
375e9d01f2Sgwr * Information Technology Center at Carnegie Mellon.
385e9d01f2Sgwr */
395e9d01f2Sgwr
405e9d01f2Sgwr
415e9d01f2Sgwr #include <sys/types.h>
425e9d01f2Sgwr #include <stdlib.h>
43be45f4d0Stls #include <strings.h>
445e9d01f2Sgwr
455e9d01f2Sgwr #include "hash.h"
465e9d01f2Sgwr
475e9d01f2Sgwr #define TRUE 1
485e9d01f2Sgwr #define FALSE 0
495e9d01f2Sgwr #ifndef NULL
505e9d01f2Sgwr #define NULL 0
515e9d01f2Sgwr #endif
525e9d01f2Sgwr
535e9d01f2Sgwr /*
545e9d01f2Sgwr * This can be changed to make internal routines visible to debuggers, etc.
555e9d01f2Sgwr */
565e9d01f2Sgwr #ifndef PRIVATE
575e9d01f2Sgwr #define PRIVATE static
585e9d01f2Sgwr #endif
595e9d01f2Sgwr
60131109e4Swiz PRIVATE void hashi_FreeMembers(hash_member *, hash_freefp);
615e9d01f2Sgwr
625e9d01f2Sgwr
635e9d01f2Sgwr
645e9d01f2Sgwr
655e9d01f2Sgwr /*
665e9d01f2Sgwr * Hash table initialization routine.
675e9d01f2Sgwr *
68*32cded6cSdholland * This routine creates and initializes a hash table of size "tablesize"
695e9d01f2Sgwr * entries. Successful calls return a pointer to the hash table (which must
705e9d01f2Sgwr * be passed to other hash routines to identify the hash table). Failed
715e9d01f2Sgwr * calls return NULL.
725e9d01f2Sgwr */
735e9d01f2Sgwr
745e9d01f2Sgwr hash_tbl *
hash_Init(unsigned int tablesize)75131109e4Swiz hash_Init(unsigned int tablesize)
765e9d01f2Sgwr {
77aae9c2a0Swiz hash_tbl *hashtblptr;
78aae9c2a0Swiz unsigned totalsize;
795e9d01f2Sgwr
805e9d01f2Sgwr if (tablesize > 0) {
815e9d01f2Sgwr totalsize = sizeof(hash_tbl)
825e9d01f2Sgwr + sizeof(hash_member *) * (tablesize - 1);
835e9d01f2Sgwr hashtblptr = (hash_tbl *) malloc(totalsize);
845e9d01f2Sgwr if (hashtblptr) {
855e9d01f2Sgwr bzero((char *) hashtblptr, totalsize);
865e9d01f2Sgwr hashtblptr->size = tablesize; /* Success! */
875e9d01f2Sgwr hashtblptr->bucketnum = 0;
885e9d01f2Sgwr hashtblptr->member = (hashtblptr->table)[0];
895e9d01f2Sgwr }
905e9d01f2Sgwr } else {
915e9d01f2Sgwr hashtblptr = NULL; /* Disallow zero-length tables */
925e9d01f2Sgwr }
935e9d01f2Sgwr return hashtblptr; /* NULL if failure */
945e9d01f2Sgwr }
955e9d01f2Sgwr
965e9d01f2Sgwr
975e9d01f2Sgwr
985e9d01f2Sgwr /*
995e9d01f2Sgwr * Frees an entire linked list of bucket members (used in the open
1005e9d01f2Sgwr * hashing scheme). Does nothing if the passed pointer is NULL.
1015e9d01f2Sgwr */
1025e9d01f2Sgwr
1035e9d01f2Sgwr PRIVATE void
hashi_FreeMembers(hash_member * bucketptr,hash_freefp free_data)104131109e4Swiz hashi_FreeMembers(hash_member *bucketptr, hash_freefp free_data)
1055e9d01f2Sgwr {
1065e9d01f2Sgwr hash_member *nextbucket;
1075e9d01f2Sgwr while (bucketptr) {
1085e9d01f2Sgwr nextbucket = bucketptr->next;
1095e9d01f2Sgwr (*free_data) (bucketptr->data);
1105e9d01f2Sgwr free((char *) bucketptr);
1115e9d01f2Sgwr bucketptr = nextbucket;
1125e9d01f2Sgwr }
1135e9d01f2Sgwr }
1145e9d01f2Sgwr
1155e9d01f2Sgwr
1165e9d01f2Sgwr
1175e9d01f2Sgwr
1185e9d01f2Sgwr /*
1195e9d01f2Sgwr * This routine re-initializes the hash table. It frees all the allocated
1205e9d01f2Sgwr * memory and resets all bucket pointers to NULL.
1215e9d01f2Sgwr */
1225e9d01f2Sgwr
1235e9d01f2Sgwr void
hash_Reset(hash_tbl * hashtable,hash_freefp free_data)124131109e4Swiz hash_Reset(hash_tbl *hashtable, hash_freefp free_data)
1255e9d01f2Sgwr {
1265e9d01f2Sgwr hash_member **bucketptr;
1275e9d01f2Sgwr unsigned i;
1285e9d01f2Sgwr
1295e9d01f2Sgwr bucketptr = hashtable->table;
1305e9d01f2Sgwr for (i = 0; i < hashtable->size; i++) {
1315e9d01f2Sgwr hashi_FreeMembers(*bucketptr, free_data);
1325e9d01f2Sgwr *bucketptr++ = NULL;
1335e9d01f2Sgwr }
1345e9d01f2Sgwr hashtable->bucketnum = 0;
1355e9d01f2Sgwr hashtable->member = (hashtable->table)[0];
1365e9d01f2Sgwr }
1375e9d01f2Sgwr
1385e9d01f2Sgwr
1395e9d01f2Sgwr
1405e9d01f2Sgwr /*
1415e9d01f2Sgwr * Generic hash function to calculate a hash code from the given string.
1425e9d01f2Sgwr *
1435e9d01f2Sgwr * For each byte of the string, this function left-shifts the value in an
1445e9d01f2Sgwr * accumulator and then adds the byte into the accumulator. The contents of
1455e9d01f2Sgwr * the accumulator is returned after the entire string has been processed.
1465e9d01f2Sgwr * It is assumed that this result will be used as the "hashcode" parameter in
1475e9d01f2Sgwr * calls to other functions in this package. These functions automatically
1485e9d01f2Sgwr * adjust the hashcode for the size of each hashtable.
1495e9d01f2Sgwr *
1505e9d01f2Sgwr * This algorithm probably works best when the hash table size is a prime
1515e9d01f2Sgwr * number.
1525e9d01f2Sgwr *
1535e9d01f2Sgwr * Hopefully, this function is better than the previous one which returned
1545e9d01f2Sgwr * the sum of the squares of all the bytes. I'm still open to other
1555e9d01f2Sgwr * suggestions for a default hash function. The programmer is more than
1565e9d01f2Sgwr * welcome to supply his/her own hash function as that is one of the design
1575e9d01f2Sgwr * features of this package.
1585e9d01f2Sgwr */
1595e9d01f2Sgwr
1605e9d01f2Sgwr unsigned
hash_HashFunction(unsigned char * string,unsigned int len)161aae9c2a0Swiz hash_HashFunction(unsigned char *string, unsigned int len)
1625e9d01f2Sgwr {
163aae9c2a0Swiz unsigned accum;
1645e9d01f2Sgwr
1655e9d01f2Sgwr accum = 0;
1665e9d01f2Sgwr for (; len > 0; len--) {
1675e9d01f2Sgwr accum <<= 1;
1685e9d01f2Sgwr accum += (unsigned) (*string++ & 0xFF);
1695e9d01f2Sgwr }
1705e9d01f2Sgwr return accum;
1715e9d01f2Sgwr }
1725e9d01f2Sgwr
1735e9d01f2Sgwr
1745e9d01f2Sgwr
1755e9d01f2Sgwr /*
1765e9d01f2Sgwr * Returns TRUE if at least one entry for the given key exists; FALSE
1775e9d01f2Sgwr * otherwise.
1785e9d01f2Sgwr */
1795e9d01f2Sgwr
1805e9d01f2Sgwr int
hash_Exists(hash_tbl * hashtable,unsigned int hashcode,hash_cmpfp compare,hash_datum * key)181131109e4Swiz hash_Exists(hash_tbl *hashtable, unsigned int hashcode, hash_cmpfp compare,
182131109e4Swiz hash_datum *key)
1835e9d01f2Sgwr {
184aae9c2a0Swiz hash_member *memberptr;
1855e9d01f2Sgwr
1865e9d01f2Sgwr memberptr = (hashtable->table)[hashcode % (hashtable->size)];
1875e9d01f2Sgwr while (memberptr) {
1885e9d01f2Sgwr if ((*compare) (key, memberptr->data)) {
1895e9d01f2Sgwr return TRUE; /* Entry does exist */
1905e9d01f2Sgwr }
1915e9d01f2Sgwr memberptr = memberptr->next;
1925e9d01f2Sgwr }
1935e9d01f2Sgwr return FALSE; /* Entry does not exist */
1945e9d01f2Sgwr }
1955e9d01f2Sgwr
1965e9d01f2Sgwr
1975e9d01f2Sgwr
1985e9d01f2Sgwr /*
1995e9d01f2Sgwr * Insert the data item "element" into the hash table using "hashcode"
2005e9d01f2Sgwr * to determine the bucket number, and "compare" and "key" to determine
2015e9d01f2Sgwr * its uniqueness.
2025e9d01f2Sgwr *
2035e9d01f2Sgwr * If the insertion is successful 0 is returned. If a matching entry
2045e9d01f2Sgwr * already exists in the given bucket of the hash table, or some other error
2055e9d01f2Sgwr * occurs, -1 is returned and the insertion is not done.
2065e9d01f2Sgwr */
2075e9d01f2Sgwr
2085e9d01f2Sgwr int
hash_Insert(hash_tbl * hashtable,unsigned int hashcode,hash_cmpfp compare,hash_datum * key,hash_datum * element)209131109e4Swiz hash_Insert(hash_tbl *hashtable, unsigned int hashcode, hash_cmpfp compare,
210131109e4Swiz hash_datum *key, hash_datum *element)
2115e9d01f2Sgwr {
2125e9d01f2Sgwr hash_member *temp;
2135e9d01f2Sgwr
2145e9d01f2Sgwr hashcode %= hashtable->size;
2155e9d01f2Sgwr if (hash_Exists(hashtable, hashcode, compare, key)) {
2165e9d01f2Sgwr return -1; /* At least one entry already exists */
2175e9d01f2Sgwr }
2185e9d01f2Sgwr temp = (hash_member *) malloc(sizeof(hash_member));
2195e9d01f2Sgwr if (!temp)
2205e9d01f2Sgwr return -1; /* malloc failed! */
2215e9d01f2Sgwr
2225e9d01f2Sgwr temp->data = element;
2235e9d01f2Sgwr temp->next = (hashtable->table)[hashcode];
2245e9d01f2Sgwr (hashtable->table)[hashcode] = temp;
2255e9d01f2Sgwr return 0; /* Success */
2265e9d01f2Sgwr }
2275e9d01f2Sgwr
2285e9d01f2Sgwr
2295e9d01f2Sgwr
2305e9d01f2Sgwr /*
2315e9d01f2Sgwr * Delete all data elements which match the given key. If at least one
2325e9d01f2Sgwr * element is found and the deletion is successful, 0 is returned.
2335e9d01f2Sgwr * If no matching elements can be found in the hash table, -1 is returned.
2345e9d01f2Sgwr */
2355e9d01f2Sgwr
2365e9d01f2Sgwr int
hash_Delete(hash_tbl * hashtable,unsigned int hashcode,hash_cmpfp compare,hash_datum * key,hash_freefp free_data)237131109e4Swiz hash_Delete(hash_tbl *hashtable, unsigned int hashcode, hash_cmpfp compare,
238131109e4Swiz hash_datum *key, hash_freefp free_data)
2395e9d01f2Sgwr {
2405e9d01f2Sgwr hash_member *memberptr, *tempptr;
2415e9d01f2Sgwr hash_member *previous = NULL;
2425e9d01f2Sgwr int retval;
2435e9d01f2Sgwr
2445e9d01f2Sgwr retval = -1;
2455e9d01f2Sgwr hashcode %= hashtable->size;
2465e9d01f2Sgwr
2475e9d01f2Sgwr /*
2485e9d01f2Sgwr * Delete the first member of the list if it matches. Since this moves
2495e9d01f2Sgwr * the second member into the first position we have to keep doing this
2505e9d01f2Sgwr * over and over until it no longer matches.
2515e9d01f2Sgwr */
2525e9d01f2Sgwr memberptr = (hashtable->table)[hashcode];
2535e9d01f2Sgwr while (memberptr && (*compare) (key, memberptr->data)) {
2545e9d01f2Sgwr (hashtable->table)[hashcode] = memberptr->next;
2555e9d01f2Sgwr /*
2565e9d01f2Sgwr * Stop hashi_FreeMembers() from deleting the whole list!
2575e9d01f2Sgwr */
2585e9d01f2Sgwr memberptr->next = NULL;
2595e9d01f2Sgwr hashi_FreeMembers(memberptr, free_data);
2605e9d01f2Sgwr memberptr = (hashtable->table)[hashcode];
2615e9d01f2Sgwr retval = 0;
2625e9d01f2Sgwr }
2635e9d01f2Sgwr
2645e9d01f2Sgwr /*
2655e9d01f2Sgwr * Now traverse the rest of the list
2665e9d01f2Sgwr */
2675e9d01f2Sgwr if (memberptr) {
2685e9d01f2Sgwr previous = memberptr;
2695e9d01f2Sgwr memberptr = memberptr->next;
2705e9d01f2Sgwr }
2715e9d01f2Sgwr while (memberptr) {
2725e9d01f2Sgwr if ((*compare) (key, memberptr->data)) {
2735e9d01f2Sgwr tempptr = memberptr;
2745e9d01f2Sgwr previous->next = memberptr = memberptr->next;
2755e9d01f2Sgwr /*
2765e9d01f2Sgwr * Put the brakes on hashi_FreeMembers(). . . .
2775e9d01f2Sgwr */
2785e9d01f2Sgwr tempptr->next = NULL;
2795e9d01f2Sgwr hashi_FreeMembers(tempptr, free_data);
2805e9d01f2Sgwr retval = 0;
2815e9d01f2Sgwr } else {
2825e9d01f2Sgwr previous = memberptr;
2835e9d01f2Sgwr memberptr = memberptr->next;
2845e9d01f2Sgwr }
2855e9d01f2Sgwr }
2865e9d01f2Sgwr return retval;
2875e9d01f2Sgwr }
2885e9d01f2Sgwr
2895e9d01f2Sgwr
2905e9d01f2Sgwr
2915e9d01f2Sgwr /*
2925e9d01f2Sgwr * Locate and return the data entry associated with the given key.
2935e9d01f2Sgwr *
2945e9d01f2Sgwr * If the data entry is found, a pointer to it is returned. Otherwise,
2955e9d01f2Sgwr * NULL is returned.
2965e9d01f2Sgwr */
2975e9d01f2Sgwr
2985e9d01f2Sgwr hash_datum *
hash_Lookup(hash_tbl * hashtable,unsigned int hashcode,hash_cmpfp compare,hash_datum * key)299131109e4Swiz hash_Lookup(hash_tbl *hashtable, unsigned int hashcode, hash_cmpfp compare,
300131109e4Swiz hash_datum *key)
3015e9d01f2Sgwr {
3025e9d01f2Sgwr hash_member *memberptr;
3035e9d01f2Sgwr
3045e9d01f2Sgwr memberptr = (hashtable->table)[hashcode % (hashtable->size)];
3055e9d01f2Sgwr while (memberptr) {
3065e9d01f2Sgwr if ((*compare) (key, memberptr->data)) {
3075e9d01f2Sgwr return (memberptr->data);
3085e9d01f2Sgwr }
3095e9d01f2Sgwr memberptr = memberptr->next;
3105e9d01f2Sgwr }
3115e9d01f2Sgwr return NULL;
3125e9d01f2Sgwr }
3135e9d01f2Sgwr
3145e9d01f2Sgwr
3155e9d01f2Sgwr
3165e9d01f2Sgwr /*
3175e9d01f2Sgwr * Return the next available entry in the hashtable for a linear search
3185e9d01f2Sgwr */
3195e9d01f2Sgwr
3205e9d01f2Sgwr hash_datum *
hash_NextEntry(hash_tbl * hashtable)321131109e4Swiz hash_NextEntry(hash_tbl *hashtable)
3225e9d01f2Sgwr {
323aae9c2a0Swiz unsigned bucket;
324aae9c2a0Swiz hash_member *memberptr;
3255e9d01f2Sgwr
3265e9d01f2Sgwr /*
3275e9d01f2Sgwr * First try to pick up where we left off.
3285e9d01f2Sgwr */
3295e9d01f2Sgwr memberptr = hashtable->member;
3305e9d01f2Sgwr if (memberptr) {
3315e9d01f2Sgwr hashtable->member = memberptr->next; /* Set up for next call */
3325e9d01f2Sgwr return memberptr->data; /* Return the data */
3335e9d01f2Sgwr }
3345e9d01f2Sgwr /*
3355e9d01f2Sgwr * We hit the end of a chain, so look through the array of buckets
3365e9d01f2Sgwr * until we find a new chain (non-empty bucket) or run out of buckets.
3375e9d01f2Sgwr */
3385e9d01f2Sgwr bucket = hashtable->bucketnum + 1;
3395e9d01f2Sgwr while ((bucket < hashtable->size) &&
3405e9d01f2Sgwr !(memberptr = (hashtable->table)[bucket])) {
3415e9d01f2Sgwr bucket++;
3425e9d01f2Sgwr }
3435e9d01f2Sgwr
3445e9d01f2Sgwr /*
3455e9d01f2Sgwr * Check to see if we ran out of buckets.
3465e9d01f2Sgwr */
3475e9d01f2Sgwr if (bucket >= hashtable->size) {
3485e9d01f2Sgwr /*
3495e9d01f2Sgwr * Reset to top of table for next call.
3505e9d01f2Sgwr */
3515e9d01f2Sgwr hashtable->bucketnum = 0;
3525e9d01f2Sgwr hashtable->member = (hashtable->table)[0];
3535e9d01f2Sgwr /*
3545e9d01f2Sgwr * But return end-of-table indication to the caller this time.
3555e9d01f2Sgwr */
3565e9d01f2Sgwr return NULL;
3575e9d01f2Sgwr }
3585e9d01f2Sgwr /*
3595e9d01f2Sgwr * Must have found a non-empty bucket.
3605e9d01f2Sgwr */
3615e9d01f2Sgwr hashtable->bucketnum = bucket;
3625e9d01f2Sgwr hashtable->member = memberptr->next; /* Set up for next call */
3635e9d01f2Sgwr return memberptr->data; /* Return the data */
3645e9d01f2Sgwr }
3655e9d01f2Sgwr
3665e9d01f2Sgwr
3675e9d01f2Sgwr
3685e9d01f2Sgwr /*
3695e9d01f2Sgwr * Return the first entry in a hash table for a linear search
3705e9d01f2Sgwr */
3715e9d01f2Sgwr
3725e9d01f2Sgwr hash_datum *
hash_FirstEntry(hash_tbl * hashtable)373131109e4Swiz hash_FirstEntry(hash_tbl *hashtable)
3745e9d01f2Sgwr {
3755e9d01f2Sgwr hashtable->bucketnum = 0;
3765e9d01f2Sgwr hashtable->member = (hashtable->table)[0];
3775e9d01f2Sgwr return hash_NextEntry(hashtable);
3785e9d01f2Sgwr }
3795e9d01f2Sgwr
3805e9d01f2Sgwr /*
3815e9d01f2Sgwr * Local Variables:
3825e9d01f2Sgwr * tab-width: 4
3835e9d01f2Sgwr * c-indent-level: 4
3845e9d01f2Sgwr * c-argdecl-indent: 4
3855e9d01f2Sgwr * c-continued-statement-offset: 4
3865e9d01f2Sgwr * c-continued-brace-offset: -4
3875e9d01f2Sgwr * c-label-offset: -4
3885e9d01f2Sgwr * c-brace-offset: 0
3895e9d01f2Sgwr * End:
3905e9d01f2Sgwr */
391