xref: /netbsd-src/lib/libc/db/hash/hash_func.c (revision 52c4a82ef2f53d71ad0fe3b4ccfc42c8ab79f3a4)
/*	$NetBSD: hash_func.c,v 1.13 2008/09/10 17:52:35 joerg Exp $	*/

/*-
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Margo Seltzer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
349f0aa214Scgd 
35b2f78261Sjmc #if HAVE_NBTOOL_CONFIG_H
36b2f78261Sjmc #include "nbtool_config.h"
37b2f78261Sjmc #endif
38b2f78261Sjmc 
3900ae392dSchristos #include <sys/cdefs.h>
40*52c4a82eSjoerg __RCSID("$NetBSD: hash_func.c,v 1.13 2008/09/10 17:52:35 joerg Exp $");
419f0aa214Scgd 
429f0aa214Scgd #include <sys/types.h>
439f0aa214Scgd 
449f0aa214Scgd #include <db.h>
459f0aa214Scgd #include "hash.h"
469f0aa214Scgd #include "page.h"
479f0aa214Scgd #include "extern.h"
489f0aa214Scgd 
/*
 * Forward declarations of the file-local hash functions.  hash1..hash3 are
 * compiled out; only hash4 is built.  The __unused__ attribute is kept on
 * every prototype so that re-enabling an alternative does not trigger
 * "defined but not used" warnings.
 */
#if 0
static uint32_t hash1(const void *, size_t) __attribute__((__unused__));
static uint32_t hash2(const void *, size_t) __attribute__((__unused__));
static uint32_t hash3(const void *, size_t) __attribute__((__unused__));
#endif
static uint32_t hash4(const void *, size_t) __attribute__((__unused__));

/*
 * Global default hash function: takes a pointer to the key bytes and the
 * key length, returns a 32-bit hash.  Initialised to hash4.
 */
uint32_t (*__default_hash)(const void *, size_t) = hash4;
5861238e71Schristos #if 0
/*
 * HASH FUNCTIONS
 *
 * Assume that we've already split the bucket to which this key hashes,
 * calculate that bucket, and check that in fact we did already split it.
 *
 * This came from ejb's hsearch.
 */
679f0aa214Scgd 
#define PRIME1		37
#define PRIME2		1048583

/*
 * hash1 --
 *	Multiply-and-XOR hash over the key bytes.
 *
 *	For each byte, the running value is multiplied by PRIME1 and XORed
 *	with (byte - ' ').  The final value is reduced modulo PRIME2.
 *
 * Parameters:
 *	keyarg	pointer to the key bytes (read as unsigned 8-bit values)
 *	len	number of bytes in the key
 *
 * Returns:
 *	the hash, always in the range [0, PRIME2); 0 for an empty key.
 */
static uint32_t
hash1(const void *keyarg, size_t len)
{
	const uint8_t *key = keyarg;
	uint32_t h = 0;

	/* Convert string to integer */
	while (len-- > 0) {
		/*
		 * (byte - ' ') is computed as int and may be negative for
		 * bytes below 0x20; the cast makes the wrap-around to
		 * 32 bits explicit.  The parentheses spell out that the
		 * multiply binds tighter than the XOR.
		 */
		h = (h * (uint32_t)PRIME1) ^ (uint32_t)(*key++ - ' ');
	}
	return (h % (uint32_t)PRIME2);
}
839f0aa214Scgd 
/*
 * Phong's linear congruential hash
 *
 * dcharhash(h, c) advances the running hash h by one byte c:
 *	h = 0x63c63cd9 * h + 0x9c39c33d + c
 * Note that h is evaluated twice by the macro.
 */
#define dcharhash(h, c)	((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))

/*
 * hash2 --
 *	Apply dcharhash to every byte of the key, in order.
 *
 * Parameters:
 *	keyarg	pointer to the key bytes (read as unsigned 8-bit values)
 *	len	number of bytes in the key
 *
 * Returns:
 *	the 32-bit hash; 0 for an empty key.
 *
 * Zero bytes inside the key are hashed like any other byte.  An earlier
 * form of this loop carried an "if (!c && key > e) break;" test, but the
 * pointer can never pass the end inside the loop, so that branch was dead
 * and has been removed.
 */
static uint32_t
hash2(const void *keyarg, size_t len)
{
	const uint8_t *key = keyarg;
	const uint8_t *end = key + len;
	uint32_t h = 0;

	while (key != end) {
		const uint8_t c = *key++;

		dcharhash(h, c);
	}
	return (h);
}
1069f0aa214Scgd 
/*
 * OZ's original sdbm hash
 *
 * hash3 --
 *	For each key byte, in order: h = byte + 65599 * h (mod 2^32).
 *
 * Parameters:
 *	keyarg	pointer to the key bytes (read as unsigned 8-bit values)
 *	len	number of bytes in the key
 *
 * Returns:
 *	the 32-bit hash; 0 for an empty key.
 *
 * This used to be hand-unrolled eight ways with a switch jumping into a
 * do/while loop.  The unrolled form visited the bytes in exactly the same
 * order, so this plain loop produces identical values and leaves any
 * unrolling to the compiler.
 */
static uint32_t
hash3(const void *keyarg, size_t len)
{
	const uint8_t *key = keyarg;
	uint32_t h = 0;

	while (len-- > 0) {
		h = *key++ + 65599 * h;
	}
	return (h);
}
16061238e71Schristos #endif
1619f0aa214Scgd 
/*
 * Hash function from Chris Torek.
 *
 * hash4 --
 *	For each key byte, in order: h = (h << 5) + h + byte, i.e.
 *	h = 33 * h + byte (mod 2^32).
 *
 * Parameters:
 *	keyarg	pointer to the key bytes (read as unsigned 8-bit values)
 *	len	number of bytes in the key
 *
 * Returns:
 *	the 32-bit hash; 0 for an empty key.
 *
 * This used to be hand-unrolled eight ways with a switch jumping into a
 * do/while loop, driven by a HASH4 macro (an alternative "(h << 5) - h"
 * variant was defined but never selected).  The unrolled form visited the
 * bytes in exactly the same order, so this plain loop produces identical
 * values and no longer leaks helper macros into the rest of the file.
 */
static uint32_t
hash4(const void *keyarg, size_t len)
{
	const uint8_t *key = keyarg;
	uint32_t h = 0;

	while (len-- > 0) {
		h = (h << 5) + h + *key++;
	}
	return (h);
}
209