1*52c4a82eSjoerg /* $NetBSD: hash_func.c,v 1.13 2008/09/10 17:52:35 joerg Exp $ */
2402f19d1Scgd
39f0aa214Scgd /*-
49f0aa214Scgd * Copyright (c) 1990, 1993
59f0aa214Scgd * The Regents of the University of California. All rights reserved.
69f0aa214Scgd *
79f0aa214Scgd * This code is derived from software contributed to Berkeley by
89f0aa214Scgd * Margo Seltzer.
99f0aa214Scgd *
109f0aa214Scgd * Redistribution and use in source and binary forms, with or without
119f0aa214Scgd * modification, are permitted provided that the following conditions
129f0aa214Scgd * are met:
139f0aa214Scgd * 1. Redistributions of source code must retain the above copyright
149f0aa214Scgd * notice, this list of conditions and the following disclaimer.
159f0aa214Scgd * 2. Redistributions in binary form must reproduce the above copyright
169f0aa214Scgd * notice, this list of conditions and the following disclaimer in the
179f0aa214Scgd * documentation and/or other materials provided with the distribution.
18eb7c1594Sagc * 3. Neither the name of the University nor the names of its contributors
199f0aa214Scgd * may be used to endorse or promote products derived from this software
209f0aa214Scgd * without specific prior written permission.
219f0aa214Scgd *
229f0aa214Scgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
239f0aa214Scgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
249f0aa214Scgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
259f0aa214Scgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
269f0aa214Scgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
279f0aa214Scgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
289f0aa214Scgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
299f0aa214Scgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
309f0aa214Scgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
319f0aa214Scgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
329f0aa214Scgd * SUCH DAMAGE.
339f0aa214Scgd */
349f0aa214Scgd
35b2f78261Sjmc #if HAVE_NBTOOL_CONFIG_H
36b2f78261Sjmc #include "nbtool_config.h"
37b2f78261Sjmc #endif
38b2f78261Sjmc
3900ae392dSchristos #include <sys/cdefs.h>
40*52c4a82eSjoerg __RCSID("$NetBSD: hash_func.c,v 1.13 2008/09/10 17:52:35 joerg Exp $");
419f0aa214Scgd
429f0aa214Scgd #include <sys/types.h>
439f0aa214Scgd
449f0aa214Scgd #include <db.h>
459f0aa214Scgd #include "hash.h"
469f0aa214Scgd #include "page.h"
479f0aa214Scgd #include "extern.h"
489f0aa214Scgd
/*
 * Forward declarations.  hash1, hash2 and hash3 are compiled out here,
 * matching the #if 0 that surrounds their definitions later in this file;
 * only hash4 is built.
 */
#if 0
static uint32_t hash1(const void *, size_t) __attribute__((__unused__));
static uint32_t hash2(const void *, size_t) __attribute__((__unused__));
static uint32_t hash3(const void *, size_t) __attribute__((__unused__));
#endif
static uint32_t hash4(const void *, size_t) __attribute__((__unused__));

/* Global default hash function */
uint32_t (*__default_hash)(const void *, size_t) = hash4;
5861238e71Schristos #if 0
/*
 * HASH FUNCTIONS
 *
 * Assume that we've already split the bucket to which this key hashes,
 * calculate that bucket, and check that in fact we did already split it.
 *
 * This came from ejb's hsearch.
 */

#define PRIME1		37
#define PRIME2		1048583

/*
 * hash1 --
 *	Hash the len bytes at keyarg.  Starting from 0, each byte is folded
 *	in, in order, as h = (h * PRIME1) ^ (byte - ' '); the final value is
 *	reduced modulo PRIME2, so the result is always below PRIME2.
 *
 *	A byte smaller than ' ' makes (byte - ' ') negative; that difference
 *	is converted to uint32_t (i.e. it wraps) before the XOR.
 *
 *	A zero-length key hashes to 0 and keyarg is not dereferenced.
 */
static uint32_t
hash1(const void *keyarg, size_t len)
{
	const uint8_t *key;
	uint32_t h;

	/* Convert string to integer */
	for (key = keyarg, h = 0; len--;)
		h = (h * PRIME1) ^ (uint32_t)(*key++ - ' ');
	h %= PRIME2;
	return (h);
}
839f0aa214Scgd
/*
 * Phong's linear congruential hash
 */
#define dcharhash(h, c)	((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))

/*
 * hash2 --
 *	Hash the len bytes at keyarg.  Starting from 0, every byte is folded
 *	in, in order, with dcharhash(); arithmetic wraps at 32 bits.  NUL
 *	bytes are hashed like any other byte.
 *
 *	A zero-length key hashes to 0 and keyarg is not dereferenced.
 */
static uint32_t
hash2(const void *keyarg, size_t len)
{
	const uint8_t *key;
	uint32_t h;
	size_t i;

	key = keyarg;
	h = 0;
	for (i = 0; i < len; i++)
		dcharhash(h, key[i]);
	return (h);
}
1069f0aa214Scgd
/*
 * This is INCREDIBLY ugly, but fast.  We break the string up into 8 byte
 * units.  On the first time through the loop we get the "leftover bytes"
 * (strlen % 8).  On every other iteration, we perform 8 HASHC's so we handle
 * all 8 bytes.  Essentially, this saves us 7 cmp & branch instructions.  If
 * this routine is heavily used enough, it's worth the ugly coding.
 *
 * OZ's original sdbm hash
 *
 * hash3 --
 *	Hash the len bytes at keyarg.  Starting from 0, each byte is folded
 *	in, in order, as h = byte + 65599 * h, wrapping at 32 bits.
 *
 *	A zero-length key hashes to 0 and keyarg is not dereferenced.
 */
static uint32_t
hash3(const void *keyarg, size_t len)
{
	const uint8_t *key;
	size_t loop;
	uint32_t h;

#define HASHC	h = *key++ + 65599 * h

	h = 0;
	key = keyarg;
	if (len > 0) {
		/*
		 * Number of passes through the unrolled body: len / 8 rounded
		 * up, computed without forming len + 7 so it cannot wrap.
		 */
		loop = (len >> 3) + ((len & (8 - 1)) != 0);

		/* Jump into the body so the first pass eats the leftovers. */
		switch (len & (8 - 1)) {
		case 0:
			do {
				HASHC;
				/* FALLTHROUGH */
		case 7:
				HASHC;
				/* FALLTHROUGH */
		case 6:
				HASHC;
				/* FALLTHROUGH */
		case 5:
				HASHC;
				/* FALLTHROUGH */
		case 4:
				HASHC;
				/* FALLTHROUGH */
		case 3:
				HASHC;
				/* FALLTHROUGH */
		case 2:
				HASHC;
				/* FALLTHROUGH */
		case 1:
				HASHC;
			} while (--loop);
		}
	}
	return (h);
}
#undef HASHC
16061238e71Schristos #endif
1619f0aa214Scgd
/*
 * Hash function from Chris Torek.
 *
 * hash4 --
 *	Hash the len bytes at keyarg.  Starting from 0, each byte is folded
 *	in, in order, as h = h * 33 + byte (written as a shift and add),
 *	wrapping at 32 bits.
 *
 *	The loop is unrolled eight ways: the switch jumps into the middle of
 *	the do/while body so the first pass consumes len % 8 bytes (a full 8
 *	when len is a multiple of 8) and every later pass consumes exactly 8.
 *
 *	A zero-length key hashes to 0 and keyarg is not dereferenced.
 */
static uint32_t
hash4(const void *keyarg, size_t len)
{
	const uint8_t *key;
	size_t loop;
	uint32_t h;

/* Per-byte step: HASH4a is h * 31 + byte, HASH4b is h * 33 + byte. */
#define HASH4a	h = (h << 5) - h + *key++
#define HASH4b	h = (h << 5) + h + *key++
#define HASH4	HASH4b

	h = 0;
	key = keyarg;
	if (len > 0) {
		/*
		 * Number of passes through the unrolled body: len / 8 rounded
		 * up, computed without forming len + 7 so it cannot wrap.
		 */
		loop = (len >> 3) + ((len & (8 - 1)) != 0);

		switch (len & (8 - 1)) {
		case 0:
			do {
				HASH4;
				/* FALLTHROUGH */
		case 7:
				HASH4;
				/* FALLTHROUGH */
		case 6:
				HASH4;
				/* FALLTHROUGH */
		case 5:
				HASH4;
				/* FALLTHROUGH */
		case 4:
				HASH4;
				/* FALLTHROUGH */
		case 3:
				HASH4;
				/* FALLTHROUGH */
		case 2:
				HASH4;
				/* FALLTHROUGH */
		case 1:
				HASH4;
			} while (--loop);
		}
	}
	return (h);
}
#undef HASH4
#undef HASH4b
#undef HASH4a
209