xref: /minix3/lib/libc/db/hash/hash_func.c (revision 2fe8fb192fe7e8720e3e7a77f928da545e872a6a)
1*2639ae9bSBen Gras /*	$NetBSD: hash_func.c,v 1.13 2008/09/10 17:52:35 joerg Exp $	*/
2*2639ae9bSBen Gras 
3*2639ae9bSBen Gras /*-
4*2639ae9bSBen Gras  * Copyright (c) 1990, 1993
5*2639ae9bSBen Gras  *	The Regents of the University of California.  All rights reserved.
6*2639ae9bSBen Gras  *
7*2639ae9bSBen Gras  * This code is derived from software contributed to Berkeley by
8*2639ae9bSBen Gras  * Margo Seltzer.
9*2639ae9bSBen Gras  *
10*2639ae9bSBen Gras  * Redistribution and use in source and binary forms, with or without
11*2639ae9bSBen Gras  * modification, are permitted provided that the following conditions
12*2639ae9bSBen Gras  * are met:
13*2639ae9bSBen Gras  * 1. Redistributions of source code must retain the above copyright
14*2639ae9bSBen Gras  *    notice, this list of conditions and the following disclaimer.
15*2639ae9bSBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
16*2639ae9bSBen Gras  *    notice, this list of conditions and the following disclaimer in the
17*2639ae9bSBen Gras  *    documentation and/or other materials provided with the distribution.
18*2639ae9bSBen Gras  * 3. Neither the name of the University nor the names of its contributors
19*2639ae9bSBen Gras  *    may be used to endorse or promote products derived from this software
20*2639ae9bSBen Gras  *    without specific prior written permission.
21*2639ae9bSBen Gras  *
22*2639ae9bSBen Gras  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23*2639ae9bSBen Gras  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24*2639ae9bSBen Gras  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25*2639ae9bSBen Gras  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26*2639ae9bSBen Gras  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27*2639ae9bSBen Gras  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28*2639ae9bSBen Gras  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29*2639ae9bSBen Gras  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30*2639ae9bSBen Gras  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31*2639ae9bSBen Gras  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32*2639ae9bSBen Gras  * SUCH DAMAGE.
33*2639ae9bSBen Gras  */
34*2639ae9bSBen Gras 
35*2639ae9bSBen Gras #if HAVE_NBTOOL_CONFIG_H
36*2639ae9bSBen Gras #include "nbtool_config.h"
37*2639ae9bSBen Gras #endif
38*2639ae9bSBen Gras 
39*2639ae9bSBen Gras #include <sys/cdefs.h>
40*2639ae9bSBen Gras __RCSID("$NetBSD: hash_func.c,v 1.13 2008/09/10 17:52:35 joerg Exp $");
41*2639ae9bSBen Gras 
42*2639ae9bSBen Gras #include <sys/types.h>
43*2639ae9bSBen Gras 
44*2639ae9bSBen Gras #include <db.h>
45*2639ae9bSBen Gras #include "hash.h"
46*2639ae9bSBen Gras #include "page.h"
47*2639ae9bSBen Gras #include "extern.h"
48*2639ae9bSBen Gras 
#if 0
/* Prototypes for the alternative hash functions; disabled by this #if 0. */
static uint32_t hash1(const void *, size_t) __attribute__((__unused__));
static uint32_t hash2(const void *, size_t) __attribute__((__unused__));
static uint32_t hash3(const void *, size_t) __attribute__((__unused__));
#endif
/* File-local hash; reachable from outside only through __default_hash. */
static uint32_t hash4(const void *, size_t) __attribute__((__unused__));

/* Global default hash function */
uint32_t (*__default_hash)(const void *, size_t) = hash4;
58*2639ae9bSBen Gras #if 0
/*
 * HASH FUNCTIONS
 *
 * Assume that we've already split the bucket to which this key hashes,
 * calculate that bucket, and check that in fact we did already split it.
 *
 * This came from ejb's hsearch.
 */

#define PRIME1		37
#define PRIME2		1048583

/*
 * hash1 --
 *	Hash LEN bytes starting at KEYARG.
 *
 *	For every byte, the running value is multiplied by PRIME1 and then
 *	XORed with (byte - ' ').  The multiplication happens before the XOR.
 *	Bytes below ' ' give a negative difference, which is converted to
 *	uint32_t (i.e. wraps modulo 2^32) before the XOR.  The final value
 *	is reduced modulo PRIME2, so the result is always < PRIME2.
 *
 *	Returns 0 when LEN is 0; KEYARG is not dereferenced in that case.
 */
static uint32_t
hash1(const void *keyarg, size_t len)
{
	const uint8_t *key = keyarg;
	uint32_t h = 0;
	size_t i;

	/* Convert string to integer */
	for (i = 0; i < len; i++)
		h = (h * PRIME1) ^ (uint32_t)(key[i] - ' ');

	return (h % PRIME2);
}
83*2639ae9bSBen Gras 
/*
 * Phong's linear congruential hash
 *
 * dcharhash(h, c) updates H in place with one step of the recurrence
 *	h = 0x63c63cd9 * h + 0x9c39c33d + c
 * H is evaluated twice, so it must be a side-effect-free lvalue.
 */
#define dcharhash(h, c)	((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))

/*
 * hash2 --
 *	Hash LEN bytes starting at KEYARG by applying dcharhash() to each
 *	byte in order, starting from 0.  Zero bytes are hashed like any
 *	other byte.  All arithmetic is uint32_t and wraps modulo 2^32.
 *
 *	Returns 0 when LEN is 0.
 */
static uint32_t
hash2(const void *keyarg, size_t len)
{
	const uint8_t *key = keyarg;
	const uint8_t *end = key + len;
	uint32_t h = 0;

	while (key != end) {
		dcharhash(h, *key);
		key++;
	}
	return (h);
}
106*2639ae9bSBen Gras 
/*
 * OZ's original sdbm hash
 *
 * hash3 --
 *	Hash LEN bytes starting at KEYARG.  Starting from 0, each byte is
 *	folded in as
 *		h = byte + 65599 * h
 *	in uint32_t arithmetic, which wraps modulo 2^32.
 *
 *	The bytes are visited strictly in order, one per iteration; loop
 *	unrolling is left to the compiler.
 *
 *	Returns 0 when LEN is 0; KEYARG is not dereferenced in that case.
 */
static uint32_t
hash3(const void *keyarg, size_t len)
{
	const uint8_t *key = keyarg;
	uint32_t h = 0;
	size_t i;

	for (i = 0; i < len; i++)
		h = key[i] + 65599 * h;

	return (h);
}
160*2639ae9bSBen Gras #endif
161*2639ae9bSBen Gras 
/*
 * Hash function from Chris Torek.
 *
 * hash4 --
 *	Hash LEN bytes starting at KEYARG.  Starting from 0, each byte is
 *	folded in as
 *		h = (h << 5) + h + byte		(i.e. h * 33 + byte)
 *	in uint32_t arithmetic, which wraps modulo 2^32.  Bytes are read
 *	as uint8_t, so values >= 0x80 contribute 128..255, never a
 *	negative number.
 *
 *	The bytes are visited strictly in order, one per iteration; loop
 *	unrolling is left to the compiler.
 *
 *	Returns 0 when LEN is 0; KEYARG is not dereferenced in that case.
 */
static uint32_t
hash4(const void *keyarg, size_t len)
{
	const uint8_t *key = keyarg;
	uint32_t h = 0;
	size_t i;

	for (i = 0; i < len; i++)
		h = (h << 5) + h + key[i];

	return (h);
}
209