xref: /minix3/lib/libc/db/hash/hash.h (revision 2639ae9b1755f49e4eb4b211ce63d8879b0edd4a)
1*2639ae9bSBen Gras /*	$NetBSD: hash.h,v 1.15 2008/08/26 21:18:38 joerg Exp $	*/
2*2639ae9bSBen Gras 
3*2639ae9bSBen Gras /*-
4*2639ae9bSBen Gras  * Copyright (c) 1990, 1993, 1994
5*2639ae9bSBen Gras  *	The Regents of the University of California.  All rights reserved.
6*2639ae9bSBen Gras  *
7*2639ae9bSBen Gras  * This code is derived from software contributed to Berkeley by
8*2639ae9bSBen Gras  * Margo Seltzer.
9*2639ae9bSBen Gras  *
10*2639ae9bSBen Gras  * Redistribution and use in source and binary forms, with or without
11*2639ae9bSBen Gras  * modification, are permitted provided that the following conditions
12*2639ae9bSBen Gras  * are met:
13*2639ae9bSBen Gras  * 1. Redistributions of source code must retain the above copyright
14*2639ae9bSBen Gras  *    notice, this list of conditions and the following disclaimer.
15*2639ae9bSBen Gras  * 2. Redistributions in binary form must reproduce the above copyright
16*2639ae9bSBen Gras  *    notice, this list of conditions and the following disclaimer in the
17*2639ae9bSBen Gras  *    documentation and/or other materials provided with the distribution.
18*2639ae9bSBen Gras  * 3. Neither the name of the University nor the names of its contributors
19*2639ae9bSBen Gras  *    may be used to endorse or promote products derived from this software
20*2639ae9bSBen Gras  *    without specific prior written permission.
21*2639ae9bSBen Gras  *
22*2639ae9bSBen Gras  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23*2639ae9bSBen Gras  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24*2639ae9bSBen Gras  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25*2639ae9bSBen Gras  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26*2639ae9bSBen Gras  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27*2639ae9bSBen Gras  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28*2639ae9bSBen Gras  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29*2639ae9bSBen Gras  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30*2639ae9bSBen Gras  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31*2639ae9bSBen Gras  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32*2639ae9bSBen Gras  * SUCH DAMAGE.
33*2639ae9bSBen Gras  *
34*2639ae9bSBen Gras  *	@(#)hash.h	8.3 (Berkeley) 5/31/94
35*2639ae9bSBen Gras  */
36*2639ae9bSBen Gras 
37*2639ae9bSBen Gras #if HAVE_NBTOOL_CONFIG_H
38*2639ae9bSBen Gras #include "nbtool_config.h"
39*2639ae9bSBen Gras #endif
40*2639ae9bSBen Gras 
41*2639ae9bSBen Gras /* Operations */
42*2639ae9bSBen Gras typedef enum {
43*2639ae9bSBen Gras 	HASH_GET, HASH_PUT, HASH_PUTNEW, HASH_DELETE, HASH_FIRST, HASH_NEXT
44*2639ae9bSBen Gras } ACTION;
45*2639ae9bSBen Gras 
46*2639ae9bSBen Gras /* Buffer Management structures */
47*2639ae9bSBen Gras typedef struct _bufhead BUFHEAD;
48*2639ae9bSBen Gras 
49*2639ae9bSBen Gras struct _bufhead {
50*2639ae9bSBen Gras 	BUFHEAD		*prev;		/* LRU links */
51*2639ae9bSBen Gras 	BUFHEAD		*next;		/* LRU links */
52*2639ae9bSBen Gras 	BUFHEAD		*ovfl;		/* Overflow page buffer header */
53*2639ae9bSBen Gras 	uint32_t	 addr;		/* Address of this page */
54*2639ae9bSBen Gras 	char		*page;		/* Actual page data */
55*2639ae9bSBen Gras 	char	 	flags;
56*2639ae9bSBen Gras #define	BUF_MOD		0x0001
57*2639ae9bSBen Gras #define BUF_DISK	0x0002
58*2639ae9bSBen Gras #define	BUF_BUCKET	0x0004
59*2639ae9bSBen Gras #define	BUF_PIN		0x0008
60*2639ae9bSBen Gras };
61*2639ae9bSBen Gras 
62*2639ae9bSBen Gras #define IS_BUCKET(X)	((X) & BUF_BUCKET)
63*2639ae9bSBen Gras 
64*2639ae9bSBen Gras typedef BUFHEAD **SEGMENT;
65*2639ae9bSBen Gras 
66*2639ae9bSBen Gras /* Hash Table Information */
67*2639ae9bSBen Gras typedef struct hashhdr {		/* Disk resident portion */
68*2639ae9bSBen Gras 	int32_t		magic;		/* Magic NO for hash tables */
69*2639ae9bSBen Gras 	int32_t		version;	/* Version ID */
70*2639ae9bSBen Gras 	uint32_t	lorder;		/* Byte Order */
71*2639ae9bSBen Gras 	int32_t		bsize;		/* Bucket/Page Size */
72*2639ae9bSBen Gras 	int32_t		bshift;		/* Bucket shift */
73*2639ae9bSBen Gras 	int32_t		dsize;		/* Directory Size */
74*2639ae9bSBen Gras 	int32_t		ssize;		/* Segment Size */
75*2639ae9bSBen Gras 	int32_t		sshift;		/* Segment shift */
76*2639ae9bSBen Gras 	int32_t		ovfl_point;	/* Where overflow pages are being
77*2639ae9bSBen Gras 					 * allocated */
78*2639ae9bSBen Gras 	int32_t		last_freed;	/* Last overflow page freed */
79*2639ae9bSBen Gras 	int32_t		max_bucket;	/* ID of Maximum bucket in use */
80*2639ae9bSBen Gras 	int32_t		high_mask;	/* Mask to modulo into entire table */
81*2639ae9bSBen Gras 	int32_t		low_mask;	/* Mask to modulo into lower half of
82*2639ae9bSBen Gras 					 * table */
83*2639ae9bSBen Gras 	int32_t		ffactor;	/* Fill factor */
84*2639ae9bSBen Gras 	int32_t		nkeys;		/* Number of keys in hash table */
85*2639ae9bSBen Gras 	int32_t		hdrpages;	/* Size of table header */
86*2639ae9bSBen Gras 	int32_t		h_charkey;	/* value of hash(CHARKEY) */
87*2639ae9bSBen Gras #define NCACHED	32			/* number of bit maps and spare
88*2639ae9bSBen Gras 					 * points */
89*2639ae9bSBen Gras 	int32_t		spares[NCACHED];/* spare pages for overflow */
90*2639ae9bSBen Gras 	uint16_t	bitmaps[NCACHED];	/* address of overflow page
91*2639ae9bSBen Gras 						 * bitmaps */
92*2639ae9bSBen Gras } HASHHDR;
93*2639ae9bSBen Gras 
94*2639ae9bSBen Gras typedef struct htab	 {		/* Memory resident data structure */
95*2639ae9bSBen Gras 	HASHHDR 	hdr;		/* Header */
96*2639ae9bSBen Gras 	int		nsegs;		/* Number of allocated segments */
97*2639ae9bSBen Gras 	int		exsegs;		/* Number of extra allocated
98*2639ae9bSBen Gras 					 * segments */
99*2639ae9bSBen Gras 	uint32_t	(*hash)(const void *, size_t);	/* Hash function */
100*2639ae9bSBen Gras 	int		flags;		/* Flag values */
101*2639ae9bSBen Gras 	int		fp;		/* File pointer */
102*2639ae9bSBen Gras 	char		*tmp_buf;	/* Temporary Buffer for BIG data */
103*2639ae9bSBen Gras 	char		*tmp_key;	/* Temporary Buffer for BIG keys */
104*2639ae9bSBen Gras 	BUFHEAD 	*cpage;		/* Current page */
105*2639ae9bSBen Gras 	int		cbucket;	/* Current bucket */
106*2639ae9bSBen Gras 	int		cndx;		/* Index of next item on cpage */
107*2639ae9bSBen Gras 	int		err;		/* Error Number -- for DBM
108*2639ae9bSBen Gras 					 * compatibility */
109*2639ae9bSBen Gras 	int		new_file;	/* Indicates if fd is backing store
110*2639ae9bSBen Gras 					 * or no */
111*2639ae9bSBen Gras 	int		save_file;	/* Indicates whether we need to flush
112*2639ae9bSBen Gras 					 * file at
113*2639ae9bSBen Gras 					 * exit */
114*2639ae9bSBen Gras 	uint32_t	*mapp[NCACHED];	/* Pointers to page maps */
115*2639ae9bSBen Gras 	int		nmaps;		/* Initial number of bitmaps */
116*2639ae9bSBen Gras 	int		nbufs;		/* Number of buffers left to
117*2639ae9bSBen Gras 					 * allocate */
118*2639ae9bSBen Gras 	BUFHEAD 	bufhead;	/* Header of buffer lru list */
119*2639ae9bSBen Gras 	SEGMENT 	*dir;		/* Hash Bucket directory */
120*2639ae9bSBen Gras } HTAB;
121*2639ae9bSBen Gras 
122*2639ae9bSBen Gras /*
123*2639ae9bSBen Gras  * Constants
124*2639ae9bSBen Gras  */
125*2639ae9bSBen Gras #define	MAX_BSIZE		65536		/* 2^16 */
126*2639ae9bSBen Gras #define MIN_BUFFERS		6
127*2639ae9bSBen Gras #define MINHDRSIZE		512
128*2639ae9bSBen Gras #define DEF_BUFSIZE		65536		/* 64 K */
129*2639ae9bSBen Gras #define DEF_BUCKET_SIZE		4096
130*2639ae9bSBen Gras #define DEF_BUCKET_SHIFT	12		/* log2(BUCKET) */
131*2639ae9bSBen Gras #define DEF_SEGSIZE		256
132*2639ae9bSBen Gras #define DEF_SEGSIZE_SHIFT	8		/* log2(SEGSIZE)	 */
133*2639ae9bSBen Gras #define DEF_DIRSIZE		256
134*2639ae9bSBen Gras #define DEF_FFACTOR		65536
135*2639ae9bSBen Gras #define MIN_FFACTOR		4
136*2639ae9bSBen Gras #define SPLTMAX			8
137*2639ae9bSBen Gras #define CHARKEY			"%$sniglet^&"
138*2639ae9bSBen Gras #define NUMKEY			1038583
139*2639ae9bSBen Gras #define BYTE_SHIFT		3
140*2639ae9bSBen Gras #define INT_TO_BYTE		2
141*2639ae9bSBen Gras #define INT_BYTE_SHIFT		5
142*2639ae9bSBen Gras #define ALL_SET			((uint32_t)0xFFFFFFFF)
143*2639ae9bSBen Gras #define ALL_CLEAR		0
144*2639ae9bSBen Gras 
145*2639ae9bSBen Gras #define PTROF(X)	((BUFHEAD *)(void *)((u_long)(X)&~0x3))
146*2639ae9bSBen Gras #define ISMOD(X)	((uint32_t)(u_long)(X)&0x1)
147*2639ae9bSBen Gras #define DOMOD(X)	((X) = (char *)(void *)((u_long)(X)|0x1))
148*2639ae9bSBen Gras #define ISDISK(X)	((uint32_t)(u_long)(X)&0x2)
149*2639ae9bSBen Gras #define DODISK(X)	((X) = (char *)(void *)((u_long)(X)|0x2))
150*2639ae9bSBen Gras 
151*2639ae9bSBen Gras #define BITS_PER_MAP	32
152*2639ae9bSBen Gras 
153*2639ae9bSBen Gras /* Given the address of the beginning of a big map, clear/set the nth bit */
154*2639ae9bSBen Gras #define CLRBIT(A, N)	((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
155*2639ae9bSBen Gras #define SETBIT(A, N)	((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
156*2639ae9bSBen Gras #define ISSET(A, N)	((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
157*2639ae9bSBen Gras 
158*2639ae9bSBen Gras /* Overflow management */
159*2639ae9bSBen Gras /*
160*2639ae9bSBen Gras  * Overflow page numbers are allocated per split point.  At each doubling of
161*2639ae9bSBen Gras  * the table, we can allocate extra pages.  So, an overflow page number has
162*2639ae9bSBen Gras  * the top 5 bits indicate which split point and the lower 11 bits indicate
163*2639ae9bSBen Gras  * which page at that split point is indicated (pages within split points are
164*2639ae9bSBen Gras  * numberered starting with 1).
165*2639ae9bSBen Gras  */
166*2639ae9bSBen Gras 
167*2639ae9bSBen Gras #define SPLITSHIFT	11
168*2639ae9bSBen Gras #define SPLITMASK	0x7FF
169*2639ae9bSBen Gras #define SPLITNUM(N)	(((uint32_t)(N)) >> SPLITSHIFT)
170*2639ae9bSBen Gras #define OPAGENUM(N)	((N) & SPLITMASK)
171*2639ae9bSBen Gras #define	OADDR_OF(S,O)	((uint32_t)((uint32_t)(S) << SPLITSHIFT) + (O))
172*2639ae9bSBen Gras 
173*2639ae9bSBen Gras #define BUCKET_TO_PAGE(B) \
174*2639ae9bSBen Gras 	(B) + hashp->HDRPAGES + \
175*2639ae9bSBen Gras 	((B) ? hashp->SPARES[__log2((uint32_t)((B)+1))-1] : 0)
176*2639ae9bSBen Gras #define OADDR_TO_PAGE(B) 	\
177*2639ae9bSBen Gras 	BUCKET_TO_PAGE ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B));
178*2639ae9bSBen Gras 
179*2639ae9bSBen Gras /*
180*2639ae9bSBen Gras  * page.h contains a detailed description of the page format.
181*2639ae9bSBen Gras  *
182*2639ae9bSBen Gras  * Normally, keys and data are accessed from offset tables in the top of
183*2639ae9bSBen Gras  * each page which point to the beginning of the key and data.  There are
184*2639ae9bSBen Gras  * four flag values which may be stored in these offset tables which indicate
185*2639ae9bSBen Gras  * the following:
186*2639ae9bSBen Gras  *
187*2639ae9bSBen Gras  *
188*2639ae9bSBen Gras  * OVFLPAGE	Rather than a key data pair, this pair contains
189*2639ae9bSBen Gras  *		the address of an overflow page.  The format of
190*2639ae9bSBen Gras  *		the pair is:
191*2639ae9bSBen Gras  *		    OVERFLOW_PAGE_NUMBER OVFLPAGE
192*2639ae9bSBen Gras  *
193*2639ae9bSBen Gras  * PARTIAL_KEY	This must be the first key/data pair on a page
194*2639ae9bSBen Gras  *		and implies that page contains only a partial key.
195*2639ae9bSBen Gras  *		That is, the key is too big to fit on a single page
196*2639ae9bSBen Gras  *		so it starts on this page and continues on the next.
197*2639ae9bSBen Gras  *		The format of the page is:
198*2639ae9bSBen Gras  *		    KEY_OFF PARTIAL_KEY OVFL_PAGENO OVFLPAGE
199*2639ae9bSBen Gras  *
200*2639ae9bSBen Gras  *		    KEY_OFF -- offset of the beginning of the key
201*2639ae9bSBen Gras  *		    PARTIAL_KEY -- 1
202*2639ae9bSBen Gras  *		    OVFL_PAGENO - page number of the next overflow page
203*2639ae9bSBen Gras  *		    OVFLPAGE -- 0
204*2639ae9bSBen Gras  *
205*2639ae9bSBen Gras  * FULL_KEY	This must be the first key/data pair on the page.  It
206*2639ae9bSBen Gras  *		is used in two cases.
207*2639ae9bSBen Gras  *
208*2639ae9bSBen Gras  *		Case 1:
209*2639ae9bSBen Gras  *		    There is a complete key on the page but no data
210*2639ae9bSBen Gras  *		    (because it wouldn't fit).  The next page contains
211*2639ae9bSBen Gras  *		    the data.
212*2639ae9bSBen Gras  *
213*2639ae9bSBen Gras  *		    Page format it:
214*2639ae9bSBen Gras  *		    KEY_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
215*2639ae9bSBen Gras  *
216*2639ae9bSBen Gras  *		    KEY_OFF -- offset of the beginning of the key
217*2639ae9bSBen Gras  *		    FULL_KEY -- 2
218*2639ae9bSBen Gras  *		    OVFL_PAGENO - page number of the next overflow page
219*2639ae9bSBen Gras  *		    OVFLPAGE -- 0
220*2639ae9bSBen Gras  *
221*2639ae9bSBen Gras  *		Case 2:
222*2639ae9bSBen Gras  *		    This page contains no key, but part of a large
223*2639ae9bSBen Gras  *		    data field, which is continued on the next page.
224*2639ae9bSBen Gras  *
225*2639ae9bSBen Gras  *		    Page format it:
226*2639ae9bSBen Gras  *		    DATA_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
227*2639ae9bSBen Gras  *
228*2639ae9bSBen Gras  *		    KEY_OFF -- offset of the beginning of the data on
229*2639ae9bSBen Gras  *				this page
230*2639ae9bSBen Gras  *		    FULL_KEY -- 2
231*2639ae9bSBen Gras  *		    OVFL_PAGENO - page number of the next overflow page
232*2639ae9bSBen Gras  *		    OVFLPAGE -- 0
233*2639ae9bSBen Gras  *
234*2639ae9bSBen Gras  * FULL_KEY_DATA
235*2639ae9bSBen Gras  *		This must be the first key/data pair on the page.
236*2639ae9bSBen Gras  *		There are two cases:
237*2639ae9bSBen Gras  *
238*2639ae9bSBen Gras  *		Case 1:
239*2639ae9bSBen Gras  *		    This page contains a key and the beginning of the
240*2639ae9bSBen Gras  *		    data field, but the data field is continued on the
241*2639ae9bSBen Gras  *		    next page.
242*2639ae9bSBen Gras  *
243*2639ae9bSBen Gras  *		    Page format is:
244*2639ae9bSBen Gras  *		    KEY_OFF FULL_KEY_DATA OVFL_PAGENO DATA_OFF
245*2639ae9bSBen Gras  *
246*2639ae9bSBen Gras  *		    KEY_OFF -- offset of the beginning of the key
247*2639ae9bSBen Gras  *		    FULL_KEY_DATA -- 3
248*2639ae9bSBen Gras  *		    OVFL_PAGENO - page number of the next overflow page
249*2639ae9bSBen Gras  *		    DATA_OFF -- offset of the beginning of the data
250*2639ae9bSBen Gras  *
251*2639ae9bSBen Gras  *		Case 2:
252*2639ae9bSBen Gras  *		    This page contains the last page of a big data pair.
253*2639ae9bSBen Gras  *		    There is no key, only the  tail end of the data
254*2639ae9bSBen Gras  *		    on this page.
255*2639ae9bSBen Gras  *
256*2639ae9bSBen Gras  *		    Page format is:
257*2639ae9bSBen Gras  *		    DATA_OFF FULL_KEY_DATA <OVFL_PAGENO> <OVFLPAGE>
258*2639ae9bSBen Gras  *
259*2639ae9bSBen Gras  *		    DATA_OFF -- offset of the beginning of the data on
260*2639ae9bSBen Gras  *				this page
261*2639ae9bSBen Gras  *		    FULL_KEY_DATA -- 3
262*2639ae9bSBen Gras  *		    OVFL_PAGENO - page number of the next overflow page
263*2639ae9bSBen Gras  *		    OVFLPAGE -- 0
264*2639ae9bSBen Gras  *
265*2639ae9bSBen Gras  *		    OVFL_PAGENO and OVFLPAGE are optional (they are
266*2639ae9bSBen Gras  *		    not present if there is no next page).
267*2639ae9bSBen Gras  */
268*2639ae9bSBen Gras 
269*2639ae9bSBen Gras #define OVFLPAGE	0
270*2639ae9bSBen Gras #define PARTIAL_KEY	1
271*2639ae9bSBen Gras #define FULL_KEY	2
272*2639ae9bSBen Gras #define FULL_KEY_DATA	3
273*2639ae9bSBen Gras #define	REAL_KEY	4
274*2639ae9bSBen Gras 
275*2639ae9bSBen Gras /* Short hands for accessing structure */
276*2639ae9bSBen Gras #define BSIZE		hdr.bsize
277*2639ae9bSBen Gras #define BSHIFT		hdr.bshift
278*2639ae9bSBen Gras #define DSIZE		hdr.dsize
279*2639ae9bSBen Gras #define SGSIZE		hdr.ssize
280*2639ae9bSBen Gras #define SSHIFT		hdr.sshift
281*2639ae9bSBen Gras #define LORDER		hdr.lorder
282*2639ae9bSBen Gras #define OVFL_POINT	hdr.ovfl_point
283*2639ae9bSBen Gras #define	LAST_FREED	hdr.last_freed
284*2639ae9bSBen Gras #define MAX_BUCKET	hdr.max_bucket
285*2639ae9bSBen Gras #define FFACTOR		hdr.ffactor
286*2639ae9bSBen Gras #define HIGH_MASK	hdr.high_mask
287*2639ae9bSBen Gras #define LOW_MASK	hdr.low_mask
288*2639ae9bSBen Gras #define NKEYS		hdr.nkeys
289*2639ae9bSBen Gras #define HDRPAGES	hdr.hdrpages
290*2639ae9bSBen Gras #define SPARES		hdr.spares
291*2639ae9bSBen Gras #define BITMAPS		hdr.bitmaps
292*2639ae9bSBen Gras #define VERSION		hdr.version
293*2639ae9bSBen Gras #define MAGIC		hdr.magic
294*2639ae9bSBen Gras #define NEXT_FREE	hdr.next_free
295*2639ae9bSBen Gras #define H_CHARKEY	hdr.h_charkey
296