xref: /csrg-svn/lib/libc/db/hash/hash.h (revision 46367)
1*46367Sbostic /*-
2*46367Sbostic  * Copyright (c) 1990 The Regents of the University of California.
3*46367Sbostic  * All rights reserved.
4*46367Sbostic  *
5*46367Sbostic  * This code is derived from software contributed to Berkeley by
6*46367Sbostic  * Margo Seltzer.
7*46367Sbostic  *
8*46367Sbostic  * %sccs.include.redist.c%
9*46367Sbostic  *
10*46367Sbostic  *	@(#)hash.h	5.1 (Berkeley) 02/12/91
11*46367Sbostic  */
12*46367Sbostic 
13*46367Sbostic /* Operations */
14*46367Sbostic typedef enum { HASH_GET, HASH_PUT, HASH_PUTNEW, HASH_DELETE,
15*46367Sbostic 		HASH_FIRST, HASH_NEXT } ACTION;
16*46367Sbostic 
17*46367Sbostic /* Buffer Management structures */
18*46367Sbostic typedef struct _bufhead BUFHEAD;
19*46367Sbostic 
20*46367Sbostic struct _bufhead {
21*46367Sbostic     BUFHEAD	*prev;		/* LRU links */
22*46367Sbostic     BUFHEAD	*next;		/* LRU links */
23*46367Sbostic     BUFHEAD	*ovfl;		/* Overflow page buffer header */
24*46367Sbostic     int		addr;		/* Address of this page */
25*46367Sbostic     char	*page;		/* Actual page data */
26*46367Sbostic     char	flags;	/* Combination of BUF_MOD, BUF_DISK, BUF_BUCKET */
27*46367Sbostic };
28*46367Sbostic 
29*46367Sbostic /* Flag Values */
30*46367Sbostic #define	BUF_MOD		0x0001
31*46367Sbostic #define BUF_DISK	0x0002
32*46367Sbostic #define	BUF_BUCKET	0x0004
33*46367Sbostic 
34*46367Sbostic #define IS_BUCKET(X)	(X & BUF_BUCKET)
35*46367Sbostic 
36*46367Sbostic typedef BUFHEAD	**SEGMENT;
37*46367Sbostic 
38*46367Sbostic /* Hash Table Information */
39*46367Sbostic typedef struct hashhdr {	/* Disk resident portion */
40*46367Sbostic 	int magic;	/* Magic NO for hash tables */
41*46367Sbostic 	int version;	/* Version ID */
42*46367Sbostic 	long lorder;	/* Byte Order */
43*46367Sbostic 	int bsize;	/* Bucket/Page Size */
44*46367Sbostic 	int bshift;	/* Bucket shift */
45*46367Sbostic 	int dsize;	/* Directory Size */
46*46367Sbostic 	int ssize;	/* Segment Size */
47*46367Sbostic 	int sshift;	/* Segment shift */
48*46367Sbostic 	int max_bucket;	/* ID of Maximum bucket in use */
49*46367Sbostic 	int high_mask;	/* Mask to modulo into entire table */
50*46367Sbostic 	int low_mask;	/* Mask to modulo into lower half of table */
51*46367Sbostic 	int ffactor;	/* Fill factor */
52*46367Sbostic 	int nkeys;	/* Number of keys in hash table */
53*46367Sbostic 	int hdrpages;	/* Size of table header */
54*46367Sbostic 	int h_charkey;	/* value of hash(CHARKEY) */
55*46367Sbostic # define NCACHED		32	/* number of bit maps and spare points*/
56*46367Sbostic 	int spares[NCACHED];	/* spare pages for overflow */
57*46367Sbostic 	u_short bitmaps[NCACHED];	/* address of overflow page bitmaps */
58*46367Sbostic } HASHHDR;
59*46367Sbostic 
60*46367Sbostic typedef struct htab {	/* Memory resident data structure */
61*46367Sbostic 	HASHHDR hdr;	/* Header */
62*46367Sbostic 	int nsegs;	/* Number of allocated segments */
63*46367Sbostic 	int exsegs;	/* Number of extra allocated segments */
64*46367Sbostic 	int (*hash)();	/* Hash Function */
65*46367Sbostic 	int flags;	/* Flag values */
66*46367Sbostic 	int fp;		/* File pointer */
67*46367Sbostic 	char *tmp_buf;	/* Temporary Buffer for BIG data */
68*46367Sbostic 	char *tmp_key;	/* Temporary Buffer for BIG keys */
69*46367Sbostic 	BUFHEAD *cpage;	/* Current page */
70*46367Sbostic 	int cbucket;	/* Current bucket */
71*46367Sbostic 	int cndx;  	/* Index of next item on cpage */
72*46367Sbostic 	int errno;	/* Error Number -- for DBM compatability */
73*46367Sbostic 	int new_file;	/* Indicates whether fd is backing store or no */
74*46367Sbostic 	int save_file;	/* Indicates whether we need to flush file at exit */
75*46367Sbostic 	u_long *mapp[NCACHED];	/* Pointers to page maps */
76*46367Sbostic 	int nbufs;	/* Number of buffers left to allocate */
77*46367Sbostic 	BUFHEAD	bufhead; /* Header of buffer lru list */
78*46367Sbostic 	SEGMENT	 *dir;	/* Hash Bucket directory */
79*46367Sbostic } HTAB;
80*46367Sbostic 
81*46367Sbostic 
82*46367Sbostic /*
83*46367Sbostic  * Constants
84*46367Sbostic  */
85*46367Sbostic #define	MAX_BSIZE		65536	/* 2^16 */
86*46367Sbostic #define MIN_BUFFERS		6
87*46367Sbostic #define MINHDRSIZE		512
88*46367Sbostic #define DEF_BUFSIZE		65536	/* 64 K */
89*46367Sbostic #define DEF_BUCKET_SIZE	256
90*46367Sbostic #define DEF_BUCKET_SHIFT	8	/* log2(BUCKET) */
91*46367Sbostic #define DEF_SEGSIZE		256
92*46367Sbostic #define DEF_SEGSIZE_SHIFT		8      /* log2(SEGSIZE)	 */
93*46367Sbostic #define DEF_DIRSIZE		256
94*46367Sbostic #define DEF_FFACTOR		5
95*46367Sbostic #define SPLTMAX		8
96*46367Sbostic #define CHARKEY		"%$sniglet^&"
97*46367Sbostic #define NUMKEY			1038583
98*46367Sbostic #define VERSION_NO		3
99*46367Sbostic #define BYTE_SHIFT		3
100*46367Sbostic #define INT_TO_BYTE		2
101*46367Sbostic #define INT_BYTE_SHIFT		5
102*46367Sbostic #define ALL_SET		((unsigned)0xFFFFFFFF)
103*46367Sbostic #define ALL_CLEAR		0
104*46367Sbostic 
105*46367Sbostic 
106*46367Sbostic #define PTROF(X)	((BUFHEAD *)((unsigned)(X)&~0x3))
107*46367Sbostic #define ISMOD(X)	((unsigned)(X)&0x1)
108*46367Sbostic #define DOMOD(X)	(X = (char *)( (unsigned)X | 0x1))
109*46367Sbostic #define ISDISK(X)	((unsigned)(X)&0x2)
110*46367Sbostic #define DODISK(X)	(X = (char *)( (unsigned)X | 0x2))
111*46367Sbostic 
112*46367Sbostic #define BITS_PER_MAP    32
113*46367Sbostic 
114*46367Sbostic /* Given the address of the beginning of a big map, clear/set the nth bit */
115*46367Sbostic 
116*46367Sbostic #define CLRBIT(A,N) ((A)[N/BITS_PER_MAP] &= ~(1<<(N%BITS_PER_MAP)))
117*46367Sbostic #define SETBIT(A,N) ((A)[N/BITS_PER_MAP] |= (1<<(N%BITS_PER_MAP)))
118*46367Sbostic #define ISSET(A,N) ((A)[N/BITS_PER_MAP] & (1<<(N%BITS_PER_MAP)))
119*46367Sbostic 
120*46367Sbostic /* Overflow management */
121*46367Sbostic /*
122*46367Sbostic 	Overflow page numbers are allocated per split point.
123*46367Sbostic 	At each doubling of the table, we can allocate extra
124*46367Sbostic 	pages.  So, an overflow page number has the top 5 bits
125*46367Sbostic 	indicate which split point and the lower 11 bits indicate
126*46367Sbostic 	which page at that split point is indicated (pages within
127*46367Sbostic 	split points are numberered starting with 1).
128*46367Sbostic 
129*46367Sbostic 
130*46367Sbostic */
131*46367Sbostic 
132*46367Sbostic #define SPLITSHIFT	11
133*46367Sbostic #define SPLITMASK	0x7FF
134*46367Sbostic #define SPLITNUM(N)	(((unsigned)N) >> SPLITSHIFT)
135*46367Sbostic #define OPAGENUM(N)	(N & SPLITMASK)
136*46367Sbostic #define	OADDR_OF(S,O)	((S << SPLITSHIFT) + O)
137*46367Sbostic 
138*46367Sbostic #define BUCKET_TO_PAGE(B) \
139*46367Sbostic 	B + hashp->HDRPAGES + (B ? hashp->SPARES[__log2(B+1)-1] : 0)
140*46367Sbostic #define OADDR_TO_PAGE(B) 	\
141*46367Sbostic 	BUCKET_TO_PAGE ( (1 << SPLITNUM(B)) -1 ) + OPAGENUM(B);
142*46367Sbostic 
143*46367Sbostic /*
144*46367Sbostic     page.h contains a detailed description of the page format.
145*46367Sbostic 
146*46367Sbostic     Normally, keys and data are accessed from offset tables in the
147*46367Sbostic     top of each page which point to the beginning of the key and
148*46367Sbostic     data.  There are four flag values which may be stored in these
149*46367Sbostic     offset tables which indicate the following:
150*46367Sbostic 
151*46367Sbostic 	OVFLPAGE	Rather than a key data pair, this pair contains
152*46367Sbostic 			the address of an overflow page.  The format of
153*46367Sbostic 			the pair is:
154*46367Sbostic 			    OVERFLOW_PAGE_NUMBER OVFLPAGE
155*46367Sbostic 
156*46367Sbostic 	PARTIAL_KEY	This must be the first key/data pair on a page
157*46367Sbostic 			and implies that page contains only a partial key.
158*46367Sbostic 			That is, the key is too big to fit on a single page
159*46367Sbostic 			so it starts on this page and continues on the next.
160*46367Sbostic 			The format of the page is:
161*46367Sbostic 			    KEY_OFF PARTIAL_KEY OVFL_PAGENO OVFLPAGE
162*46367Sbostic 
163*46367Sbostic 			    KEY_OFF -- offset of the beginning of the key
164*46367Sbostic 			    PARTIAL_KEY -- 1
165*46367Sbostic 			    OVFL_PAGENO - page number of the next overflow page
166*46367Sbostic 			    OVFLPAGE -- 0
167*46367Sbostic 	FULL_KEY	This must be the first key/data pair on the page.  It
168*46367Sbostic 			is used in two cases.
169*46367Sbostic 
170*46367Sbostic 			Case 1:
171*46367Sbostic 			    There is a complete key on the page but no data
172*46367Sbostic 			    (because it wouldn't fit).  The next page contains
173*46367Sbostic 			    the data.
174*46367Sbostic 
175*46367Sbostic 			    Page format it:
176*46367Sbostic 			    KEY_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
177*46367Sbostic 
178*46367Sbostic 			    KEY_OFF -- offset of the beginning of the key
179*46367Sbostic 			    FULL_KEY -- 2
180*46367Sbostic 			    OVFL_PAGENO - page number of the next overflow page
181*46367Sbostic 			    OVFLPAGE -- 0
182*46367Sbostic 
183*46367Sbostic 			Case 2:
184*46367Sbostic 			    This page contains no key, but part of a large
185*46367Sbostic 			    data field, which is continued on the next page.
186*46367Sbostic 
187*46367Sbostic 			    Page format it:
188*46367Sbostic 			    DATA_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
189*46367Sbostic 
190*46367Sbostic 			    KEY_OFF -- offset of the beginning of the data on
191*46367Sbostic 					this page
192*46367Sbostic 			    FULL_KEY -- 2
193*46367Sbostic 			    OVFL_PAGENO - page number of the next overflow page
194*46367Sbostic 			    OVFLPAGE -- 0
195*46367Sbostic 
196*46367Sbostic 	FULL_KEY_DATA	This must be the first key/data pair on the page.
197*46367Sbostic 			There are two cases:
198*46367Sbostic 
199*46367Sbostic 			Case 1:
200*46367Sbostic 			    This page contains a key and the beginning of the
201*46367Sbostic 			    data field, but the data field is continued on the
202*46367Sbostic 			    next page.
203*46367Sbostic 
204*46367Sbostic 			    Page format is:
205*46367Sbostic 			    KEY_OFF FULL_KEY_DATA OVFL_PAGENO DATA_OFF
206*46367Sbostic 
207*46367Sbostic 			    KEY_OFF -- offset of the beginning of the key
208*46367Sbostic 			    FULL_KEY_DATA -- 3
209*46367Sbostic 			    OVFL_PAGENO - page number of the next overflow page
210*46367Sbostic 			    DATA_OFF -- offset of the beginning of the data
211*46367Sbostic 
212*46367Sbostic 			Case 2:
213*46367Sbostic 			    This page contains the last page of a big data pair.
214*46367Sbostic 			    There is no key, only the  tail end of the data
215*46367Sbostic 			    on this page.
216*46367Sbostic 
217*46367Sbostic 			    Page format is:
218*46367Sbostic 			    DATA_OFF FULL_KEY_DATA <OVFL_PAGENO> <OVFLPAGE>
219*46367Sbostic 
220*46367Sbostic 			    DATA_OFF -- offset of the beginning of the data on
221*46367Sbostic 					this page
222*46367Sbostic 			    FULL_KEY_DATA -- 3
223*46367Sbostic 			    OVFL_PAGENO - page number of the next overflow page
224*46367Sbostic 			    OVFLPAGE -- 0
225*46367Sbostic 
226*46367Sbostic 			    OVFL_PAGENO and OVFLPAGE are optional (they are
227*46367Sbostic 			    not present if there is no next page).
228*46367Sbostic */
229*46367Sbostic #define OVFLPAGE	0
230*46367Sbostic #define PARTIAL_KEY	1
231*46367Sbostic #define FULL_KEY	2
232*46367Sbostic #define FULL_KEY_DATA	3
233*46367Sbostic #define	REAL_KEY	4
234*46367Sbostic 
235*46367Sbostic 
236*46367Sbostic /* Short hands for accessing structure */
237*46367Sbostic #define BSIZE	hdr.bsize
238*46367Sbostic #define BSHIFT	hdr.bshift
239*46367Sbostic #define DSIZE	hdr.dsize
240*46367Sbostic #define SGSIZE	hdr.ssize
241*46367Sbostic #define SSHIFT	hdr.sshift
242*46367Sbostic #define LORDER	hdr.lorder
243*46367Sbostic #define MAX_BUCKET	hdr.max_bucket
244*46367Sbostic #define FFACTOR		hdr.ffactor
245*46367Sbostic #define HIGH_MASK	hdr.high_mask
246*46367Sbostic #define LOW_MASK	hdr.low_mask
247*46367Sbostic #define NKEYS		hdr.nkeys
248*46367Sbostic #define HDRPAGES	hdr.hdrpages
249*46367Sbostic #define SPARES		hdr.spares
250*46367Sbostic #define BITMAPS		hdr.bitmaps
251*46367Sbostic #define VERSION		hdr.version
252*46367Sbostic #define MAGIC		hdr.magic
253*46367Sbostic #define NEXT_FREE	hdr.next_free
254*46367Sbostic #define H_CHARKEY	hdr.h_charkey
255