xref: /netbsd-src/lib/libc/db/hash/hash.c (revision 7fa608457b817eca6e0977b37f758ae064f3c99c)
1 /*	$NetBSD: hash.c,v 1.27 2007/02/03 23:46:09 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Margo Seltzer.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #if defined(LIBC_SCCS) && !defined(lint)
37 #if 0
38 static char sccsid[] = "@(#)hash.c	8.9 (Berkeley) 6/16/94";
39 #else
40 __RCSID("$NetBSD: hash.c,v 1.27 2007/02/03 23:46:09 christos Exp $");
41 #endif
42 #endif /* LIBC_SCCS and not lint */
43 
44 #include "namespace.h"
45 #include <sys/param.h>
46 #include <sys/stat.h>
47 
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54 #include <assert.h>
55 
56 #include <db.h>
57 #include "hash.h"
58 #include "page.h"
59 #include "extern.h"
60 
61 static int   alloc_segs(HTAB *, int);
62 static int   flush_meta(HTAB *);
63 static int   hash_access(HTAB *, ACTION, DBT *, DBT *);
64 static int   hash_close(DB *);
65 static int   hash_delete(const DB *, const DBT *, u_int32_t);
66 static int   hash_fd(const DB *);
67 static int   hash_get(const DB *, const DBT *, DBT *, u_int32_t);
68 static int   hash_put(const DB *, DBT *, const DBT *, u_int32_t);
69 static void *hash_realloc(SEGMENT **, size_t, size_t);
70 static int   hash_seq(const DB *, DBT *, DBT *, u_int32_t);
71 static int   hash_sync(const DB *, u_int32_t);
72 static int   hdestroy(HTAB *);
73 static HTAB *init_hash(HTAB *, const char *, const HASHINFO *);
74 static int   init_htab(HTAB *, size_t);
75 #if BYTE_ORDER == LITTLE_ENDIAN
76 static void  swap_header(HTAB *);
77 static void  swap_header_copy(HASHHDR *, HASHHDR *);
78 #endif
79 
/* Fast modulo; only correct when y is a power of two. */
#define MOD(x, y)		((x) & ((y) - 1))

/*
 * Store ERR in the enclosing function's local save_errno and jump to the
 * cleanup label LOC.  The do/while (0) wrapper makes the expansion a single
 * statement, so the macro is safe in an unbraced if/else.
 */
#define RETURN_ERROR(ERR, LOC)	do { save_errno = (ERR); goto LOC; } while (0)

/* Return values */
#define	SUCCESS	 (0)
#define	ERROR	(-1)
#define	ABNORMAL (1)

#ifdef HASH_STATISTICS
int hash_accesses, hash_collisions, hash_expansions, hash_overflows;
#endif
93 
94 /************************** INTERFACE ROUTINES ***************************/
95 /* OPEN/CLOSE */
96 
97 /* ARGSUSED */
98 DB *
99 __hash_open(const char *file, int flags, mode_t mode, const HASHINFO *info,
100     int dflags)
101 {
102 	HTAB *hashp;
103 	struct stat statbuf;
104 	DB *dbp;
105 	int bpages, new_table, nsegs, save_errno;
106 	ssize_t hdrsize;
107 
108 	if ((flags & O_ACCMODE) == O_WRONLY) {
109 		errno = EINVAL;
110 		return (NULL);
111 	}
112 
113 	if (!(hashp = calloc(1, sizeof(HTAB))))
114 		return (NULL);
115 	hashp->fp = -1;
116 
117 	/*
118 	 * Even if user wants write only, we need to be able to read
119 	 * the actual file, so we need to open it read/write. But, the
120 	 * field in the hashp structure needs to be accurate so that
121 	 * we can check accesses.
122 	 */
123 	hashp->flags = flags;
124 
125 	new_table = 0;
126 	if (!file || (flags & O_TRUNC) ||
127 	    (stat(file, &statbuf) && (errno == ENOENT))) {
128 		if (errno == ENOENT)
129 			errno = 0; /* Just in case someone looks at errno */
130 		new_table = 1;
131 	}
132 	if (file) {
133 		if ((hashp->fp = open(file, flags, mode)) == -1)
134 			RETURN_ERROR(errno, error0);
135 		if (fcntl(hashp->fp, F_SETFD, FD_CLOEXEC) == -1)
136 			RETURN_ERROR(errno, error1);
137 		if (fstat(hashp->fp, &statbuf) == -1)
138 			RETURN_ERROR(errno, error1);
139 		new_table |= statbuf.st_size == 0;
140 	}
141 	if (new_table) {
142 		if (!(hashp = init_hash(hashp, file, info)))
143 			RETURN_ERROR(errno, error1);
144 	} else {
145 		/* Table already exists */
146 		if (info && info->hash)
147 			hashp->hash = info->hash;
148 		else
149 			hashp->hash = __default_hash;
150 
151 		hdrsize = read(hashp->fp, &hashp->hdr, sizeof(HASHHDR));
152 #if BYTE_ORDER == LITTLE_ENDIAN
153 		swap_header(hashp);
154 #endif
155 		if (hdrsize == -1)
156 			RETURN_ERROR(errno, error1);
157 		if (hdrsize != sizeof(HASHHDR))
158 			RETURN_ERROR(EFTYPE, error1);
159 		/* Verify file type, versions and hash function */
160 		if (hashp->MAGIC != HASHMAGIC)
161 			RETURN_ERROR(EFTYPE, error1);
162 #define	OLDHASHVERSION	1
163 		if (hashp->VERSION != HASHVERSION &&
164 		    hashp->VERSION != OLDHASHVERSION)
165 			RETURN_ERROR(EFTYPE, error1);
166 		if (hashp->hash(CHARKEY, sizeof(CHARKEY)) != hashp->H_CHARKEY)
167 			RETURN_ERROR(EFTYPE, error1);
168 		/*
169 		 * Figure out how many segments we need.  Max_Bucket is the
170 		 * maximum bucket number, so the number of buckets is
171 		 * max_bucket + 1.
172 		 */
173 		nsegs = (hashp->MAX_BUCKET + 1 + hashp->SGSIZE - 1) /
174 			 hashp->SGSIZE;
175 		hashp->nsegs = 0;
176 		if (alloc_segs(hashp, nsegs))
177 			/*
178 			 * If alloc_segs fails, table will have been destroyed
179 			 * and errno will have been set.
180 			 */
181 			return (NULL);
182 		/* Read in bitmaps */
183 		bpages = (hashp->SPARES[hashp->OVFL_POINT] +
184 		    (unsigned int)(hashp->BSIZE << BYTE_SHIFT) - 1) >>
185 		    (hashp->BSHIFT + BYTE_SHIFT);
186 
187 		hashp->nmaps = bpages;
188 		(void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *));
189 	}
190 
191 	/* Initialize Buffer Manager */
192 	if (info && info->cachesize)
193 		__buf_init(hashp, info->cachesize);
194 	else
195 		__buf_init(hashp, DEF_BUFSIZE);
196 
197 	hashp->new_file = new_table;
198 	hashp->save_file = file && (hashp->flags & O_RDWR);
199 	hashp->cbucket = -1;
200 	if (!(dbp = malloc(sizeof(DB)))) {
201 		save_errno = errno;
202 		hdestroy(hashp);
203 		errno = save_errno;
204 		return (NULL);
205 	}
206 	dbp->internal = hashp;
207 	dbp->close = hash_close;
208 	dbp->del = hash_delete;
209 	dbp->fd = hash_fd;
210 	dbp->get = hash_get;
211 	dbp->put = hash_put;
212 	dbp->seq = hash_seq;
213 	dbp->sync = hash_sync;
214 	dbp->type = DB_HASH;
215 
216 #ifdef DEBUG
217 	(void)fprintf(stderr,
218 "%s\n%s%p\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
219 	    "init_htab:",
220 	    "TABLE POINTER   ", hashp,
221 	    "BUCKET SIZE     ", hashp->BSIZE,
222 	    "BUCKET SHIFT    ", hashp->BSHIFT,
223 	    "DIRECTORY SIZE  ", hashp->DSIZE,
224 	    "SEGMENT SIZE    ", hashp->SGSIZE,
225 	    "SEGMENT SHIFT   ", hashp->SSHIFT,
226 	    "FILL FACTOR     ", hashp->FFACTOR,
227 	    "MAX BUCKET      ", hashp->MAX_BUCKET,
228 	    "OVFL POINT	     ", hashp->OVFL_POINT,
229 	    "LAST FREED      ", hashp->LAST_FREED,
230 	    "HIGH MASK       ", hashp->HIGH_MASK,
231 	    "LOW  MASK       ", hashp->LOW_MASK,
232 	    "NSEGS           ", hashp->nsegs,
233 	    "NKEYS           ", hashp->NKEYS);
234 #endif
235 #ifdef HASH_STATISTICS
236 	hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0;
237 #endif
238 	return (dbp);
239 
240 error1:
241 	if (hashp != NULL)
242 		(void)close(hashp->fp);
243 
244 error0:
245 	free(hashp);
246 	errno = save_errno;
247 	return (NULL);
248 }
249 
250 static int
251 hash_close(DB *dbp)
252 {
253 	HTAB *hashp;
254 	int retval;
255 
256 	if (!dbp)
257 		return (ERROR);
258 
259 	hashp = dbp->internal;
260 	retval = hdestroy(hashp);
261 	free(dbp);
262 	return (retval);
263 }
264 
265 static int
266 hash_fd(const DB *dbp)
267 {
268 	HTAB *hashp;
269 
270 	if (!dbp)
271 		return (ERROR);
272 
273 	hashp = dbp->internal;
274 	if (hashp->fp == -1) {
275 		errno = ENOENT;
276 		return (-1);
277 	}
278 	return (hashp->fp);
279 }
280 
281 /************************** LOCAL CREATION ROUTINES **********************/
282 static HTAB *
283 init_hash(HTAB *hashp, const char *file, const HASHINFO *info)
284 {
285 	struct stat statbuf;
286 	int nelem;
287 
288 	nelem = 1;
289 	hashp->NKEYS = 0;
290 	hashp->LORDER = BYTE_ORDER;
291 	hashp->BSIZE = DEF_BUCKET_SIZE;
292 	hashp->BSHIFT = DEF_BUCKET_SHIFT;
293 	hashp->SGSIZE = DEF_SEGSIZE;
294 	hashp->SSHIFT = DEF_SEGSIZE_SHIFT;
295 	hashp->DSIZE = DEF_DIRSIZE;
296 	hashp->FFACTOR = DEF_FFACTOR;
297 	hashp->hash = __default_hash;
298 	memset(hashp->SPARES, 0, sizeof(hashp->SPARES));
299 	memset(hashp->BITMAPS, 0, sizeof (hashp->BITMAPS));
300 
301 	/* Fix bucket size to be optimal for file system */
302 	if (file != NULL) {
303 		if (stat(file, &statbuf))
304 			return (NULL);
305 		hashp->BSIZE = MIN(statbuf.st_blksize, MAX_BSIZE);
306 		hashp->BSHIFT = __log2((u_int32_t)hashp->BSIZE);
307 	}
308 
309 	if (info) {
310 		if (info->bsize) {
311 			/* Round pagesize up to power of 2 */
312 			hashp->BSHIFT = __log2(info->bsize);
313 			hashp->BSIZE = 1 << hashp->BSHIFT;
314 			if (hashp->BSIZE > MAX_BSIZE) {
315 				errno = EINVAL;
316 				return (NULL);
317 			}
318 		}
319 		if (info->ffactor)
320 			hashp->FFACTOR = info->ffactor;
321 		if (info->hash)
322 			hashp->hash = info->hash;
323 		if (info->nelem)
324 			nelem = info->nelem;
325 		if (info->lorder) {
326 			if (info->lorder != BIG_ENDIAN &&
327 			    info->lorder != LITTLE_ENDIAN) {
328 				errno = EINVAL;
329 				return (NULL);
330 			}
331 			hashp->LORDER = info->lorder;
332 		}
333 	}
334 	/* init_htab should destroy the table and set errno if it fails */
335 	if (init_htab(hashp, (size_t)nelem))
336 		return (NULL);
337 	else
338 		return (hashp);
339 }
340 /*
341  * This calls alloc_segs which may run out of memory.  Alloc_segs will destroy
342  * the table and set errno, so we just pass the error information along.
343  *
344  * Returns 0 on No Error
345  */
346 static int
347 init_htab(HTAB *hashp, size_t nelem)
348 {
349 	int nbuckets;
350 	u_int32_t nsegs;
351 	int l2;
352 
353 	/*
354 	 * Divide number of elements by the fill factor and determine a
355 	 * desired number of buckets.  Allocate space for the next greater
356 	 * power of two number of buckets.
357 	 */
358 	nelem = (nelem - 1) / hashp->FFACTOR + 1;
359 
360 	_DBFIT(nelem, u_int32_t);
361 	l2 = __log2(MAX((u_int32_t)nelem, 2));
362 	nbuckets = 1 << l2;
363 
364 	hashp->SPARES[l2] = l2 + 1;
365 	hashp->SPARES[l2 + 1] = l2 + 1;
366 	hashp->OVFL_POINT = l2;
367 	hashp->LAST_FREED = 2;
368 
369 	/* First bitmap page is at: splitpoint l2 page offset 1 */
370 	if (__ibitmap(hashp, (int)OADDR_OF(l2, 1), l2 + 1, 0))
371 		return (-1);
372 
373 	hashp->MAX_BUCKET = hashp->LOW_MASK = nbuckets - 1;
374 	hashp->HIGH_MASK = (nbuckets << 1) - 1;
375 	/* LINTED constant in conditional context */
376 	hashp->HDRPAGES = ((MAX(sizeof(HASHHDR), MINHDRSIZE) - 1) >>
377 	    hashp->BSHIFT) + 1;
378 
379 	nsegs = (nbuckets - 1) / hashp->SGSIZE + 1;
380 	nsegs = 1 << __log2(nsegs);
381 
382 	if (nsegs > hashp->DSIZE)
383 		hashp->DSIZE = nsegs;
384 	return (alloc_segs(hashp, (int)nsegs));
385 }
386 
387 /********************** DESTROY/CLOSE ROUTINES ************************/
388 
389 /*
390  * Flushes any changes to the file if necessary and destroys the hashp
391  * structure, freeing all allocated space.
392  */
393 static int
394 hdestroy(HTAB *hashp)
395 {
396 	int i, save_errno;
397 
398 	save_errno = 0;
399 
400 #ifdef HASH_STATISTICS
401 	(void)fprintf(stderr, "hdestroy: accesses %d collisions %d\n",
402 	    hash_accesses, hash_collisions);
403 	(void)fprintf(stderr, "hdestroy: expansions %d\n",
404 	    hash_expansions);
405 	(void)fprintf(stderr, "hdestroy: overflows %d\n",
406 	    hash_overflows);
407 	(void)fprintf(stderr, "keys %d maxp %d segmentcount %d\n",
408 	    hashp->NKEYS, hashp->MAX_BUCKET, hashp->nsegs);
409 
410 	for (i = 0; i < NCACHED; i++)
411 		(void)fprintf(stderr,
412 		    "spares[%d] = %d\n", i, hashp->SPARES[i]);
413 #endif
414 	/*
415 	 * Call on buffer manager to free buffers, and if required,
416 	 * write them to disk.
417 	 */
418 	if (__buf_free(hashp, 1, hashp->save_file))
419 		save_errno = errno;
420 	if (hashp->dir) {
421 		free(*hashp->dir);	/* Free initial segments */
422 		/* Free extra segments */
423 		while (hashp->exsegs--)
424 			free(hashp->dir[--hashp->nsegs]);
425 		free(hashp->dir);
426 	}
427 	if (flush_meta(hashp) && !save_errno)
428 		save_errno = errno;
429 	/* Free Bigmaps */
430 	for (i = 0; i < hashp->nmaps; i++)
431 		if (hashp->mapp[i])
432 			free(hashp->mapp[i]);
433 
434 	if (hashp->fp != -1)
435 		(void)close(hashp->fp);
436 
437 	free(hashp);
438 
439 	if (save_errno) {
440 		errno = save_errno;
441 		return (ERROR);
442 	}
443 	return (SUCCESS);
444 }
445 /*
446  * Write modified pages to disk
447  *
448  * Returns:
449  *	 0 == OK
450  *	-1 ERROR
451  */
452 static int
453 hash_sync(const DB *dbp, u_int32_t flags)
454 {
455 	HTAB *hashp;
456 
457 	if (flags != 0) {
458 		errno = EINVAL;
459 		return (ERROR);
460 	}
461 
462 	if (!dbp)
463 		return (ERROR);
464 
465 	hashp = dbp->internal;
466 	if (!hashp->save_file)
467 		return (0);
468 	if (__buf_free(hashp, 0, 1) || flush_meta(hashp))
469 		return (ERROR);
470 	hashp->new_file = 0;
471 	return (0);
472 }
473 
474 /*
475  * Returns:
476  *	 0 == OK
477  *	-1 indicates that errno should be set
478  */
479 static int
480 flush_meta(HTAB *hashp)
481 {
482 	HASHHDR *whdrp;
483 #if BYTE_ORDER == LITTLE_ENDIAN
484 	HASHHDR whdr;
485 #endif
486 	int fp, i;
487 	ssize_t wsize;
488 
489 	if (!hashp->save_file)
490 		return (0);
491 	hashp->MAGIC = HASHMAGIC;
492 	hashp->VERSION = HASHVERSION;
493 	hashp->H_CHARKEY = hashp->hash(CHARKEY, sizeof(CHARKEY));
494 
495 	fp = hashp->fp;
496 	whdrp = &hashp->hdr;
497 #if BYTE_ORDER == LITTLE_ENDIAN
498 	whdrp = &whdr;
499 	swap_header_copy(&hashp->hdr, whdrp);
500 #endif
501 	if ((wsize = pwrite(fp, whdrp, sizeof(HASHHDR), (off_t)0)) == -1)
502 		return (-1);
503 	else
504 		if (wsize != sizeof(HASHHDR)) {
505 			errno = EFTYPE;
506 			hashp->err = errno;
507 			return (-1);
508 		}
509 	for (i = 0; i < NCACHED; i++)
510 		if (hashp->mapp[i])
511 			if (__put_page(hashp, (char *)(void *)hashp->mapp[i],
512 				(u_int)hashp->BITMAPS[i], 0, 1))
513 				return (-1);
514 	return (0);
515 }
516 
517 /*******************************SEARCH ROUTINES *****************************/
518 /*
519  * All the access routines return
520  *
521  * Returns:
522  *	 0 on SUCCESS
523  *	 1 to indicate an external ERROR (i.e. key not found, etc)
524  *	-1 to indicate an internal ERROR (i.e. out of memory, etc)
525  */
526 static int
527 hash_get(const DB *dbp, const DBT *key, DBT *data, u_int32_t flag)
528 {
529 	HTAB *hashp;
530 
531 	hashp = dbp->internal;
532 	if (flag) {
533 		hashp->err = errno = EINVAL;
534 		return (ERROR);
535 	}
536 	return (hash_access(hashp, HASH_GET, __UNCONST(key), data));
537 }
538 
539 static int
540 hash_put(const DB *dbp, DBT *key, const DBT *data, u_int32_t flag)
541 {
542 	HTAB *hashp;
543 
544 	hashp = dbp->internal;
545 	if (flag && flag != R_NOOVERWRITE) {
546 		hashp->err = errno = EINVAL;
547 		return (ERROR);
548 	}
549 	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
550 		hashp->err = errno = EPERM;
551 		return (ERROR);
552 	}
553 	/* LINTED const castaway */
554 	return (hash_access(hashp, flag == R_NOOVERWRITE ?
555 	    HASH_PUTNEW : HASH_PUT, __UNCONST(key), __UNCONST(data)));
556 }
557 
558 static int
559 hash_delete(const DB *dbp, const DBT *key, u_int32_t flag)
560 {
561 	HTAB *hashp;
562 
563 	hashp = dbp->internal;
564 	if (flag && flag != R_CURSOR) {
565 		hashp->err = errno = EINVAL;
566 		return (ERROR);
567 	}
568 	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
569 		hashp->err = errno = EPERM;
570 		return (ERROR);
571 	}
572 	return hash_access(hashp, HASH_DELETE, __UNCONST(key), NULL);
573 }
574 
575 /*
576  * Assume that hashp has been set in wrapper routine.
577  */
578 static int
579 hash_access(HTAB *hashp, ACTION action, DBT *key, DBT *val)
580 {
581 	BUFHEAD *rbufp;
582 	BUFHEAD *bufp, *save_bufp;
583 	u_int16_t *bp;
584 	int n, ndx, off;
585 	size_t size;
586 	char *kp;
587 	u_int16_t pageno;
588 
589 #ifdef HASH_STATISTICS
590 	hash_accesses++;
591 #endif
592 
593 	off = hashp->BSIZE;
594 	size = key->size;
595 	kp = (char *)key->data;
596 	rbufp = __get_buf(hashp, __call_hash(hashp, kp, (int)size), NULL, 0);
597 	if (!rbufp)
598 		return (ERROR);
599 	save_bufp = rbufp;
600 
601 	/* Pin the bucket chain */
602 	rbufp->flags |= BUF_PIN;
603 	for (bp = (u_int16_t *)(void *)rbufp->page, n = *bp++, ndx = 1; ndx < n;)
604 		if (bp[1] >= REAL_KEY) {
605 			/* Real key/data pair */
606 			if (size == off - *bp &&
607 			    memcmp(kp, rbufp->page + *bp, size) == 0)
608 				goto found;
609 			off = bp[1];
610 #ifdef HASH_STATISTICS
611 			hash_collisions++;
612 #endif
613 			bp += 2;
614 			ndx += 2;
615 		} else if (bp[1] == OVFLPAGE) {
616 			rbufp = __get_buf(hashp, (u_int32_t)*bp, rbufp, 0);
617 			if (!rbufp) {
618 				save_bufp->flags &= ~BUF_PIN;
619 				return (ERROR);
620 			}
621 			/* FOR LOOP INIT */
622 			bp = (u_int16_t *)(void *)rbufp->page;
623 			n = *bp++;
624 			ndx = 1;
625 			off = hashp->BSIZE;
626 		} else if (bp[1] < REAL_KEY) {
627 			if ((ndx =
628 			    __find_bigpair(hashp, rbufp, ndx, kp, (int)size)) > 0)
629 				goto found;
630 			if (ndx == -2) {
631 				bufp = rbufp;
632 				if (!(pageno =
633 				    __find_last_page(hashp, &bufp))) {
634 					ndx = 0;
635 					rbufp = bufp;
636 					break;	/* FOR */
637 				}
638 				rbufp = __get_buf(hashp, (u_int32_t)pageno,
639 				    bufp, 0);
640 				if (!rbufp) {
641 					save_bufp->flags &= ~BUF_PIN;
642 					return (ERROR);
643 				}
644 				/* FOR LOOP INIT */
645 				bp = (u_int16_t *)(void *)rbufp->page;
646 				n = *bp++;
647 				ndx = 1;
648 				off = hashp->BSIZE;
649 			} else {
650 				save_bufp->flags &= ~BUF_PIN;
651 				return (ERROR);
652 			}
653 		}
654 
655 	/* Not found */
656 	switch (action) {
657 	case HASH_PUT:
658 	case HASH_PUTNEW:
659 		if (__addel(hashp, rbufp, key, val)) {
660 			save_bufp->flags &= ~BUF_PIN;
661 			return (ERROR);
662 		} else {
663 			save_bufp->flags &= ~BUF_PIN;
664 			return (SUCCESS);
665 		}
666 	case HASH_GET:
667 	case HASH_DELETE:
668 	default:
669 		save_bufp->flags &= ~BUF_PIN;
670 		return (ABNORMAL);
671 	}
672 
673 found:
674 	switch (action) {
675 	case HASH_PUTNEW:
676 		save_bufp->flags &= ~BUF_PIN;
677 		return (ABNORMAL);
678 	case HASH_GET:
679 		bp = (u_int16_t *)(void *)rbufp->page;
680 		if (bp[ndx + 1] < REAL_KEY) {
681 			if (__big_return(hashp, rbufp, ndx, val, 0))
682 				return (ERROR);
683 		} else {
684 			val->data = (u_char *)rbufp->page + (int)bp[ndx + 1];
685 			val->size = bp[ndx] - bp[ndx + 1];
686 		}
687 		break;
688 	case HASH_PUT:
689 		if ((__delpair(hashp, rbufp, ndx)) ||
690 		    (__addel(hashp, rbufp, key, val))) {
691 			save_bufp->flags &= ~BUF_PIN;
692 			return (ERROR);
693 		}
694 		break;
695 	case HASH_DELETE:
696 		if (__delpair(hashp, rbufp, ndx))
697 			return (ERROR);
698 		break;
699 	default:
700 		abort();
701 	}
702 	save_bufp->flags &= ~BUF_PIN;
703 	return (SUCCESS);
704 }
705 
706 static int
707 hash_seq(const DB *dbp, DBT *key, DBT *data, u_int32_t flag)
708 {
709 	u_int32_t bucket;
710 	BUFHEAD *bufp = NULL; /* XXX: gcc */
711 	HTAB *hashp;
712 	u_int16_t *bp, ndx;
713 
714 	hashp = dbp->internal;
715 	if (flag && flag != R_FIRST && flag != R_NEXT) {
716 		hashp->err = errno = EINVAL;
717 		return (ERROR);
718 	}
719 #ifdef HASH_STATISTICS
720 	hash_accesses++;
721 #endif
722 	if ((hashp->cbucket < 0) || (flag == R_FIRST)) {
723 		hashp->cbucket = 0;
724 		hashp->cndx = 1;
725 		hashp->cpage = NULL;
726 	}
727 
728 	for (bp = NULL; !bp || !bp[0]; ) {
729 		if (!(bufp = hashp->cpage)) {
730 			for (bucket = hashp->cbucket;
731 			    bucket <= hashp->MAX_BUCKET;
732 			    bucket++, hashp->cndx = 1) {
733 				bufp = __get_buf(hashp, bucket, NULL, 0);
734 				if (!bufp)
735 					return (ERROR);
736 				hashp->cpage = bufp;
737 				bp = (u_int16_t *)(void *)bufp->page;
738 				if (bp[0])
739 					break;
740 			}
741 			hashp->cbucket = bucket;
742 			if (hashp->cbucket > hashp->MAX_BUCKET) {
743 				hashp->cbucket = -1;
744 				return (ABNORMAL);
745 			}
746 		} else
747 			bp = (u_int16_t *)(void *)hashp->cpage->page;
748 
749 		_DIAGASSERT(bp != NULL);
750 		_DIAGASSERT(bufp != NULL);
751 		while (bp[hashp->cndx + 1] == OVFLPAGE) {
752 			bufp = hashp->cpage =
753 			    __get_buf(hashp, (u_int32_t)bp[hashp->cndx], bufp,
754 				0);
755 			if (!bufp)
756 				return (ERROR);
757 			bp = (u_int16_t *)(void *)(bufp->page);
758 			hashp->cndx = 1;
759 		}
760 		if (!bp[0]) {
761 			hashp->cpage = NULL;
762 			++hashp->cbucket;
763 		}
764 	}
765 	ndx = hashp->cndx;
766 	if (bp[ndx + 1] < REAL_KEY) {
767 		if (__big_keydata(hashp, bufp, key, data, 1))
768 			return (ERROR);
769 	} else {
770 		if (hashp->cpage == NULL)
771 			return (ERROR);
772 		key->data = (u_char *)hashp->cpage->page + bp[ndx];
773 		key->size = (ndx > 1 ? bp[ndx - 1] : hashp->BSIZE) - bp[ndx];
774 		data->data = (u_char *)hashp->cpage->page + bp[ndx + 1];
775 		data->size = bp[ndx] - bp[ndx + 1];
776 		ndx += 2;
777 		if (ndx > bp[0]) {
778 			hashp->cpage = NULL;
779 			hashp->cbucket++;
780 			hashp->cndx = 1;
781 		} else
782 			hashp->cndx = ndx;
783 	}
784 	return (SUCCESS);
785 }
786 
787 /********************************* UTILITIES ************************/
788 
789 /*
790  * Returns:
791  *	 0 ==> OK
792  *	-1 ==> Error
793  */
794 int
795 __expand_table(HTAB *hashp)
796 {
797 	u_int32_t old_bucket, new_bucket;
798 	int new_segnum, spare_ndx;
799 	size_t dirsize;
800 
801 #ifdef HASH_STATISTICS
802 	hash_expansions++;
803 #endif
804 	new_bucket = ++hashp->MAX_BUCKET;
805 	old_bucket = (hashp->MAX_BUCKET & hashp->LOW_MASK);
806 
807 	new_segnum = new_bucket >> hashp->SSHIFT;
808 
809 	/* Check if we need a new segment */
810 	if (new_segnum >= hashp->nsegs) {
811 		/* Check if we need to expand directory */
812 		if (new_segnum >= hashp->DSIZE) {
813 			/* Reallocate directory */
814 			dirsize = hashp->DSIZE * sizeof(SEGMENT *);
815 			if (!hash_realloc(&hashp->dir, dirsize, dirsize << 1))
816 				return (-1);
817 			hashp->DSIZE = dirsize << 1;
818 		}
819 		if ((hashp->dir[new_segnum] =
820 		    calloc((size_t)hashp->SGSIZE, sizeof(SEGMENT))) == NULL)
821 			return (-1);
822 		hashp->exsegs++;
823 		hashp->nsegs++;
824 	}
825 	/*
826 	 * If the split point is increasing (MAX_BUCKET's log base 2
827 	 * * increases), we need to copy the current contents of the spare
828 	 * split bucket to the next bucket.
829 	 */
830 	spare_ndx = __log2((u_int32_t)(hashp->MAX_BUCKET + 1));
831 	if (spare_ndx > hashp->OVFL_POINT) {
832 		hashp->SPARES[spare_ndx] = hashp->SPARES[hashp->OVFL_POINT];
833 		hashp->OVFL_POINT = spare_ndx;
834 	}
835 
836 	if (new_bucket > hashp->HIGH_MASK) {
837 		/* Starting a new doubling */
838 		hashp->LOW_MASK = hashp->HIGH_MASK;
839 		hashp->HIGH_MASK = new_bucket | hashp->LOW_MASK;
840 	}
841 	/* Relocate records to the new bucket */
842 	return (__split_page(hashp, old_bucket, new_bucket));
843 }
844 
845 /*
846  * If realloc guarantees that the pointer is not destroyed if the realloc
847  * fails, then this routine can go away.
848  */
849 static void *
850 hash_realloc(SEGMENT **p_ptr, size_t oldsize, size_t newsize)
851 {
852 	void *p;
853 
854 	if ((p = malloc(newsize)) != NULL) {
855 		memmove(p, *p_ptr, oldsize);
856 		memset((char *)p + oldsize, 0, newsize - oldsize);
857 		free(*p_ptr);
858 		*p_ptr = p;
859 	}
860 	return (p);
861 }
862 
863 u_int32_t
864 __call_hash(HTAB *hashp, char *k, int len)
865 {
866 	int n, bucket;
867 
868 	n = hashp->hash(k, (size_t)len);
869 	bucket = n & hashp->HIGH_MASK;
870 	if (bucket > hashp->MAX_BUCKET)
871 		bucket = bucket & hashp->LOW_MASK;
872 	return (bucket);
873 }
874 
875 /*
876  * Allocate segment table.  On error, destroy the table and set errno.
877  *
878  * Returns 0 on success
879  */
880 static int
881 alloc_segs(HTAB *hashp, int nsegs)
882 {
883 	int i;
884 	SEGMENT store;
885 
886 	int save_errno;
887 
888 	hashp->dir = calloc((size_t)hashp->DSIZE, sizeof(SEGMENT *));
889 	if (hashp->dir == NULL) {
890 		save_errno = errno;
891 		(void)hdestroy(hashp);
892 		errno = save_errno;
893 		return (-1);
894 	}
895 	hashp->nsegs = nsegs;
896 	if (nsegs == 0)
897 		return 0;
898 	/* Allocate segments */
899 	store = calloc((size_t)(nsegs << hashp->SSHIFT), sizeof(SEGMENT));
900 	if (store == NULL) {
901 		save_errno = errno;
902 		(void)hdestroy(hashp);
903 		errno = save_errno;
904 		return (-1);
905 	}
906 	for (i = 0; i < nsegs; i++)
907 		hashp->dir[i] = &store[i << hashp->SSHIFT];
908 	return (0);
909 }
910 
911 #if BYTE_ORDER == LITTLE_ENDIAN
912 /*
913  * Hashp->hdr needs to be byteswapped.
914  */
915 static void
916 swap_header_copy(HASHHDR *srcp, HASHHDR *destp)
917 {
918 	size_t i;
919 
920 	P_32_COPY(srcp->magic, destp->magic);
921 	P_32_COPY(srcp->version, destp->version);
922 	P_32_COPY(srcp->lorder, destp->lorder);
923 	P_32_COPY(srcp->bsize, destp->bsize);
924 	P_32_COPY(srcp->bshift, destp->bshift);
925 	P_32_COPY(srcp->dsize, destp->dsize);
926 	P_32_COPY(srcp->ssize, destp->ssize);
927 	P_32_COPY(srcp->sshift, destp->sshift);
928 	P_32_COPY(srcp->ovfl_point, destp->ovfl_point);
929 	P_32_COPY(srcp->last_freed, destp->last_freed);
930 	P_32_COPY(srcp->max_bucket, destp->max_bucket);
931 	P_32_COPY(srcp->high_mask, destp->high_mask);
932 	P_32_COPY(srcp->low_mask, destp->low_mask);
933 	P_32_COPY(srcp->ffactor, destp->ffactor);
934 	P_32_COPY(srcp->nkeys, destp->nkeys);
935 	P_32_COPY(srcp->hdrpages, destp->hdrpages);
936 	P_32_COPY(srcp->h_charkey, destp->h_charkey);
937 	for (i = 0; i < NCACHED; i++) {
938 		P_32_COPY(srcp->spares[i], destp->spares[i]);
939 		P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]);
940 	}
941 }
942 
943 static void
944 swap_header(HTAB *hashp)
945 {
946 	HASHHDR *hdrp;
947 	size_t i;
948 
949 	hdrp = &hashp->hdr;
950 
951 	M_32_SWAP(hdrp->magic);
952 	M_32_SWAP(hdrp->version);
953 	M_32_SWAP(hdrp->lorder);
954 	M_32_SWAP(hdrp->bsize);
955 	M_32_SWAP(hdrp->bshift);
956 	M_32_SWAP(hdrp->dsize);
957 	M_32_SWAP(hdrp->ssize);
958 	M_32_SWAP(hdrp->sshift);
959 	M_32_SWAP(hdrp->ovfl_point);
960 	M_32_SWAP(hdrp->last_freed);
961 	M_32_SWAP(hdrp->max_bucket);
962 	M_32_SWAP(hdrp->high_mask);
963 	M_32_SWAP(hdrp->low_mask);
964 	M_32_SWAP(hdrp->ffactor);
965 	M_32_SWAP(hdrp->nkeys);
966 	M_32_SWAP(hdrp->hdrpages);
967 	M_32_SWAP(hdrp->h_charkey);
968 	for (i = 0; i < NCACHED; i++) {
969 		M_32_SWAP(hdrp->spares[i]);
970 		M_16_SWAP(hdrp->bitmaps[i]);
971 	}
972 }
973 #endif
974