1 /* $NetBSD: hash_page.c,v 1.20 2007/02/03 23:46:09 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Margo Seltzer. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #if defined(LIBC_SCCS) && !defined(lint) 37 #if 0 38 static char sccsid[] = "@(#)hash_page.c 8.7 (Berkeley) 8/16/94"; 39 #else 40 __RCSID("$NetBSD: hash_page.c,v 1.20 2007/02/03 23:46:09 christos Exp $"); 41 #endif 42 #endif /* LIBC_SCCS and not lint */ 43 44 /* 45 * PACKAGE: hashing 46 * 47 * DESCRIPTION: 48 * Page manipulation for hashing package. 49 * 50 * ROUTINES: 51 * 52 * External 53 * __get_page 54 * __add_ovflpage 55 * Internal 56 * overflow_page 57 * open_temp 58 */ 59 60 #include "namespace.h" 61 62 #include <sys/types.h> 63 64 #include <errno.h> 65 #include <fcntl.h> 66 #include <signal.h> 67 #include <stdio.h> 68 #include <stdlib.h> 69 #include <string.h> 70 #include <unistd.h> 71 #include <paths.h> 72 #include <assert.h> 73 74 #include <db.h> 75 #include "hash.h" 76 #include "page.h" 77 #include "extern.h" 78 79 static u_int32_t*fetch_bitmap(HTAB *, int); 80 static u_int32_t first_free(u_int32_t); 81 static int open_temp(HTAB *); 82 static u_int16_t overflow_page(HTAB *); 83 static void putpair(char *, const DBT *, const DBT *); 84 static void squeeze_key(u_int16_t *, const DBT *, const DBT *); 85 static int ugly_split(HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int); 86 87 #define PAGE_INIT(P) { \ 88 ((u_int16_t *)(void *)(P))[0] = 0; \ 89 temp = 3 * sizeof(u_int16_t); \ 90 _DIAGASSERT(hashp->BSIZE >= temp); \ 91 ((u_int16_t *)(void *)(P))[1] = (u_int16_t)(hashp->BSIZE - temp); \ 92 ((u_int16_t *)(void *)(P))[2] = hashp->BSIZE; \ 93 } 94 95 /* 96 * This is called AFTER we have verified that there is room on the page for 97 * the pair (PAIRFITS has returned true) so we go right ahead and start moving 98 * stuff on. 99 */ 100 static void 101 putpair(char *p, const DBT *key, const DBT *val) 102 { 103 u_int16_t *bp, n, off; 104 size_t temp; 105 106 bp = (u_int16_t *)(void *)p; 107 108 /* Enter the key first. */ 109 n = bp[0]; 110 111 temp = OFFSET(bp); 112 _DIAGASSERT(temp >= key->size); 113 off = (u_int16_t)(temp - key->size); 114 memmove(p + off, key->data, key->size); 115 bp[++n] = off; 116 117 /* Now the data. */ 118 _DIAGASSERT(off >= val->size); 119 off -= (u_int16_t)val->size; 120 memmove(p + off, val->data, val->size); 121 bp[++n] = off; 122 123 /* Adjust page info. */ 124 bp[0] = n; 125 temp = (n + 3) * sizeof(u_int16_t); 126 _DIAGASSERT(off >= temp); 127 bp[n + 1] = (u_int16_t)(off - temp); 128 bp[n + 2] = off; 129 } 130 131 /* 132 * Returns: 133 * 0 OK 134 * -1 error 135 */ 136 int 137 __delpair(HTAB *hashp, BUFHEAD *bufp, int ndx) 138 { 139 u_int16_t *bp, newoff; 140 int n; 141 u_int16_t pairlen; 142 size_t temp; 143 144 bp = (u_int16_t *)(void *)bufp->page; 145 n = bp[0]; 146 147 if (bp[ndx + 1] < REAL_KEY) 148 return (__big_delete(hashp, bufp)); 149 if (ndx != 1) 150 newoff = bp[ndx - 1]; 151 else 152 newoff = hashp->BSIZE; 153 pairlen = newoff - bp[ndx + 1]; 154 155 if (ndx != (n - 1)) { 156 /* Hard Case -- need to shuffle keys */ 157 int i; 158 char *src = bufp->page + (int)OFFSET(bp); 159 char *dst = src + (int)pairlen; 160 memmove(dst, src, (size_t)(bp[ndx + 1] - OFFSET(bp))); 161 162 /* Now adjust the pointers */ 163 for (i = ndx + 2; i <= n; i += 2) { 164 if (bp[i + 1] == OVFLPAGE) { 165 bp[i - 2] = bp[i]; 166 bp[i - 1] = bp[i + 1]; 167 } else { 168 bp[i - 2] = bp[i] + pairlen; 169 bp[i - 1] = bp[i + 1] + pairlen; 170 } 171 } 172 } 173 /* Finally adjust the page data */ 174 bp[n] = OFFSET(bp) + pairlen; 175 temp = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t); 176 _DIAGASSERT(temp <= 0xffff); 177 bp[n - 1] = (u_int16_t)temp; 178 bp[0] = n - 2; 179 hashp->NKEYS--; 180 181 bufp->flags |= BUF_MOD; 182 return (0); 183 } 184 /* 185 * Returns: 186 * 0 ==> OK 187 * -1 ==> Error 188 */ 189 int 190 __split_page(HTAB *hashp, u_int32_t obucket, u_int32_t nbucket) 191 { 192 BUFHEAD *new_bufp, *old_bufp; 193 u_int16_t *ino; 194 char *np; 195 DBT key, val; 196 int n, ndx, retval; 197 u_int16_t copyto, diff, off, moved; 198 char *op; 199 size_t temp; 200 201 copyto = (u_int16_t)hashp->BSIZE; 202 off = (u_int16_t)hashp->BSIZE; 203 old_bufp = __get_buf(hashp, obucket, NULL, 0); 204 if (old_bufp == NULL) 205 return (-1); 206 new_bufp = __get_buf(hashp, nbucket, NULL, 0); 207 if (new_bufp == NULL) 208 return (-1); 209 210 old_bufp->flags |= (BUF_MOD | BUF_PIN); 211 new_bufp->flags |= (BUF_MOD | BUF_PIN); 212 213 ino = (u_int16_t *)(void *)(op = old_bufp->page); 214 np = new_bufp->page; 215 216 moved = 0; 217 218 for (n = 1, ndx = 1; n < ino[0]; n += 2) { 219 if (ino[n + 1] < REAL_KEY) { 220 retval = ugly_split(hashp, obucket, old_bufp, new_bufp, 221 (int)copyto, (int)moved); 222 old_bufp->flags &= ~BUF_PIN; 223 new_bufp->flags &= ~BUF_PIN; 224 return (retval); 225 226 } 227 key.data = (u_char *)op + ino[n]; 228 key.size = off - ino[n]; 229 230 if (__call_hash(hashp, key.data, (int)key.size) == obucket) { 231 /* Don't switch page */ 232 diff = copyto - off; 233 if (diff) { 234 copyto = ino[n + 1] + diff; 235 memmove(op + copyto, op + ino[n + 1], 236 (size_t)(off - ino[n + 1])); 237 ino[ndx] = copyto + ino[n] - ino[n + 1]; 238 ino[ndx + 1] = copyto; 239 } else 240 copyto = ino[n + 1]; 241 ndx += 2; 242 } else { 243 /* Switch page */ 244 val.data = (u_char *)op + ino[n + 1]; 245 val.size = ino[n] - ino[n + 1]; 246 putpair(np, &key, &val); 247 moved += 2; 248 } 249 250 off = ino[n + 1]; 251 } 252 253 /* Now clean up the page */ 254 ino[0] -= moved; 255 temp = sizeof(u_int16_t) * (ino[0] + 3); 256 _DIAGASSERT(copyto >= temp); 257 FREESPACE(ino) = (u_int16_t)(copyto - temp); 258 OFFSET(ino) = copyto; 259 260 #ifdef DEBUG3 261 (void)fprintf(stderr, "split %d/%d\n", 262 ((u_int16_t *)np)[0] / 2, 263 ((u_int16_t *)op)[0] / 2); 264 #endif 265 /* unpin both pages */ 266 old_bufp->flags &= ~BUF_PIN; 267 new_bufp->flags &= ~BUF_PIN; 268 return (0); 269 } 270 271 /* 272 * Called when we encounter an overflow or big key/data page during split 273 * handling. This is special cased since we have to begin checking whether 274 * the key/data pairs fit on their respective pages and because we may need 275 * overflow pages for both the old and new pages. 276 * 277 * The first page might be a page with regular key/data pairs in which case 278 * we have a regular overflow condition and just need to go on to the next 279 * page or it might be a big key/data pair in which case we need to fix the 280 * big key/data pair. 281 * 282 * Returns: 283 * 0 ==> success 284 * -1 ==> failure 285 */ 286 static int 287 ugly_split( 288 HTAB *hashp, 289 u_int32_t obucket, /* Same as __split_page. */ 290 BUFHEAD *old_bufp, 291 BUFHEAD *new_bufp, 292 int copyto, /* First byte on page which contains key/data values. */ 293 int moved /* Number of pairs moved to new page. */ 294 ) 295 { 296 BUFHEAD *bufp; /* Buffer header for ino */ 297 u_int16_t *ino; /* Page keys come off of */ 298 u_int16_t *np; /* New page */ 299 u_int16_t *op; /* Page keys go on to if they aren't moving */ 300 size_t temp; 301 302 BUFHEAD *last_bfp; /* Last buf header OVFL needing to be freed */ 303 DBT key, val; 304 SPLIT_RETURN ret; 305 u_int16_t n, off, ov_addr, scopyto; 306 char *cino; /* Character value of ino */ 307 308 bufp = old_bufp; 309 ino = (u_int16_t *)(void *)old_bufp->page; 310 np = (u_int16_t *)(void *)new_bufp->page; 311 op = (u_int16_t *)(void *)old_bufp->page; 312 last_bfp = NULL; 313 scopyto = (u_int16_t)copyto; /* ANSI */ 314 315 n = ino[0] - 1; 316 while (n < ino[0]) { 317 if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) { 318 if (__big_split(hashp, old_bufp, 319 new_bufp, bufp, (int)bufp->addr, obucket, &ret)) 320 return (-1); 321 old_bufp = ret.oldp; 322 if (!old_bufp) 323 return (-1); 324 op = (u_int16_t *)(void *)old_bufp->page; 325 new_bufp = ret.newp; 326 if (!new_bufp) 327 return (-1); 328 np = (u_int16_t *)(void *)new_bufp->page; 329 bufp = ret.nextp; 330 if (!bufp) 331 return (0); 332 cino = (char *)bufp->page; 333 ino = (u_int16_t *)(void *)cino; 334 last_bfp = ret.nextp; 335 } else if (ino[n + 1] == OVFLPAGE) { 336 ov_addr = ino[n]; 337 /* 338 * Fix up the old page -- the extra 2 are the fields 339 * which contained the overflow information. 340 */ 341 ino[0] -= (moved + 2); 342 temp = sizeof(u_int16_t) * (ino[0] + 3); 343 _DIAGASSERT(scopyto >= temp); 344 FREESPACE(ino) = (u_int16_t)(scopyto - temp); 345 OFFSET(ino) = scopyto; 346 347 bufp = __get_buf(hashp, (u_int32_t)ov_addr, bufp, 0); 348 if (!bufp) 349 return (-1); 350 351 ino = (u_int16_t *)(void *)bufp->page; 352 n = 1; 353 scopyto = hashp->BSIZE; 354 moved = 0; 355 356 if (last_bfp) 357 __free_ovflpage(hashp, last_bfp); 358 last_bfp = bufp; 359 } 360 /* Move regular sized pairs of there are any */ 361 off = hashp->BSIZE; 362 for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) { 363 cino = (char *)(void *)ino; 364 key.data = (u_char *)cino + ino[n]; 365 key.size = off - ino[n]; 366 val.data = (u_char *)cino + ino[n + 1]; 367 val.size = ino[n] - ino[n + 1]; 368 off = ino[n + 1]; 369 370 if (__call_hash(hashp, key.data, (int)key.size) == obucket) { 371 /* Keep on old page */ 372 if (PAIRFITS(op, (&key), (&val))) 373 putpair((char *)(void *)op, &key, &val); 374 else { 375 old_bufp = 376 __add_ovflpage(hashp, old_bufp); 377 if (!old_bufp) 378 return (-1); 379 op = (u_int16_t *)(void *)old_bufp->page; 380 putpair((char *)(void *)op, &key, &val); 381 } 382 old_bufp->flags |= BUF_MOD; 383 } else { 384 /* Move to new page */ 385 if (PAIRFITS(np, (&key), (&val))) 386 putpair((char *)(void *)np, &key, &val); 387 else { 388 new_bufp = 389 __add_ovflpage(hashp, new_bufp); 390 if (!new_bufp) 391 return (-1); 392 np = (u_int16_t *)(void *)new_bufp->page; 393 putpair((char *)(void *)np, &key, &val); 394 } 395 new_bufp->flags |= BUF_MOD; 396 } 397 } 398 } 399 if (last_bfp) 400 __free_ovflpage(hashp, last_bfp); 401 return (0); 402 } 403 404 /* 405 * Add the given pair to the page 406 * 407 * Returns: 408 * 0 ==> OK 409 * 1 ==> failure 410 */ 411 int 412 __addel(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) 413 { 414 u_int16_t *bp, *sop; 415 int do_expand; 416 417 bp = (u_int16_t *)(void *)bufp->page; 418 do_expand = 0; 419 while (bp[0] && (bp[2] < REAL_KEY || bp[bp[0]] < REAL_KEY)) 420 /* Exception case */ 421 if (bp[2] == FULL_KEY_DATA && bp[0] == 2) 422 /* This is the last page of a big key/data pair 423 and we need to add another page */ 424 break; 425 else if (bp[2] < REAL_KEY && bp[bp[0]] != OVFLPAGE) { 426 bufp = __get_buf(hashp, (u_int32_t)bp[bp[0] - 1], bufp, 427 0); 428 if (!bufp) 429 return (-1); 430 bp = (u_int16_t *)(void *)bufp->page; 431 } else if (bp[bp[0]] != OVFLPAGE) { 432 /* Short key/data pairs, no more pages */ 433 break; 434 } else { 435 /* Try to squeeze key on this page */ 436 if (bp[2] >= REAL_KEY && 437 FREESPACE(bp) >= PAIRSIZE(key, val)) { 438 squeeze_key(bp, key, val); 439 goto stats; 440 } else { 441 bufp = __get_buf(hashp, 442 (u_int32_t)bp[bp[0] - 1], bufp, 0); 443 if (!bufp) 444 return (-1); 445 bp = (u_int16_t *)(void *)bufp->page; 446 } 447 } 448 449 if (PAIRFITS(bp, key, val)) 450 putpair(bufp->page, key, val); 451 else { 452 do_expand = 1; 453 bufp = __add_ovflpage(hashp, bufp); 454 if (!bufp) 455 return (-1); 456 sop = (u_int16_t *)(void *)bufp->page; 457 458 if (PAIRFITS(sop, key, val)) 459 putpair((char *)(void *)sop, key, val); 460 else 461 if (__big_insert(hashp, bufp, key, val)) 462 return (-1); 463 } 464 stats: 465 bufp->flags |= BUF_MOD; 466 /* 467 * If the average number of keys per bucket exceeds the fill factor, 468 * expand the table. 469 */ 470 hashp->NKEYS++; 471 if (do_expand || 472 (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR)) 473 return (__expand_table(hashp)); 474 return (0); 475 } 476 477 /* 478 * 479 * Returns: 480 * pointer on success 481 * NULL on error 482 */ 483 BUFHEAD * 484 __add_ovflpage(HTAB *hashp, BUFHEAD *bufp) 485 { 486 u_int16_t *sp; 487 u_int16_t ndx, ovfl_num; 488 size_t temp; 489 #ifdef DEBUG1 490 int tmp1, tmp2; 491 #endif 492 sp = (u_int16_t *)(void *)bufp->page; 493 494 /* Check if we are dynamically determining the fill factor */ 495 if (hashp->FFACTOR == DEF_FFACTOR) { 496 hashp->FFACTOR = (u_int32_t)sp[0] >> 1; 497 if (hashp->FFACTOR < MIN_FFACTOR) 498 hashp->FFACTOR = MIN_FFACTOR; 499 } 500 bufp->flags |= BUF_MOD; 501 ovfl_num = overflow_page(hashp); 502 #ifdef DEBUG1 503 tmp1 = bufp->addr; 504 tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0; 505 #endif 506 if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, (u_int32_t)ovfl_num, 507 bufp, 1))) 508 return (NULL); 509 bufp->ovfl->flags |= BUF_MOD; 510 #ifdef DEBUG1 511 (void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n", 512 tmp1, tmp2, bufp->ovfl->addr); 513 #endif 514 ndx = sp[0]; 515 /* 516 * Since a pair is allocated on a page only if there's room to add 517 * an overflow page, we know that the OVFL information will fit on 518 * the page. 519 */ 520 sp[ndx + 4] = OFFSET(sp); 521 temp = FREESPACE(sp); 522 _DIAGASSERT(temp >= OVFLSIZE); 523 sp[ndx + 3] = (u_int16_t)(temp - OVFLSIZE); 524 sp[ndx + 1] = ovfl_num; 525 sp[ndx + 2] = OVFLPAGE; 526 sp[0] = ndx + 2; 527 #ifdef HASH_STATISTICS 528 hash_overflows++; 529 #endif 530 return (bufp->ovfl); 531 } 532 533 /* 534 * Returns: 535 * 0 indicates SUCCESS 536 * -1 indicates FAILURE 537 */ 538 int 539 __get_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_disk, 540 int is_bitmap) 541 { 542 int fd, page, size; 543 ssize_t rsize; 544 u_int16_t *bp; 545 size_t temp; 546 547 fd = hashp->fp; 548 size = hashp->BSIZE; 549 550 if ((fd == -1) || !is_disk) { 551 PAGE_INIT(p); 552 return (0); 553 } 554 if (is_bucket) 555 page = BUCKET_TO_PAGE(bucket); 556 else 557 page = OADDR_TO_PAGE(bucket); 558 if ((rsize = pread(fd, p, (size_t)size, (off_t)page << hashp->BSHIFT)) == -1) 559 return (-1); 560 bp = (u_int16_t *)(void *)p; 561 if (!rsize) 562 bp[0] = 0; /* We hit the EOF, so initialize a new page */ 563 else 564 if (rsize != size) { 565 errno = EFTYPE; 566 return (-1); 567 } 568 if (!is_bitmap && !bp[0]) { 569 PAGE_INIT(p); 570 } else 571 if (hashp->LORDER != BYTE_ORDER) { 572 int i, max; 573 574 if (is_bitmap) { 575 max = (u_int32_t)hashp->BSIZE >> 2; /* divide by 4 */ 576 for (i = 0; i < max; i++) 577 M_32_SWAP(((int *)(void *)p)[i]); 578 } else { 579 M_16_SWAP(bp[0]); 580 max = bp[0] + 2; 581 for (i = 1; i <= max; i++) 582 M_16_SWAP(bp[i]); 583 } 584 } 585 return (0); 586 } 587 588 /* 589 * Write page p to disk 590 * 591 * Returns: 592 * 0 ==> OK 593 * -1 ==>failure 594 */ 595 int 596 __put_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_bitmap) 597 { 598 int fd, page, size; 599 ssize_t wsize; 600 601 size = hashp->BSIZE; 602 if ((hashp->fp == -1) && open_temp(hashp)) 603 return (-1); 604 fd = hashp->fp; 605 606 if (hashp->LORDER != BYTE_ORDER) { 607 int i; 608 int max; 609 610 if (is_bitmap) { 611 max = (u_int32_t)hashp->BSIZE >> 2; /* divide by 4 */ 612 for (i = 0; i < max; i++) 613 M_32_SWAP(((int *)(void *)p)[i]); 614 } else { 615 max = ((u_int16_t *)(void *)p)[0] + 2; 616 for (i = 0; i <= max; i++) 617 M_16_SWAP(((u_int16_t *)(void *)p)[i]); 618 } 619 } 620 if (is_bucket) 621 page = BUCKET_TO_PAGE(bucket); 622 else 623 page = OADDR_TO_PAGE(bucket); 624 if ((wsize = pwrite(fd, p, (size_t)size, (off_t)page << hashp->BSHIFT)) == -1) 625 /* Errno is set */ 626 return (-1); 627 if (wsize != size) { 628 errno = EFTYPE; 629 return (-1); 630 } 631 return (0); 632 } 633 634 #define BYTE_MASK ((1 << INT_BYTE_SHIFT) -1) 635 /* 636 * Initialize a new bitmap page. Bitmap pages are left in memory 637 * once they are read in. 638 */ 639 int 640 __ibitmap(HTAB *hashp, int pnum, int nbits, int ndx) 641 { 642 u_int32_t *ip; 643 int clearbytes, clearints; 644 645 if ((ip = malloc((size_t)hashp->BSIZE)) == NULL) 646 return (1); 647 hashp->nmaps++; 648 clearints = ((u_int32_t)(nbits - 1) >> INT_BYTE_SHIFT) + 1; 649 clearbytes = clearints << INT_TO_BYTE; 650 (void)memset(ip, 0, (size_t)clearbytes); 651 (void)memset(((char *)(void *)ip) + clearbytes, 0xFF, 652 (size_t)(hashp->BSIZE - clearbytes)); 653 ip[clearints - 1] = ALL_SET << (nbits & BYTE_MASK); 654 SETBIT(ip, 0); 655 hashp->BITMAPS[ndx] = (u_int16_t)pnum; 656 hashp->mapp[ndx] = ip; 657 return (0); 658 } 659 660 static u_int32_t 661 first_free(u_int32_t map) 662 { 663 u_int32_t i, mask; 664 665 mask = 0x1; 666 for (i = 0; i < BITS_PER_MAP; i++) { 667 if (!(mask & map)) 668 return (i); 669 mask = mask << 1; 670 } 671 return (i); 672 } 673 674 static u_int16_t 675 overflow_page(HTAB *hashp) 676 { 677 u_int32_t *freep = NULL; 678 int max_free, offset, splitnum; 679 u_int16_t addr; 680 int bit, first_page, free_bit, free_page, i, in_use_bits, j; 681 #ifdef DEBUG2 682 int tmp1, tmp2; 683 #endif 684 splitnum = hashp->OVFL_POINT; 685 max_free = hashp->SPARES[splitnum]; 686 687 free_page = (u_int32_t)(max_free - 1) >> (hashp->BSHIFT + BYTE_SHIFT); 688 free_bit = (max_free - 1) & ((hashp->BSIZE << BYTE_SHIFT) - 1); 689 690 /* Look through all the free maps to find the first free block */ 691 first_page = (u_int32_t)hashp->LAST_FREED >>(hashp->BSHIFT + BYTE_SHIFT); 692 for ( i = first_page; i <= free_page; i++ ) { 693 if (!(freep = (u_int32_t *)hashp->mapp[i]) && 694 !(freep = fetch_bitmap(hashp, i))) 695 return (0); 696 if (i == free_page) 697 in_use_bits = free_bit; 698 else 699 in_use_bits = (hashp->BSIZE << BYTE_SHIFT) - 1; 700 701 if (i == first_page) { 702 bit = hashp->LAST_FREED & 703 ((hashp->BSIZE << BYTE_SHIFT) - 1); 704 j = bit / BITS_PER_MAP; 705 bit = bit & ~(BITS_PER_MAP - 1); 706 } else { 707 bit = 0; 708 j = 0; 709 } 710 for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP) 711 if (freep[j] != ALL_SET) 712 goto found; 713 } 714 715 /* No Free Page Found */ 716 hashp->LAST_FREED = hashp->SPARES[splitnum]; 717 hashp->SPARES[splitnum]++; 718 offset = hashp->SPARES[splitnum] - 719 (splitnum ? hashp->SPARES[splitnum - 1] : 0); 720 721 #define OVMSG "HASH: Out of overflow pages. Increase page size\n" 722 if (offset > SPLITMASK) { 723 if (++splitnum >= NCACHED) { 724 (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); 725 errno = EFBIG; 726 return (0); 727 } 728 hashp->OVFL_POINT = splitnum; 729 hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; 730 hashp->SPARES[splitnum-1]--; 731 offset = 1; 732 } 733 734 /* Check if we need to allocate a new bitmap page */ 735 if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) { 736 free_page++; 737 if (free_page >= NCACHED) { 738 (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); 739 errno = EFBIG; 740 return (0); 741 } 742 /* 743 * This is tricky. The 1 indicates that you want the new page 744 * allocated with 1 clear bit. Actually, you are going to 745 * allocate 2 pages from this map. The first is going to be 746 * the map page, the second is the overflow page we were 747 * looking for. The init_bitmap routine automatically, sets 748 * the first bit of itself to indicate that the bitmap itself 749 * is in use. We would explicitly set the second bit, but 750 * don't have to if we tell init_bitmap not to leave it clear 751 * in the first place. 752 */ 753 if (__ibitmap(hashp, 754 (int)OADDR_OF(splitnum, offset), 1, free_page)) 755 return (0); 756 hashp->SPARES[splitnum]++; 757 #ifdef DEBUG2 758 free_bit = 2; 759 #endif 760 offset++; 761 if (offset > SPLITMASK) { 762 if (++splitnum >= NCACHED) { 763 (void)write(STDERR_FILENO, OVMSG, 764 sizeof(OVMSG) - 1); 765 errno = EFBIG; 766 return (0); 767 } 768 hashp->OVFL_POINT = splitnum; 769 hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; 770 hashp->SPARES[splitnum-1]--; 771 offset = 0; 772 } 773 } else { 774 /* 775 * Free_bit addresses the last used bit. Bump it to address 776 * the first available bit. 777 */ 778 free_bit++; 779 SETBIT(freep, free_bit); 780 } 781 782 /* Calculate address of the new overflow page */ 783 addr = OADDR_OF(splitnum, offset); 784 #ifdef DEBUG2 785 (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", 786 addr, free_bit, free_page); 787 #endif 788 return (addr); 789 790 found: 791 bit = bit + first_free(freep[j]); 792 SETBIT(freep, bit); 793 #ifdef DEBUG2 794 tmp1 = bit; 795 tmp2 = i; 796 #endif 797 /* 798 * Bits are addressed starting with 0, but overflow pages are addressed 799 * beginning at 1. Bit is a bit addressnumber, so we need to increment 800 * it to convert it to a page number. 801 */ 802 bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT)); 803 if (bit >= hashp->LAST_FREED) 804 hashp->LAST_FREED = bit - 1; 805 806 /* Calculate the split number for this page */ 807 for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++); 808 offset = (i ? bit - hashp->SPARES[i - 1] : bit); 809 if (offset >= SPLITMASK) { 810 (void)write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); 811 errno = EFBIG; 812 return (0); /* Out of overflow pages */ 813 } 814 addr = OADDR_OF(i, offset); 815 #ifdef DEBUG2 816 (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", 817 addr, tmp1, tmp2); 818 #endif 819 820 /* Allocate and return the overflow page */ 821 return (addr); 822 } 823 824 /* 825 * Mark this overflow page as free. 826 */ 827 void 828 __free_ovflpage(HTAB *hashp, BUFHEAD *obufp) 829 { 830 u_int16_t addr; 831 u_int32_t *freep; 832 int bit_address, free_page, free_bit; 833 u_int16_t ndx; 834 835 addr = obufp->addr; 836 #ifdef DEBUG1 837 (void)fprintf(stderr, "Freeing %d\n", addr); 838 #endif 839 ndx = (((u_int32_t)addr) >> SPLITSHIFT); 840 bit_address = 841 (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1; 842 if (bit_address < hashp->LAST_FREED) 843 hashp->LAST_FREED = bit_address; 844 free_page = ((u_int32_t)bit_address >> (hashp->BSHIFT + BYTE_SHIFT)); 845 free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1); 846 847 if (!(freep = hashp->mapp[free_page])) 848 freep = fetch_bitmap(hashp, free_page); 849 /* 850 * This had better never happen. It means we tried to read a bitmap 851 * that has already had overflow pages allocated off it, and we 852 * failed to read it from the file. 853 */ 854 _DIAGASSERT(freep != NULL); 855 CLRBIT(freep, free_bit); 856 #ifdef DEBUG2 857 (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n", 858 obufp->addr, free_bit, free_page); 859 #endif 860 __reclaim_buf(hashp, obufp); 861 } 862 863 /* 864 * Returns: 865 * 0 success 866 * -1 failure 867 */ 868 static int 869 open_temp(HTAB *hashp) 870 { 871 sigset_t set, oset; 872 char *envtmp; 873 char namestr[PATH_MAX]; 874 875 if (issetugid()) 876 envtmp = NULL; 877 else 878 envtmp = getenv("TMPDIR"); 879 880 if (-1 == snprintf(namestr, sizeof(namestr), "%s/_hashXXXXXX", 881 envtmp ? envtmp : _PATH_TMP)) 882 return -1; 883 884 /* Block signals; make sure file goes away at process exit. */ 885 (void)sigfillset(&set); 886 (void)sigprocmask(SIG_BLOCK, &set, &oset); 887 if ((hashp->fp = mkstemp(namestr)) != -1) { 888 (void)unlink(namestr); 889 (void)fcntl(hashp->fp, F_SETFD, FD_CLOEXEC); 890 } 891 (void)sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL); 892 return (hashp->fp != -1 ? 0 : -1); 893 } 894 895 /* 896 * We have to know that the key will fit, but the last entry on the page is 897 * an overflow pair, so we need to shift things. 898 */ 899 static void 900 squeeze_key(u_int16_t *sp, const DBT *key, const DBT *val) 901 { 902 char *p; 903 u_int16_t free_space, n, off, pageno; 904 size_t temp; 905 906 p = (char *)(void *)sp; 907 n = sp[0]; 908 free_space = FREESPACE(sp); 909 off = OFFSET(sp); 910 911 pageno = sp[n - 1]; 912 _DIAGASSERT(off >= key->size); 913 off -= (u_int16_t)key->size; 914 sp[n - 1] = off; 915 memmove(p + off, key->data, key->size); 916 _DIAGASSERT(off >= val->size); 917 off -= (u_int16_t)val->size; 918 sp[n] = off; 919 memmove(p + off, val->data, val->size); 920 sp[0] = n + 2; 921 sp[n + 1] = pageno; 922 sp[n + 2] = OVFLPAGE; 923 temp = PAIRSIZE(key, val); 924 _DIAGASSERT(free_space >= temp); 925 FREESPACE(sp) = (u_int16_t)(free_space - temp); 926 OFFSET(sp) = off; 927 } 928 929 static u_int32_t * 930 fetch_bitmap(HTAB *hashp, int ndx) 931 { 932 if (ndx >= hashp->nmaps) 933 return (NULL); 934 if ((hashp->mapp[ndx] = malloc((size_t)hashp->BSIZE)) == NULL) 935 return (NULL); 936 if (__get_page(hashp, 937 (char *)(void *)hashp->mapp[ndx], (u_int32_t)hashp->BITMAPS[ndx], 0, 1, 1)) { 938 free(hashp->mapp[ndx]); 939 return (NULL); 940 } 941 return (hashp->mapp[ndx]); 942 } 943 944 #ifdef DEBUG4 945 void print_chain(HTAB *, u_int32_t); 946 void 947 print_chain(HTAB *hashp, u_int32_t addr) 948 { 949 BUFHEAD *bufp; 950 u_int16_t *bp, oaddr; 951 952 (void)fprintf(stderr, "%d ", addr); 953 bufp = __get_buf(hashp, addr, NULL, 0); 954 bp = (u_int16_t *)bufp->page; 955 while (bp[0] && ((bp[bp[0]] == OVFLPAGE) || 956 ((bp[0] > 2) && bp[2] < REAL_KEY))) { 957 oaddr = bp[bp[0] - 1]; 958 (void)fprintf(stderr, "%d ", (int)oaddr); 959 bufp = __get_buf(hashp, (u_int32_t)oaddr, bufp, 0); 960 bp = (u_int16_t *)bufp->page; 961 } 962 (void)fprintf(stderr, "\n"); 963 } 964 #endif 965