1 /* 2 * sdbm - ndbm work-alike hashed database library 3 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 4 * author: oz@nexus.yorku.ca 5 * status: public domain. 6 * 7 * core routines 8 */ 9 10 #include "config.h" 11 #ifdef WIN32 12 #include "io.h" 13 #endif 14 #include "sdbm.h" 15 #include "tune.h" 16 #include "pair.h" 17 18 #ifdef I_FCNTL 19 # include <fcntl.h> 20 #endif 21 #ifdef I_SYS_FILE 22 # include <sys/file.h> 23 #endif 24 25 #include <string.h> 26 27 /* 28 * externals 29 */ 30 31 #include <errno.h> /* See notes in perl.h about avoiding 32 extern int errno; */ 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 extern Malloc_t malloc(MEM_SIZE); 38 extern Free_t free(Malloc_t); 39 40 #ifdef __cplusplus 41 } 42 #endif 43 44 const datum nullitem = {0, 0}; 45 46 #ifdef WIN32 47 # undef lseek 48 # define lseek _lseeki64 49 #endif 50 51 /* 52 * forward 53 */ 54 static int getdbit(DBM *, long); 55 static int setdbit(DBM *, long); 56 static int getpage(DBM *, long); 57 static datum getnext(DBM *); 58 static int makroom(DBM *, long, int); 59 60 /* 61 * useful macros 62 */ 63 #define bad(x) ((x).dptr == NULL || (x).dsize < 0) 64 #define exhash(item) sdbm_hash((item).dptr, (item).dsize) 65 #define ioerr(db) ((db)->flags |= DBM_IOERR) 66 67 #define OFF_PAG(off) (Off_t) (off) * PBLKSIZ 68 #define OFF_DIR(off) (Off_t) (off) * DBLKSIZ 69 70 static const long masks[] = { 71 000000000000, 000000000001, 000000000003, 000000000007, 72 000000000017, 000000000037, 000000000077, 000000000177, 73 000000000377, 000000000777, 000000001777, 000000003777, 74 000000007777, 000000017777, 000000037777, 000000077777, 75 000000177777, 000000377777, 000000777777, 000001777777, 76 000003777777, 000007777777, 000017777777, 000037777777, 77 000077777777, 000177777777, 000377777777, 000777777777, 78 001777777777, 003777777777, 007777777777, 017777777777 79 }; 80 81 DBM * 82 sdbm_open(char *file, int flags, int mode) 83 { 84 DBM *db; 85 char *dirname; 86 char *pagname; 87 size_t filelen; 88 const size_t dirfext_size = sizeof(DIRFEXT ""); 89 const size_t pagfext_size = sizeof(PAGFEXT ""); 90 91 if (file == NULL || !*file) 92 return errno = EINVAL, (DBM *) NULL; 93 /* 94 * need space for two separate filenames 95 */ 96 filelen = strlen(file); 97 98 if ((dirname = (char *) malloc(filelen + dirfext_size 99 + filelen + pagfext_size)) == NULL) 100 return errno = ENOMEM, (DBM *) NULL; 101 /* 102 * build the file names 103 */ 104 memcpy(dirname, file, filelen); 105 memcpy(dirname + filelen, DIRFEXT, dirfext_size); 106 pagname = dirname + filelen + dirfext_size; 107 memcpy(pagname, file, filelen); 108 memcpy(pagname + filelen, PAGFEXT, pagfext_size); 109 110 db = sdbm_prep(dirname, pagname, flags, mode); 111 free((char *) dirname); 112 return db; 113 } 114 115 DBM * 116 sdbm_prep(char *dirname, char *pagname, int flags, int mode) 117 { 118 DBM *db; 119 struct stat dstat; 120 121 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL) 122 return errno = ENOMEM, (DBM *) NULL; 123 124 db->flags = 0; 125 db->hmask = 0; 126 db->blkptr = 0; 127 db->keyptr = 0; 128 /* 129 * adjust user flags so that WRONLY becomes RDWR, 130 * as required by this package. Also set our internal 131 * flag for RDONLY if needed. 132 */ 133 if (flags & O_WRONLY) 134 flags = (flags & ~O_WRONLY) | O_RDWR; 135 136 else if ((flags & 03) == O_RDONLY) 137 db->flags = DBM_RDONLY; 138 /* 139 * open the files in sequence, and stat the dirfile. 140 * If we fail anywhere, undo everything, return NULL. 141 */ 142 #if defined(OS2) || defined(WIN32) || defined(__CYGWIN__) 143 flags |= O_BINARY; 144 # endif 145 if ((db->pagf = open(pagname, flags, mode)) > -1) { 146 if ((db->dirf = open(dirname, flags, mode)) > -1) { 147 /* 148 * need the dirfile size to establish max bit number. 149 */ 150 if (fstat(db->dirf, &dstat) == 0) { 151 /* 152 * zero size: either a fresh database, or one with a single, 153 * unsplit data page: dirpage is all zeros. 154 */ 155 db->dirbno = (!dstat.st_size) ? 0 : -1; 156 db->pagbno = -1; 157 db->maxbno = dstat.st_size * BYTESIZ; 158 159 (void) memset(db->pagbuf, 0, PBLKSIZ); 160 (void) memset(db->dirbuf, 0, DBLKSIZ); 161 /* 162 * success 163 */ 164 return db; 165 } 166 (void) close(db->dirf); 167 } 168 (void) close(db->pagf); 169 } 170 free((char *) db); 171 return (DBM *) NULL; 172 } 173 174 void 175 sdbm_close(DBM *db) 176 { 177 if (db == NULL) 178 errno = EINVAL; 179 else { 180 (void) close(db->dirf); 181 (void) close(db->pagf); 182 free((char *) db); 183 } 184 } 185 186 datum 187 sdbm_fetch(DBM *db, datum key) 188 { 189 if (db == NULL || bad(key)) 190 return errno = EINVAL, nullitem; 191 192 if (getpage(db, exhash(key))) 193 return getpair(db->pagbuf, key); 194 195 return ioerr(db), nullitem; 196 } 197 198 int 199 sdbm_exists(DBM *db, datum key) 200 { 201 if (db == NULL || bad(key)) 202 return errno = EINVAL, -1; 203 204 if (getpage(db, exhash(key))) 205 return exipair(db->pagbuf, key); 206 207 return ioerr(db), -1; 208 } 209 210 int 211 sdbm_delete(DBM *db, datum key) 212 { 213 if (db == NULL || bad(key)) 214 return errno = EINVAL, -1; 215 if (sdbm_rdonly(db)) 216 return errno = EPERM, -1; 217 218 if (getpage(db, exhash(key))) { 219 if (!delpair(db->pagbuf, key)) 220 return -1; 221 /* 222 * update the page file 223 */ 224 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 225 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 226 return ioerr(db), -1; 227 228 return 0; 229 } 230 231 return ioerr(db), -1; 232 } 233 234 int 235 sdbm_store(DBM *db, datum key, datum val, int flags) 236 { 237 int need; 238 long hash; 239 240 if (db == NULL || bad(key)) 241 return errno = EINVAL, -1; 242 if (sdbm_rdonly(db)) 243 return errno = EPERM, -1; 244 245 need = key.dsize + val.dsize; 246 /* 247 * is the pair too big (or too small) for this database ?? 248 */ 249 if (need < 0 || need > PAIRMAX) 250 return errno = EINVAL, -1; 251 252 if (getpage(db, (hash = exhash(key)))) { 253 /* 254 * if we need to replace, delete the key/data pair 255 * first. If it is not there, ignore. 256 */ 257 if (flags == DBM_REPLACE) 258 (void) delpair(db->pagbuf, key); 259 #ifdef SEEDUPS 260 else if (duppair(db->pagbuf, key)) 261 return 1; 262 #endif 263 /* 264 * if we do not have enough room, we have to split. 265 */ 266 if (!fitpair(db->pagbuf, need)) 267 if (!makroom(db, hash, need)) 268 return ioerr(db), -1; 269 /* 270 * we have enough room or split is successful. insert the key, 271 * and update the page file. 272 */ 273 (void) putpair(db->pagbuf, key, val); 274 275 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 276 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 277 return ioerr(db), -1; 278 /* 279 * success 280 */ 281 return 0; 282 } 283 284 return ioerr(db), -1; 285 } 286 287 /* 288 * makroom - make room by splitting the overfull page 289 * this routine will attempt to make room for SPLTMAX times before 290 * giving up. 291 */ 292 static int 293 makroom(DBM *db, long int hash, int need) 294 { 295 long newp; 296 char twin[PBLKSIZ]; 297 #if defined(DOSISH) || defined(WIN32) 298 char zer[PBLKSIZ]; 299 Off_t oldtail; 300 #endif 301 char *pag = db->pagbuf; 302 char *New = twin; 303 int smax = SPLTMAX; 304 #ifdef BADMESS 305 int rc; 306 #endif 307 308 do { 309 /* 310 * split the current page 311 */ 312 (void) splpage(pag, New, db->hmask + 1); 313 /* 314 * address of the new page 315 */ 316 newp = (hash & db->hmask) | (db->hmask + 1); 317 318 /* 319 * write delay, read avoidance/cache shuffle: 320 * select the page for incoming pair: if key is to go to the new page, 321 * write out the previous one, and copy the new one over, thus making 322 * it the current page. If not, simply write the new page, and we are 323 * still looking at the page of interest. current page is not updated 324 * here, as sdbm_store will do so, after it inserts the incoming pair. 325 */ 326 327 #if defined(DOSISH) || defined(WIN32) 328 /* 329 * Fill hole with 0 if made it. 330 * (hole is NOT read as 0) 331 */ 332 oldtail = lseek(db->pagf, 0L, SEEK_END); 333 memset(zer, 0, PBLKSIZ); 334 while (OFF_PAG(newp) > oldtail) { 335 if (lseek(db->pagf, 0L, SEEK_END) < 0 || 336 write(db->pagf, zer, PBLKSIZ) < 0) { 337 338 return 0; 339 } 340 oldtail += PBLKSIZ; 341 } 342 #endif 343 if (hash & (db->hmask + 1)) { 344 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 345 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 346 return 0; 347 db->pagbno = newp; 348 (void) memcpy(pag, New, PBLKSIZ); 349 } 350 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0 351 || write(db->pagf, New, PBLKSIZ) < 0) 352 return 0; 353 354 if (!setdbit(db, db->curbit)) 355 return 0; 356 /* 357 * see if we have enough room now 358 */ 359 if (fitpair(pag, need)) 360 return 1; 361 /* 362 * try again... update curbit and hmask as getpage would have 363 * done. because of our update of the current page, we do not 364 * need to read in anything. BUT we have to write the current 365 * [deferred] page out, as the window of failure is too great. 366 */ 367 db->curbit = 2 * db->curbit + 368 ((hash & (db->hmask + 1)) ? 2 : 1); 369 db->hmask |= db->hmask + 1; 370 371 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 372 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 373 return 0; 374 375 } while (--smax); 376 /* 377 * if we are here, this is real bad news. After SPLTMAX splits, 378 * we still cannot fit the key. say goodnight. 379 */ 380 #ifdef BADMESS 381 rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44); 382 /* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be 383 * useful here but that would mean pulling in perl.h */ 384 (void)rc; 385 #endif 386 return 0; 387 388 } 389 390 /* 391 * the following two routines will break if 392 * deletions aren't taken into account. (ndbm bug) 393 */ 394 datum 395 sdbm_firstkey(DBM *db) 396 { 397 if (db == NULL) 398 return errno = EINVAL, nullitem; 399 /* 400 * start at page 0 401 */ 402 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0 403 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 404 return ioerr(db), nullitem; 405 if (!chkpage(db->pagbuf)) { 406 errno = EINVAL; 407 ioerr(db); 408 db->pagbno = -1; 409 return nullitem; 410 } 411 db->pagbno = 0; 412 db->blkptr = 0; 413 db->keyptr = 0; 414 415 return getnext(db); 416 } 417 418 datum 419 sdbm_nextkey(DBM *db) 420 { 421 if (db == NULL) 422 return errno = EINVAL, nullitem; 423 return getnext(db); 424 } 425 426 /* 427 * all important binary trie traversal 428 */ 429 static int 430 getpage(DBM *db, long int hash) 431 { 432 int hbit; 433 long dbit; 434 long pagb; 435 436 dbit = 0; 437 hbit = 0; 438 while (dbit < db->maxbno && getdbit(db, dbit)) 439 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1); 440 441 debug(("dbit: %d...", dbit)); 442 443 db->curbit = dbit; 444 db->hmask = masks[hbit]; 445 446 pagb = hash & db->hmask; 447 /* 448 * see if the block we need is already in memory. 449 * note: this lookaside cache has about 10% hit rate. 450 */ 451 if (pagb != db->pagbno) { 452 /* 453 * note: here, we assume a "hole" is read as 0s. 454 * if not, must zero pagbuf first. 455 */ 456 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0 457 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 458 return 0; 459 if (!chkpage(db->pagbuf)) { 460 errno = EINVAL; 461 db->pagbno = -1; 462 ioerr(db); 463 return 0; 464 } 465 db->pagbno = pagb; 466 467 debug(("pag read: %d\n", pagb)); 468 } 469 return 1; 470 } 471 472 static int 473 getdbit(DBM *db, long int dbit) 474 { 475 long c; 476 long dirb; 477 478 c = dbit / BYTESIZ; 479 dirb = c / DBLKSIZ; 480 481 if (dirb != db->dirbno) { 482 int got; 483 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 484 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) 485 return 0; 486 if (got==0) 487 memset(db->dirbuf,0,DBLKSIZ); 488 db->dirbno = dirb; 489 490 debug(("dir read: %d\n", dirb)); 491 } 492 493 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ); 494 } 495 496 static int 497 setdbit(DBM *db, long int dbit) 498 { 499 long c; 500 long dirb; 501 502 c = dbit / BYTESIZ; 503 dirb = c / DBLKSIZ; 504 505 if (dirb != db->dirbno) { 506 int got; 507 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 508 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) 509 return 0; 510 if (got==0) 511 memset(db->dirbuf,0,DBLKSIZ); 512 db->dirbno = dirb; 513 514 debug(("dir read: %d\n", dirb)); 515 } 516 517 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ); 518 519 #if 0 520 if (dbit >= db->maxbno) 521 db->maxbno += DBLKSIZ * BYTESIZ; 522 #else 523 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno) 524 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ; 525 #endif 526 527 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 528 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0) 529 return 0; 530 531 return 1; 532 } 533 534 /* 535 * getnext - get the next key in the page, and if done with 536 * the page, try the next page in sequence 537 */ 538 static datum 539 getnext(DBM *db) 540 { 541 datum key; 542 543 for (;;) { 544 db->keyptr++; 545 key = getnkey(db->pagbuf, db->keyptr); 546 if (key.dptr != NULL) 547 return key; 548 /* 549 * we either run out, or there is nothing on this page.. 550 * try the next one... If we lost our position on the 551 * file, we will have to seek. 552 */ 553 db->keyptr = 0; 554 if (db->pagbno != db->blkptr++) 555 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0) 556 break; 557 db->pagbno = db->blkptr; 558 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0) 559 break; 560 if (!chkpage(db->pagbuf)) { 561 errno = EINVAL; 562 db->pagbno = -1; 563 ioerr(db); 564 break; 565 } 566 } 567 568 return ioerr(db), nullitem; 569 } 570 571