1 /* 2 * sdbm - ndbm work-alike hashed database library 3 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 4 * author: oz@nexus.yorku.ca 5 * status: public domain. 6 * 7 * core routines 8 */ 9 10 #include "INTERN.h" 11 #include "config.h" 12 #ifdef WIN32 13 #include "io.h" 14 #endif 15 #include "sdbm.h" 16 #include "tune.h" 17 #include "pair.h" 18 19 #ifdef I_FCNTL 20 # include <fcntl.h> 21 #endif 22 #ifdef I_SYS_FILE 23 # include <sys/file.h> 24 #endif 25 26 #ifdef I_STRING 27 # ifndef __ultrix__ 28 # include <string.h> 29 # endif 30 #else 31 # include <strings.h> 32 #endif 33 34 /* 35 * externals 36 */ 37 38 #include <errno.h> /* See notes in perl.h about avoiding 39 extern int errno; */ 40 #ifdef __cplusplus 41 extern "C" { 42 #endif 43 44 extern Malloc_t malloc proto((MEM_SIZE)); 45 extern Free_t free proto((Malloc_t)); 46 47 #ifdef __cplusplus 48 } 49 #endif 50 51 const datum nullitem = {0, 0}; 52 53 /* 54 * forward 55 */ 56 static int getdbit proto((DBM *, long)); 57 static int setdbit proto((DBM *, long)); 58 static int getpage proto((DBM *, long)); 59 static datum getnext proto((DBM *)); 60 static int makroom proto((DBM *, long, int)); 61 62 /* 63 * useful macros 64 */ 65 #define bad(x) ((x).dptr == NULL || (x).dsize < 0) 66 #define exhash(item) sdbm_hash((item).dptr, (item).dsize) 67 #define ioerr(db) ((db)->flags |= DBM_IOERR) 68 69 #define OFF_PAG(off) (long) (off) * PBLKSIZ 70 #define OFF_DIR(off) (long) (off) * DBLKSIZ 71 72 static const long masks[] = { 73 000000000000, 000000000001, 000000000003, 000000000007, 74 000000000017, 000000000037, 000000000077, 000000000177, 75 000000000377, 000000000777, 000000001777, 000000003777, 76 000000007777, 000000017777, 000000037777, 000000077777, 77 000000177777, 000000377777, 000000777777, 000001777777, 78 000003777777, 000007777777, 000017777777, 000037777777, 79 000077777777, 000177777777, 000377777777, 000777777777, 80 001777777777, 003777777777, 007777777777, 017777777777 81 }; 82 83 DBM * 84 sdbm_open(char *file, int flags, int mode) 85 { 86 DBM *db; 87 char *dirname; 88 char *pagname; 89 size_t filelen; 90 const size_t dirfext_size = sizeof(DIRFEXT ""); 91 const size_t pagfext_size = sizeof(PAGFEXT ""); 92 93 if (file == NULL || !*file) 94 return errno = EINVAL, (DBM *) NULL; 95 /* 96 * need space for two separate filenames 97 */ 98 filelen = strlen(file); 99 100 if ((dirname = (char *) malloc(filelen + dirfext_size 101 + filelen + pagfext_size)) == NULL) 102 return errno = ENOMEM, (DBM *) NULL; 103 /* 104 * build the file names 105 */ 106 memcpy(dirname, file, filelen); 107 memcpy(dirname + filelen, DIRFEXT, dirfext_size); 108 pagname = dirname + filelen + dirfext_size; 109 memcpy(pagname, file, filelen); 110 memcpy(pagname + filelen, PAGFEXT, pagfext_size); 111 112 db = sdbm_prep(dirname, pagname, flags, mode); 113 free((char *) dirname); 114 return db; 115 } 116 117 DBM * 118 sdbm_prep(char *dirname, char *pagname, int flags, int mode) 119 { 120 DBM *db; 121 struct stat dstat; 122 123 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL) 124 return errno = ENOMEM, (DBM *) NULL; 125 126 db->flags = 0; 127 db->hmask = 0; 128 db->blkptr = 0; 129 db->keyptr = 0; 130 /* 131 * adjust user flags so that WRONLY becomes RDWR, 132 * as required by this package. Also set our internal 133 * flag for RDONLY if needed. 134 */ 135 if (flags & O_WRONLY) 136 flags = (flags & ~O_WRONLY) | O_RDWR; 137 138 else if ((flags & 03) == O_RDONLY) 139 db->flags = DBM_RDONLY; 140 /* 141 * open the files in sequence, and stat the dirfile. 142 * If we fail anywhere, undo everything, return NULL. 143 */ 144 #if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__) 145 flags |= O_BINARY; 146 # endif 147 if ((db->pagf = open(pagname, flags, mode)) > -1) { 148 if ((db->dirf = open(dirname, flags, mode)) > -1) { 149 /* 150 * need the dirfile size to establish max bit number. 151 */ 152 if (fstat(db->dirf, &dstat) == 0) { 153 /* 154 * zero size: either a fresh database, or one with a single, 155 * unsplit data page: dirpage is all zeros. 156 */ 157 db->dirbno = (!dstat.st_size) ? 0 : -1; 158 db->pagbno = -1; 159 db->maxbno = dstat.st_size * BYTESIZ; 160 161 (void) memset(db->pagbuf, 0, PBLKSIZ); 162 (void) memset(db->dirbuf, 0, DBLKSIZ); 163 /* 164 * success 165 */ 166 return db; 167 } 168 (void) close(db->dirf); 169 } 170 (void) close(db->pagf); 171 } 172 free((char *) db); 173 return (DBM *) NULL; 174 } 175 176 void 177 sdbm_close(DBM *db) 178 { 179 if (db == NULL) 180 errno = EINVAL; 181 else { 182 (void) close(db->dirf); 183 (void) close(db->pagf); 184 free((char *) db); 185 } 186 } 187 188 datum 189 sdbm_fetch(DBM *db, datum key) 190 { 191 if (db == NULL || bad(key)) 192 return errno = EINVAL, nullitem; 193 194 if (getpage(db, exhash(key))) 195 return getpair(db->pagbuf, key); 196 197 return ioerr(db), nullitem; 198 } 199 200 int 201 sdbm_exists(DBM *db, datum key) 202 { 203 if (db == NULL || bad(key)) 204 return errno = EINVAL, -1; 205 206 if (getpage(db, exhash(key))) 207 return exipair(db->pagbuf, key); 208 209 return ioerr(db), -1; 210 } 211 212 int 213 sdbm_delete(DBM *db, datum key) 214 { 215 if (db == NULL || bad(key)) 216 return errno = EINVAL, -1; 217 if (sdbm_rdonly(db)) 218 return errno = EPERM, -1; 219 220 if (getpage(db, exhash(key))) { 221 if (!delpair(db->pagbuf, key)) 222 return -1; 223 /* 224 * update the page file 225 */ 226 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 227 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 228 return ioerr(db), -1; 229 230 return 0; 231 } 232 233 return ioerr(db), -1; 234 } 235 236 int 237 sdbm_store(DBM *db, datum key, datum val, int flags) 238 { 239 int need; 240 long hash; 241 242 if (db == NULL || bad(key)) 243 return errno = EINVAL, -1; 244 if (sdbm_rdonly(db)) 245 return errno = EPERM, -1; 246 247 need = key.dsize + val.dsize; 248 /* 249 * is the pair too big (or too small) for this database ?? 250 */ 251 if (need < 0 || need > PAIRMAX) 252 return errno = EINVAL, -1; 253 254 if (getpage(db, (hash = exhash(key)))) { 255 /* 256 * if we need to replace, delete the key/data pair 257 * first. If it is not there, ignore. 258 */ 259 if (flags == DBM_REPLACE) 260 (void) delpair(db->pagbuf, key); 261 #ifdef SEEDUPS 262 else if (duppair(db->pagbuf, key)) 263 return 1; 264 #endif 265 /* 266 * if we do not have enough room, we have to split. 267 */ 268 if (!fitpair(db->pagbuf, need)) 269 if (!makroom(db, hash, need)) 270 return ioerr(db), -1; 271 /* 272 * we have enough room or split is successful. insert the key, 273 * and update the page file. 274 */ 275 (void) putpair(db->pagbuf, key, val); 276 277 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 278 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 279 return ioerr(db), -1; 280 /* 281 * success 282 */ 283 return 0; 284 } 285 286 return ioerr(db), -1; 287 } 288 289 /* 290 * makroom - make room by splitting the overfull page 291 * this routine will attempt to make room for SPLTMAX times before 292 * giving up. 293 */ 294 static int 295 makroom(DBM *db, long int hash, int need) 296 { 297 long newp; 298 char twin[PBLKSIZ]; 299 #if defined(DOSISH) || defined(WIN32) 300 char zer[PBLKSIZ]; 301 long oldtail; 302 #endif 303 char *pag = db->pagbuf; 304 char *New = twin; 305 int smax = SPLTMAX; 306 #ifdef BADMESS 307 int rc; 308 #endif 309 310 do { 311 /* 312 * split the current page 313 */ 314 (void) splpage(pag, New, db->hmask + 1); 315 /* 316 * address of the new page 317 */ 318 newp = (hash & db->hmask) | (db->hmask + 1); 319 320 /* 321 * write delay, read avoidance/cache shuffle: 322 * select the page for incoming pair: if key is to go to the new page, 323 * write out the previous one, and copy the new one over, thus making 324 * it the current page. If not, simply write the new page, and we are 325 * still looking at the page of interest. current page is not updated 326 * here, as sdbm_store will do so, after it inserts the incoming pair. 327 */ 328 329 #if defined(DOSISH) || defined(WIN32) 330 /* 331 * Fill hole with 0 if made it. 332 * (hole is NOT read as 0) 333 */ 334 oldtail = lseek(db->pagf, 0L, SEEK_END); 335 memset(zer, 0, PBLKSIZ); 336 while (OFF_PAG(newp) > oldtail) { 337 if (lseek(db->pagf, 0L, SEEK_END) < 0 || 338 write(db->pagf, zer, PBLKSIZ) < 0) { 339 340 return 0; 341 } 342 oldtail += PBLKSIZ; 343 } 344 #endif 345 if (hash & (db->hmask + 1)) { 346 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 347 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 348 return 0; 349 db->pagbno = newp; 350 (void) memcpy(pag, New, PBLKSIZ); 351 } 352 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0 353 || write(db->pagf, New, PBLKSIZ) < 0) 354 return 0; 355 356 if (!setdbit(db, db->curbit)) 357 return 0; 358 /* 359 * see if we have enough room now 360 */ 361 if (fitpair(pag, need)) 362 return 1; 363 /* 364 * try again... update curbit and hmask as getpage would have 365 * done. because of our update of the current page, we do not 366 * need to read in anything. BUT we have to write the current 367 * [deferred] page out, as the window of failure is too great. 368 */ 369 db->curbit = 2 * db->curbit + 370 ((hash & (db->hmask + 1)) ? 2 : 1); 371 db->hmask |= db->hmask + 1; 372 373 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 374 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 375 return 0; 376 377 } while (--smax); 378 /* 379 * if we are here, this is real bad news. After SPLTMAX splits, 380 * we still cannot fit the key. say goodnight. 381 */ 382 #ifdef BADMESS 383 rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44); 384 /* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be 385 * useful here but that would mean pulling in perl.h */ 386 (void)rc; 387 #endif 388 return 0; 389 390 } 391 392 /* 393 * the following two routines will break if 394 * deletions aren't taken into account. (ndbm bug) 395 */ 396 datum 397 sdbm_firstkey(DBM *db) 398 { 399 if (db == NULL) 400 return errno = EINVAL, nullitem; 401 /* 402 * start at page 0 403 */ 404 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0 405 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 406 return ioerr(db), nullitem; 407 db->pagbno = 0; 408 db->blkptr = 0; 409 db->keyptr = 0; 410 411 return getnext(db); 412 } 413 414 datum 415 sdbm_nextkey(DBM *db) 416 { 417 if (db == NULL) 418 return errno = EINVAL, nullitem; 419 return getnext(db); 420 } 421 422 /* 423 * all important binary trie traversal 424 */ 425 static int 426 getpage(DBM *db, long int hash) 427 { 428 int hbit; 429 long dbit; 430 long pagb; 431 432 dbit = 0; 433 hbit = 0; 434 while (dbit < db->maxbno && getdbit(db, dbit)) 435 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1); 436 437 debug(("dbit: %d...", dbit)); 438 439 db->curbit = dbit; 440 db->hmask = masks[hbit]; 441 442 pagb = hash & db->hmask; 443 /* 444 * see if the block we need is already in memory. 445 * note: this lookaside cache has about 10% hit rate. 446 */ 447 if (pagb != db->pagbno) { 448 /* 449 * note: here, we assume a "hole" is read as 0s. 450 * if not, must zero pagbuf first. 451 */ 452 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0 453 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 454 return 0; 455 if (!chkpage(db->pagbuf)) 456 return 0; 457 db->pagbno = pagb; 458 459 debug(("pag read: %d\n", pagb)); 460 } 461 return 1; 462 } 463 464 static int 465 getdbit(DBM *db, long int dbit) 466 { 467 long c; 468 long dirb; 469 470 c = dbit / BYTESIZ; 471 dirb = c / DBLKSIZ; 472 473 if (dirb != db->dirbno) { 474 int got; 475 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 476 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) 477 return 0; 478 if (got==0) 479 memset(db->dirbuf,0,DBLKSIZ); 480 db->dirbno = dirb; 481 482 debug(("dir read: %d\n", dirb)); 483 } 484 485 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ); 486 } 487 488 static int 489 setdbit(DBM *db, long int dbit) 490 { 491 long c; 492 long dirb; 493 494 c = dbit / BYTESIZ; 495 dirb = c / DBLKSIZ; 496 497 if (dirb != db->dirbno) { 498 int got; 499 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 500 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) 501 return 0; 502 if (got==0) 503 memset(db->dirbuf,0,DBLKSIZ); 504 db->dirbno = dirb; 505 506 debug(("dir read: %d\n", dirb)); 507 } 508 509 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ); 510 511 #if 0 512 if (dbit >= db->maxbno) 513 db->maxbno += DBLKSIZ * BYTESIZ; 514 #else 515 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno) 516 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ; 517 #endif 518 519 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 520 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0) 521 return 0; 522 523 return 1; 524 } 525 526 /* 527 * getnext - get the next key in the page, and if done with 528 * the page, try the next page in sequence 529 */ 530 static datum 531 getnext(DBM *db) 532 { 533 datum key; 534 535 for (;;) { 536 db->keyptr++; 537 key = getnkey(db->pagbuf, db->keyptr); 538 if (key.dptr != NULL) 539 return key; 540 /* 541 * we either run out, or there is nothing on this page.. 542 * try the next one... If we lost our position on the 543 * file, we will have to seek. 544 */ 545 db->keyptr = 0; 546 if (db->pagbno != db->blkptr++) 547 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0) 548 break; 549 db->pagbno = db->blkptr; 550 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0) 551 break; 552 if (!chkpage(db->pagbuf)) 553 break; 554 } 555 556 return ioerr(db), nullitem; 557 } 558 559