1 /* NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp */ 2 3 /* 4 * Database functions 5 * Copyright (C) Andrew Tridgell 1999 6 * 7 * Redistribution and use in source and binary forms are permitted 8 * provided that the above copyright notice and this paragraph are 9 * duplicated in all such forms AND provided that this software or 10 * any derived work is only used as part of the PPP daemon (pppd) 11 * and related utilities. 12 * The name of the author may not be used to endorse or promote products 13 * derived from this software without specific prior written permission. 14 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 16 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 17 * 18 * Note: this software is also available under the Gnu Public License 19 * version 2 or later. 20 */ 21 22 #include <sys/cdefs.h> 23 #ifndef lint 24 __RCSID("NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp "); 25 #endif 26 27 #include <stdlib.h> 28 #include <stdio.h> 29 #include <fcntl.h> 30 #include <unistd.h> 31 #include <string.h> 32 #include <errno.h> 33 #include <sys/mman.h> 34 #include <sys/stat.h> 35 #include "tdb.h" 36 37 #define TDB_VERSION (0x26011967 + 1) 38 #define TDB_MAGIC (0x26011999U) 39 #define TDB_FREE_MAGIC (~TDB_MAGIC) 40 #define TDB_ALIGN 4 41 #define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGN) 42 #define DEFAULT_HASH_SIZE 128 43 #define TDB_PAGE_SIZE 0x2000 44 #define TDB_LEN_MULTIPLIER 10 45 #define FREELIST_TOP (sizeof(struct tdb_header)) 46 47 #define LOCK_SET 1 48 #define LOCK_CLEAR 0 49 50 /* lock offsets */ 51 #define GLOBAL_LOCK 0 52 #define ACTIVE_LOCK 4 53 #define LIST_LOCK_BASE 1024 54 55 #define BUCKET(hash) ((hash) % tdb->header.hash_size) 56 57 #ifndef MAP_FILE 58 #define MAP_FILE 0 59 #endif 60 61 /* the body of the database is made of one list_struct for the free space 62 plus a separate data list for each hash value */ 63 struct list_struct { 64 tdb_len rec_len; /* total byte length of record */ 65 tdb_off next; /* offset of the next record in the list */ 66 tdb_len key_len; /* byte length of key */ 67 tdb_len data_len; /* byte length of data */ 68 unsigned full_hash; /* the full 32 bit hash of the key */ 69 unsigned magic; /* try to catch errors */ 70 /* 71 the following union is implied 72 union { 73 char record[rec_len]; 74 struct { 75 char key[key_len]; 76 char data[data_len]; 77 } 78 } 79 */ 80 }; 81 82 /* a null data record - useful for error returns */ 83 static TDB_DATA null_data; 84 85 static int tdb_update __P((TDB_CONTEXT *, TDB_DATA, TDB_DATA)); 86 #ifdef notyet 87 static int tdb_lockchain __P((TDB_CONTEXT *, TDB_DATA)); 88 static int tdb_unlockchain __P((TDB_CONTEXT *, TDB_DATA)); 89 #endif 90 91 /* a byte range locking function - return 0 on success 92 this functions locks/unlocks 1 byte at the specified offset */ 93 static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, 94 int set, int rw_type, int lck_type) 95 { 96 #if NOLOCK 97 return 0; 98 #else 99 struct flock fl; 100 101 if (tdb->fd == -1) return 0; /* for in memory tdb */ 102 103 if (tdb->read_only) return -1; 104 105 fl.l_type = set==LOCK_SET?rw_type:F_UNLCK; 106 fl.l_whence = SEEK_SET; 107 fl.l_start = offset; 108 fl.l_len = 1; 109 fl.l_pid = 0; 110 111 if (fcntl(tdb->fd, lck_type, &fl) != 0) { 112 #if TDB_DEBUG 113 if (lck_type == F_SETLKW) { 114 printf("lock %d failed at %d (%s)\n", 115 set, offset, strerror(errno)); 116 } 117 #endif 118 tdb->ecode = TDB_ERR_LOCK; 119 return -1; 120 } 121 return 0; 122 #endif 123 } 124 125 /* lock a list in the database. list -1 is the alloc list */ 126 static int tdb_lock(TDB_CONTEXT *tdb, int list) 127 { 128 if (list < -1 || list >= (int)tdb->header.hash_size) { 129 #if TDB_DEBUG 130 printf("bad list %d\n", list); 131 #endif 132 return -1; 133 } 134 if (tdb->locked[list+1] == 0) { 135 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_SET, 136 F_WRLCK, F_SETLKW) != 0) { 137 return -1; 138 } 139 } 140 tdb->locked[list+1]++; 141 return 0; 142 } 143 144 /* unlock the database. */ 145 static int tdb_unlock(TDB_CONTEXT *tdb, int list) 146 { 147 if (list < -1 || list >= (int)tdb->header.hash_size) { 148 #if TDB_DEBUG 149 printf("bad unlock list %d\n", list); 150 #endif 151 return -1; 152 } 153 154 if (tdb->locked[list+1] == 0) { 155 #if TDB_DEBUG 156 printf("not locked %d\n", list); 157 #endif 158 tdb->ecode = TDB_ERR_LOCK; 159 return -1; 160 } 161 if (tdb->locked[list+1] == 1) { 162 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_CLEAR, 163 F_WRLCK, F_SETLKW) != 0) { 164 return -1; 165 } 166 } 167 tdb->locked[list+1]--; 168 return 0; 169 } 170 171 /* the hash algorithm - turn a key into an integer 172 This is based on the hash agorithm from gdbm */ 173 static unsigned tdb_hash(TDB_DATA *key) 174 { 175 unsigned value; /* Used to compute the hash value. */ 176 unsigned i; /* Used to cycle through random values. */ 177 178 /* Set the initial value from the key size. */ 179 value = 0x238F13AF * key->dsize; 180 for (i=0; i < key->dsize; i++) { 181 value = (value + (key->dptr[i] << (i*5 % 24))); 182 } 183 184 value = (1103515243 * value + 12345); 185 186 return value; 187 } 188 189 /* find the top of the hash chain for an open database */ 190 static tdb_off tdb_hash_top(TDB_CONTEXT *tdb, unsigned hash) 191 { 192 tdb_off ret; 193 hash = BUCKET(hash); 194 ret = FREELIST_TOP + (hash+1)*sizeof(tdb_off); 195 return ret; 196 } 197 198 199 /* check for an out of bounds access - if it is out of bounds then 200 see if the database has been expanded by someone else and expand 201 if necessary */ 202 static int tdb_oob(TDB_CONTEXT *tdb, tdb_off offset) 203 { 204 struct stat st; 205 if ((offset <= tdb->map_size) || (tdb->fd == -1)) return 0; 206 207 fstat(tdb->fd, &st); 208 if (st.st_size <= (ssize_t)offset) { 209 tdb->ecode = TDB_ERR_IO; 210 return -1; 211 } 212 213 #if HAVE_MMAP 214 if (tdb->map_ptr) { 215 munmap(tdb->map_ptr, tdb->map_size); 216 tdb->map_ptr = NULL; 217 } 218 #endif 219 220 tdb->map_size = st.st_size; 221 #if HAVE_MMAP 222 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, 223 tdb->read_only?PROT_READ:PROT_READ|PROT_WRITE, 224 MAP_SHARED | MAP_FILE, tdb->fd, 0); 225 #endif 226 return 0; 227 } 228 229 230 /* write a lump of data at a specified offset */ 231 static int tdb_write(TDB_CONTEXT *tdb, tdb_off offset, const char *buf, tdb_len len) 232 { 233 if (tdb_oob(tdb, offset + len) != 0) { 234 /* oops - trying to write beyond the end of the database! */ 235 return -1; 236 } 237 238 if (tdb->map_ptr) { 239 memcpy(offset + (char *)tdb->map_ptr, buf, len); 240 } else { 241 if (lseek(tdb->fd, offset, SEEK_SET) != offset || 242 write(tdb->fd, buf, len) != (ssize_t)len) { 243 tdb->ecode = TDB_ERR_IO; 244 return -1; 245 } 246 } 247 return 0; 248 } 249 250 /* read a lump of data at a specified offset */ 251 static int tdb_read(TDB_CONTEXT *tdb, tdb_off offset, char *buf, tdb_len len) 252 { 253 if (tdb_oob(tdb, offset + len) != 0) { 254 /* oops - trying to read beyond the end of the database! */ 255 return -1; 256 } 257 258 if (tdb->map_ptr) { 259 memcpy(buf, offset + (char *)tdb->map_ptr, len); 260 } else { 261 if (lseek(tdb->fd, offset, SEEK_SET) != offset || 262 read(tdb->fd, buf, len) != (ssize_t)len) { 263 tdb->ecode = TDB_ERR_IO; 264 return -1; 265 } 266 } 267 return 0; 268 } 269 270 271 /* read a lump of data, allocating the space for it */ 272 static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len) 273 { 274 char *buf; 275 276 buf = (char *)malloc(len); 277 278 if (!buf) { 279 tdb->ecode = TDB_ERR_OOM; 280 return NULL; 281 } 282 283 if (tdb_read(tdb, offset, buf, len) == -1) { 284 free(buf); 285 return NULL; 286 } 287 288 return buf; 289 } 290 291 /* convenience routine for writing a record */ 292 static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) 293 { 294 return tdb_write(tdb, offset, (char *)rec, sizeof(*rec)); 295 } 296 297 /* convenience routine for writing a tdb_off */ 298 static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) 299 { 300 return tdb_write(tdb, offset, (char *)d, sizeof(*d)); 301 } 302 303 /* read a tdb_off from the store */ 304 static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d) 305 { 306 return tdb_read(tdb, offset, (char *)d, sizeof(*d)); 307 } 308 309 /* read a record and check for simple errors */ 310 static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec) 311 { 312 if (tdb_read(tdb, offset, (char *)rec, sizeof(*rec)) == -1) return -1; 313 if (rec->magic != TDB_MAGIC) { 314 #if TDB_DEBUG 315 printf("bad magic 0x%08x at offset %d\n", 316 rec->magic, offset); 317 #endif 318 tdb->ecode = TDB_ERR_CORRUPT; 319 return -1; 320 } 321 if (tdb_oob(tdb, rec->next) != 0) { 322 return -1; 323 } 324 return 0; 325 } 326 327 /* expand the database at least length bytes by expanding the 328 underlying file and doing the mmap again if necessary */ 329 static int tdb_expand(TDB_CONTEXT *tdb, tdb_off length) 330 { 331 struct list_struct rec; 332 tdb_off offset, ptr; 333 char b = 0; 334 335 tdb_lock(tdb,-1); 336 337 /* make sure we know about any previous expansions by another 338 process */ 339 tdb_oob(tdb,tdb->map_size + 1); 340 341 /* always make room for at least 10 more records */ 342 length *= TDB_LEN_MULTIPLIER; 343 344 /* and round the database up to a multiple of TDB_PAGE_SIZE */ 345 length = ((tdb->map_size + length + TDB_PAGE_SIZE) & ~(TDB_PAGE_SIZE - 1)) - tdb->map_size; 346 347 /* expand the file itself */ 348 if (tdb->fd != -1) { 349 lseek(tdb->fd, tdb->map_size + length - 1, SEEK_SET); 350 if (write(tdb->fd, &b, 1) != 1) goto fail; 351 } 352 353 /* form a new freelist record */ 354 offset = FREELIST_TOP; 355 rec.rec_len = length - sizeof(rec); 356 rec.magic = TDB_FREE_MAGIC; 357 if (ofs_read(tdb, offset, &rec.next) == -1) { 358 goto fail; 359 } 360 361 #if HAVE_MMAP 362 if (tdb->fd != -1 && tdb->map_ptr) { 363 munmap(tdb->map_ptr, tdb->map_size); 364 tdb->map_ptr = NULL; 365 } 366 #endif 367 368 tdb->map_size += length; 369 370 if (tdb->fd == -1) { 371 void *n; 372 n = realloc(tdb->map_ptr, tdb->map_size); 373 if (!n) 374 goto fail; 375 tdb->map_ptr = n; 376 } 377 378 /* write it out */ 379 if (rec_write(tdb, tdb->map_size - length, &rec) == -1) { 380 goto fail; 381 } 382 383 /* link it into the free list */ 384 ptr = tdb->map_size - length; 385 if (ofs_write(tdb, offset, &ptr) == -1) goto fail; 386 387 #if HAVE_MMAP 388 if (tdb->fd != -1) { 389 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size, 390 PROT_READ|PROT_WRITE, 391 MAP_SHARED | MAP_FILE, tdb->fd, 0); 392 } 393 #endif 394 395 tdb_unlock(tdb, -1); 396 return 0; 397 398 fail: 399 tdb_unlock(tdb,-1); 400 return -1; 401 } 402 403 /* allocate some space from the free list. The offset returned points 404 to a unconnected list_struct within the database with room for at 405 least length bytes of total data 406 407 0 is returned if the space could not be allocated 408 */ 409 static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length) 410 { 411 tdb_off offset, rec_ptr, last_ptr; 412 struct list_struct rec, lastrec, newrec; 413 414 tdb_lock(tdb, -1); 415 416 again: 417 last_ptr = 0; 418 offset = FREELIST_TOP; 419 420 /* read in the freelist top */ 421 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 422 goto fail; 423 } 424 425 /* keep looking until we find a freelist record that is big 426 enough */ 427 while (rec_ptr) { 428 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { 429 goto fail; 430 } 431 432 if (rec.magic != TDB_FREE_MAGIC) { 433 #if TDB_DEBUG 434 printf("bad magic 0x%08x in free list\n", rec.magic); 435 #endif 436 goto fail; 437 } 438 439 if (rec.rec_len >= length) { 440 /* found it - now possibly split it up */ 441 if (rec.rec_len > length + MIN_REC_SIZE) { 442 length = (length + TDB_ALIGN) & ~(TDB_ALIGN-1); 443 444 newrec.rec_len = rec.rec_len - (sizeof(rec) + length); 445 newrec.next = rec.next; 446 newrec.magic = TDB_FREE_MAGIC; 447 448 rec.rec_len = length; 449 rec.next = rec_ptr + sizeof(rec) + rec.rec_len; 450 451 if (rec_write(tdb, rec.next, &newrec) == -1) { 452 goto fail; 453 } 454 455 if (rec_write(tdb, rec_ptr, &rec) == -1) { 456 goto fail; 457 } 458 } 459 460 /* remove it from the list */ 461 if (last_ptr == 0) { 462 offset = FREELIST_TOP; 463 464 if (ofs_write(tdb, offset, &rec.next) == -1) { 465 goto fail; 466 } 467 } else { 468 lastrec.next = rec.next; 469 if (rec_write(tdb, last_ptr, &lastrec) == -1) { 470 goto fail; 471 } 472 } 473 474 /* all done - return the new record offset */ 475 tdb_unlock(tdb, -1); 476 return rec_ptr; 477 } 478 479 /* move to the next record */ 480 lastrec = rec; 481 last_ptr = rec_ptr; 482 rec_ptr = rec.next; 483 } 484 485 /* we didn't find enough space. See if we can expand the 486 database and if we can then try again */ 487 if (tdb_expand(tdb, length + sizeof(rec)) == 0) goto again; 488 489 fail: 490 #if TDB_DEBUG 491 printf("tdb_allocate failed for size %u\n", length); 492 #endif 493 tdb_unlock(tdb, -1); 494 return 0; 495 } 496 497 /* initialise a new database with a specified hash size */ 498 static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size) 499 { 500 struct tdb_header header; 501 int i, size = 0; 502 tdb_off buf[16]; 503 504 /* create the header */ 505 memset(&header, 0, sizeof(header)); 506 memcpy(header.magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); 507 header.version = TDB_VERSION; 508 header.hash_size = hash_size; 509 lseek(tdb->fd, 0, SEEK_SET); 510 ftruncate(tdb->fd, 0); 511 512 if (tdb->fd != -1 && write(tdb->fd, &header, sizeof(header)) != 513 sizeof(header)) { 514 tdb->ecode = TDB_ERR_IO; 515 return -1; 516 } else size += sizeof(header); 517 518 /* the freelist and hash pointers */ 519 memset(buf, 0, sizeof(buf)); 520 521 for (i=0;(hash_size+1)-i >= 16; i += 16) { 522 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(buf)) != 523 sizeof(buf)) { 524 tdb->ecode = TDB_ERR_IO; 525 return -1; 526 } else size += sizeof(buf); 527 } 528 529 for (;i<hash_size+1; i++) { 530 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(tdb_off)) != 531 sizeof(tdb_off)) { 532 tdb->ecode = TDB_ERR_IO; 533 return -1; 534 } else size += sizeof(tdb_off); 535 } 536 537 if (tdb->fd == -1) { 538 tdb->map_ptr = calloc(size, 1); 539 tdb->map_size = size; 540 if (tdb->map_ptr == NULL) { 541 tdb->ecode = TDB_ERR_IO; 542 return -1; 543 } 544 memcpy(&tdb->header, &header, sizeof(header)); 545 } 546 547 #if TDB_DEBUG 548 printf("initialised database of hash_size %u\n", 549 hash_size); 550 #endif 551 return 0; 552 } 553 554 /* Returns 0 on fail. On success, return offset of record, and fills 555 in rec */ 556 static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, unsigned int hash, 557 struct list_struct *rec) 558 { 559 tdb_off offset, rec_ptr; 560 561 /* find the top of the hash chain */ 562 offset = tdb_hash_top(tdb, hash); 563 564 /* read in the hash top */ 565 if (ofs_read(tdb, offset, &rec_ptr) == -1) 566 return 0; 567 568 /* keep looking until we find the right record */ 569 while (rec_ptr) { 570 if (rec_read(tdb, rec_ptr, rec) == -1) 571 return 0; 572 573 if (hash == rec->full_hash && key.dsize == rec->key_len) { 574 char *k; 575 /* a very likely hit - read the key */ 576 k = tdb_alloc_read(tdb, rec_ptr + sizeof(*rec), 577 rec->key_len); 578 579 if (!k) 580 return 0; 581 582 if (memcmp(key.dptr, k, key.dsize) == 0) { 583 free(k); 584 return rec_ptr; 585 } 586 free(k); 587 } 588 589 /* move to the next record */ 590 rec_ptr = rec->next; 591 } 592 return 0; 593 } 594 595 /* 596 return an error string for the last tdb error 597 */ 598 char *tdb_errorstr(TDB_CONTEXT *tdb) 599 { 600 int i; 601 static struct { 602 enum TDB_ERROR ecode; 603 char *estring; 604 } emap[] = { 605 {TDB_SUCCESS, "Success"}, 606 {TDB_ERR_CORRUPT, "Corrupt database"}, 607 {TDB_ERR_IO, "IO Error"}, 608 {TDB_ERR_LOCK, "Locking error"}, 609 {TDB_ERR_OOM, "Out of memory"}, 610 {TDB_ERR_EXISTS, "Record exists"}, 611 {-1, NULL}}; 612 if (tdb != NULL) { 613 for (i=0;emap[i].estring;i++) { 614 if (tdb->ecode == emap[i].ecode) return emap[i].estring; 615 } 616 } else { 617 return "Invalid tdb context"; 618 } 619 return "Invalid error code"; 620 } 621 622 623 /* update an entry in place - this only works if the new data size 624 is <= the old data size and the key exists. 625 on failure return -1 626 */ 627 static int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf) 628 { 629 unsigned hash; 630 struct list_struct rec; 631 tdb_off rec_ptr; 632 int ret = -1; 633 634 if (tdb == NULL) { 635 #ifdef TDB_DEBUG 636 printf("tdb_update() called with null context\n"); 637 #endif 638 return -1; 639 } 640 641 /* find which hash bucket it is in */ 642 hash = tdb_hash(&key); 643 644 tdb_lock(tdb, BUCKET(hash)); 645 rec_ptr = tdb_find(tdb, key, hash, &rec); 646 647 if (!rec_ptr) 648 goto out; 649 650 /* must be long enough */ 651 if (rec.rec_len < key.dsize + dbuf.dsize) 652 goto out; 653 654 if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len, 655 dbuf.dptr, dbuf.dsize) == -1) 656 goto out; 657 658 if (dbuf.dsize != rec.data_len) { 659 /* update size */ 660 rec.data_len = dbuf.dsize; 661 ret = rec_write(tdb, rec_ptr, &rec); 662 } else 663 ret = 0; 664 665 out: 666 tdb_unlock(tdb, BUCKET(hash)); 667 return ret; 668 } 669 670 /* find an entry in the database given a key */ 671 TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key) 672 { 673 unsigned hash; 674 tdb_off rec_ptr; 675 struct list_struct rec; 676 TDB_DATA ret = null_data; 677 678 if (tdb == NULL) { 679 #ifdef TDB_DEBUG 680 printf("tdb_fetch() called with null context\n"); 681 #endif 682 return null_data; 683 } 684 685 /* find which hash bucket it is in */ 686 hash = tdb_hash(&key); 687 688 tdb_lock(tdb, BUCKET(hash)); 689 rec_ptr = tdb_find(tdb, key, hash, &rec); 690 691 if (rec_ptr) { 692 ret.dptr = tdb_alloc_read(tdb, 693 rec_ptr + sizeof(rec) + rec.key_len, 694 rec.data_len); 695 ret.dsize = rec.data_len; 696 } 697 698 tdb_unlock(tdb, BUCKET(hash)); 699 return ret; 700 } 701 702 /* check if an entry in the database exists 703 704 note that 1 is returned if the key is found and 0 is returned if not found 705 this doesn't match the conventions in the rest of this module, but is 706 compatible with gdbm 707 */ 708 int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key) 709 { 710 unsigned hash; 711 tdb_off rec_ptr; 712 struct list_struct rec; 713 714 if (tdb == NULL) { 715 #ifdef TDB_DEBUG 716 printf("tdb_exists() called with null context\n"); 717 #endif 718 return 0; 719 } 720 721 /* find which hash bucket it is in */ 722 hash = tdb_hash(&key); 723 724 tdb_lock(tdb, BUCKET(hash)); 725 rec_ptr = tdb_find(tdb, key, hash, &rec); 726 tdb_unlock(tdb, BUCKET(hash)); 727 728 return rec_ptr != 0; 729 } 730 731 /* traverse the entire database - calling fn(tdb, key, data) on each element. 732 return -1 on error or the record count traversed 733 if fn is NULL then it is not called 734 a non-zero return value from fn() indicates that the traversal should stop 735 */ 736 int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void* state), void* state) 737 { 738 int count = 0; 739 unsigned h; 740 tdb_off offset, rec_ptr; 741 struct list_struct rec; 742 char *data; 743 TDB_DATA key, dbuf; 744 745 if (tdb == NULL) { 746 #ifdef TDB_DEBUG 747 printf("tdb_traverse() called with null context\n"); 748 #endif 749 return -1; 750 } 751 752 /* loop over all hash chains */ 753 for (h = 0; h < tdb->header.hash_size; h++) { 754 tdb_lock(tdb, BUCKET(h)); 755 756 /* read in the hash top */ 757 offset = tdb_hash_top(tdb, h); 758 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 759 goto fail; 760 } 761 762 /* traverse all records for this hash */ 763 while (rec_ptr) { 764 if (rec_read(tdb, rec_ptr, &rec) == -1) { 765 goto fail; 766 } 767 768 /* now read the full record */ 769 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), 770 rec.key_len + rec.data_len); 771 if (!data) { 772 goto fail; 773 } 774 775 key.dptr = data; 776 key.dsize = rec.key_len; 777 dbuf.dptr = data + rec.key_len; 778 dbuf.dsize = rec.data_len; 779 count++; 780 781 if (fn && fn(tdb, key, dbuf, state) != 0) { 782 /* they want us to stop traversing */ 783 free(data); 784 tdb_unlock(tdb, BUCKET(h)); 785 return count; 786 } 787 788 /* a miss - drat */ 789 free(data); 790 791 /* move to the next record */ 792 rec_ptr = rec.next; 793 } 794 tdb_unlock(tdb, BUCKET(h)); 795 } 796 797 /* return the number traversed */ 798 return count; 799 800 fail: 801 tdb_unlock(tdb, BUCKET(h)); 802 return -1; 803 } 804 805 806 /* find the first entry in the database and return its key */ 807 TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb) 808 { 809 tdb_off offset, rec_ptr; 810 struct list_struct rec; 811 unsigned hash; 812 TDB_DATA ret; 813 814 if (tdb == NULL) { 815 #ifdef TDB_DEBUG 816 printf("tdb_firstkey() called with null context\n"); 817 #endif 818 return null_data; 819 } 820 821 /* look for a non-empty hash chain */ 822 for (hash = 0, rec_ptr = 0; 823 hash < tdb->header.hash_size; 824 hash++) { 825 /* find the top of the hash chain */ 826 offset = tdb_hash_top(tdb, hash); 827 828 tdb_lock(tdb, BUCKET(hash)); 829 830 /* read in the hash top */ 831 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 832 goto fail; 833 } 834 835 if (rec_ptr) break; 836 837 tdb_unlock(tdb, BUCKET(hash)); 838 } 839 840 if (rec_ptr == 0) return null_data; 841 842 /* we've found a non-empty chain, now read the record */ 843 if (rec_read(tdb, rec_ptr, &rec) == -1) { 844 goto fail; 845 } 846 847 /* allocate and read the key space */ 848 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); 849 ret.dsize = rec.key_len; 850 tdb_unlock(tdb, BUCKET(hash)); 851 return ret; 852 853 fail: 854 tdb_unlock(tdb, BUCKET(hash)); 855 return null_data; 856 } 857 858 /* find the next entry in the database, returning its key */ 859 TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA key) 860 { 861 unsigned hash, hbucket; 862 tdb_off rec_ptr, offset; 863 struct list_struct rec; 864 TDB_DATA ret; 865 866 if (tdb == NULL) { 867 #ifdef TDB_DEBUG 868 printf("tdb_nextkey() called with null context\n"); 869 #endif 870 return null_data; 871 } 872 873 /* find which hash bucket it is in */ 874 hash = tdb_hash(&key); 875 hbucket = BUCKET(hash); 876 877 tdb_lock(tdb, hbucket); 878 rec_ptr = tdb_find(tdb, key, hash, &rec); 879 if (rec_ptr) { 880 /* we want the next record after this one */ 881 rec_ptr = rec.next; 882 } 883 884 /* not found or last in hash: look for next non-empty hash chain */ 885 while (rec_ptr == 0) { 886 tdb_unlock(tdb, hbucket); 887 888 if (++hbucket >= tdb->header.hash_size - 1) 889 return null_data; 890 891 offset = tdb_hash_top(tdb, hbucket); 892 tdb_lock(tdb, hbucket); 893 /* read in the hash top */ 894 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 895 tdb_unlock(tdb, hbucket); 896 return null_data; 897 } 898 } 899 900 /* Read the record. */ 901 if (rec_read(tdb, rec_ptr, &rec) == -1) { 902 tdb_unlock(tdb, hbucket); 903 return null_data; 904 } 905 /* allocate and read the key */ 906 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len); 907 ret.dsize = rec.key_len; 908 tdb_unlock(tdb, hbucket); 909 910 return ret; 911 } 912 913 /* delete an entry in the database given a key */ 914 int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key) 915 { 916 unsigned hash; 917 tdb_off offset, rec_ptr, last_ptr; 918 struct list_struct rec, lastrec; 919 char *data = NULL; 920 921 if (tdb == NULL) { 922 #ifdef TDB_DEBUG 923 printf("tdb_delete() called with null context\n"); 924 #endif 925 return -1; 926 } 927 928 /* find which hash bucket it is in */ 929 hash = tdb_hash(&key); 930 931 tdb_lock(tdb, BUCKET(hash)); 932 933 /* find the top of the hash chain */ 934 offset = tdb_hash_top(tdb, hash); 935 936 /* read in the hash top */ 937 if (ofs_read(tdb, offset, &rec_ptr) == -1) { 938 goto fail; 939 } 940 941 last_ptr = 0; 942 943 /* keep looking until we find the right record */ 944 while (rec_ptr) { 945 if (rec_read(tdb, rec_ptr, &rec) == -1) { 946 goto fail; 947 } 948 949 if (hash == rec.full_hash && key.dsize == rec.key_len) { 950 /* a very likely hit - read the record and full key */ 951 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), 952 rec.key_len); 953 if (!data) { 954 goto fail; 955 } 956 957 if (memcmp(key.dptr, data, key.dsize) == 0) { 958 /* a definite match - delete it */ 959 if (last_ptr == 0) { 960 offset = tdb_hash_top(tdb, hash); 961 if (ofs_write(tdb, offset, &rec.next) == -1) { 962 goto fail; 963 } 964 } else { 965 lastrec.next = rec.next; 966 if (rec_write(tdb, last_ptr, &lastrec) == -1) { 967 goto fail; 968 } 969 } 970 tdb_unlock(tdb, BUCKET(hash)); 971 tdb_lock(tdb, -1); 972 /* and recover the space */ 973 offset = FREELIST_TOP; 974 if (ofs_read(tdb, offset, &rec.next) == -1) { 975 goto fail2; 976 } 977 rec.magic = TDB_FREE_MAGIC; 978 if (rec_write(tdb, rec_ptr, &rec) == -1) { 979 goto fail2; 980 } 981 if (ofs_write(tdb, offset, &rec_ptr) == -1) { 982 goto fail2; 983 } 984 985 /* yipee - all done */ 986 free(data); 987 tdb_unlock(tdb, -1); 988 return 0; 989 } 990 991 /* a miss - drat */ 992 free(data); 993 data = NULL; 994 } 995 996 /* move to the next record */ 997 last_ptr = rec_ptr; 998 lastrec = rec; 999 rec_ptr = rec.next; 1000 } 1001 1002 fail: 1003 if (data) free(data); 1004 tdb_unlock(tdb, BUCKET(hash)); 1005 return -1; 1006 1007 fail2: 1008 if (data) free(data); 1009 tdb_unlock(tdb, -1); 1010 return -1; 1011 } 1012 1013 1014 /* store an element in the database, replacing any existing element 1015 with the same key 1016 1017 return 0 on success, -1 on failure 1018 */ 1019 int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag) 1020 { 1021 struct list_struct rec; 1022 unsigned hash; 1023 tdb_off rec_ptr, offset; 1024 char *p = NULL; 1025 1026 if (tdb == NULL) { 1027 #ifdef TDB_DEBUG 1028 printf("tdb_store() called with null context\n"); 1029 #endif 1030 return -1; 1031 } 1032 1033 /* find which hash bucket it is in */ 1034 hash = tdb_hash(&key); 1035 1036 /* check for it existing */ 1037 if (flag == TDB_INSERT && tdb_exists(tdb, key)) { 1038 tdb->ecode = TDB_ERR_EXISTS; 1039 return -1; 1040 } 1041 1042 /* first try in-place update */ 1043 if (flag != TDB_INSERT && tdb_update(tdb, key, dbuf) == 0) { 1044 return 0; 1045 } 1046 1047 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize); 1048 if (rec_ptr == 0) { 1049 return -1; 1050 } 1051 1052 tdb_lock(tdb, BUCKET(hash)); 1053 1054 /* delete any existing record - if it doesn't exist we don't care */ 1055 if (flag != TDB_INSERT) { 1056 tdb_delete(tdb, key); 1057 } 1058 1059 /* read the newly created record */ 1060 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) { 1061 goto fail; 1062 } 1063 1064 if (rec.magic != TDB_FREE_MAGIC) goto fail; 1065 1066 /* find the top of the hash chain */ 1067 offset = tdb_hash_top(tdb, hash); 1068 1069 /* read in the hash top diretcly into our next pointer */ 1070 if (ofs_read(tdb, offset, &rec.next) == -1) { 1071 goto fail; 1072 } 1073 1074 rec.key_len = key.dsize; 1075 rec.data_len = dbuf.dsize; 1076 rec.full_hash = hash; 1077 rec.magic = TDB_MAGIC; 1078 1079 p = (char *)malloc(sizeof(rec) + key.dsize + dbuf.dsize); 1080 if (!p) { 1081 tdb->ecode = TDB_ERR_OOM; 1082 goto fail; 1083 } 1084 1085 memcpy(p, &rec, sizeof(rec)); 1086 memcpy(p+sizeof(rec), key.dptr, key.dsize); 1087 memcpy(p+sizeof(rec)+key.dsize, dbuf.dptr, dbuf.dsize); 1088 1089 if (tdb_write(tdb, rec_ptr, p, sizeof(rec)+key.dsize+dbuf.dsize) == -1) 1090 goto fail; 1091 1092 free(p); 1093 p = NULL; 1094 1095 /* and point the top of the hash chain at it */ 1096 if (ofs_write(tdb, offset, &rec_ptr) == -1) goto fail; 1097 1098 tdb_unlock(tdb, BUCKET(hash)); 1099 return 0; 1100 1101 fail: 1102 #if TDB_DEBUG 1103 printf("store failed for hash 0x%08x in bucket %u\n", hash, BUCKET(hash)); 1104 #endif 1105 if (p) free(p); 1106 tdb_unlock(tdb, BUCKET(hash)); 1107 return -1; 1108 } 1109 1110 1111 /* open the database, creating it if necessary 1112 1113 The open_flags and mode are passed straight to the open call on the database 1114 file. A flags value of O_WRONLY is invalid 1115 1116 The hash size is advisory, use zero for a default value. 1117 1118 return is NULL on error 1119 */ 1120 TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags, 1121 int open_flags, mode_t mode) 1122 { 1123 TDB_CONTEXT tdb, *ret; 1124 struct stat st; 1125 1126 memset(&tdb, 0, sizeof(tdb)); 1127 1128 tdb.fd = -1; 1129 tdb.name = NULL; 1130 tdb.map_ptr = NULL; 1131 1132 if ((open_flags & O_ACCMODE) == O_WRONLY) { 1133 goto fail; 1134 } 1135 1136 if (hash_size == 0) hash_size = DEFAULT_HASH_SIZE; 1137 1138 tdb.read_only = ((open_flags & O_ACCMODE) == O_RDONLY); 1139 1140 if (name != NULL) { 1141 tdb.fd = open(name, open_flags, mode); 1142 if (tdb.fd == -1) { 1143 goto fail; 1144 } 1145 } 1146 1147 /* ensure there is only one process initialising at once */ 1148 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_SET, F_WRLCK, F_SETLKW); 1149 1150 if (tdb_flags & TDB_CLEAR_IF_FIRST) { 1151 /* we need to zero the database if we are the only 1152 one with it open */ 1153 if (tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_WRLCK, F_SETLK) == 0) { 1154 ftruncate(tdb.fd, 0); 1155 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLK); 1156 } 1157 } 1158 1159 /* leave this lock in place */ 1160 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_RDLCK, F_SETLKW); 1161 1162 if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header) || 1163 strcmp(tdb.header.magic_food, TDB_MAGIC_FOOD) != 0 || 1164 tdb.header.version != TDB_VERSION) { 1165 /* its not a valid database - possibly initialise it */ 1166 if (!(open_flags & O_CREAT)) { 1167 goto fail; 1168 } 1169 if (tdb_new_database(&tdb, hash_size) == -1) goto fail; 1170 1171 lseek(tdb.fd, 0, SEEK_SET); 1172 if (tdb.fd != -1 && read(tdb.fd, &tdb.header, 1173 sizeof(tdb.header)) != 1174 sizeof(tdb.header)) 1175 goto fail; 1176 } 1177 1178 if (tdb.fd != -1) { 1179 fstat(tdb.fd, &st); 1180 1181 /* map the database and fill in the return structure */ 1182 tdb.name = name ? strdup(name) : NULL; 1183 tdb.map_size = st.st_size; 1184 } 1185 1186 tdb.locked = (int *)calloc(tdb.header.hash_size+1, 1187 sizeof(tdb.locked[0])); 1188 if (!tdb.locked) { 1189 goto fail; 1190 } 1191 1192 #if HAVE_MMAP 1193 if (tdb.fd != -1) { 1194 tdb.map_ptr = (void *)mmap(NULL, st.st_size, 1195 tdb.read_only? PROT_READ : PROT_READ|PROT_WRITE, 1196 MAP_SHARED | MAP_FILE, tdb.fd, 0); 1197 } 1198 #endif 1199 1200 ret = (TDB_CONTEXT *)malloc(sizeof(tdb)); 1201 if (!ret) goto fail; 1202 1203 *ret = tdb; 1204 1205 #if TDB_DEBUG 1206 printf("mapped database of hash_size %u map_size=%u\n", 1207 hash_size, tdb.map_size); 1208 #endif 1209 1210 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLKW); 1211 return ret; 1212 1213 fail: 1214 if (tdb.name) free(tdb.name); 1215 if (tdb.fd != -1) close(tdb.fd); 1216 if (tdb.map_ptr) munmap(tdb.map_ptr, tdb.map_size); 1217 1218 return NULL; 1219 } 1220 1221 /* close a database */ 1222 int tdb_close(TDB_CONTEXT *tdb) 1223 { 1224 if (!tdb) return -1; 1225 1226 if (tdb->name) free(tdb->name); 1227 if (tdb->fd != -1) close(tdb->fd); 1228 if (tdb->locked) free(tdb->locked); 1229 1230 if (tdb->map_ptr) { 1231 if (tdb->fd != -1) { 1232 munmap(tdb->map_ptr, tdb->map_size); 1233 } else { 1234 free(tdb->map_ptr); 1235 } 1236 } 1237 1238 memset(tdb, 0, sizeof(*tdb)); 1239 free(tdb); 1240 1241 return 0; 1242 } 1243 1244 /* lock the database. If we already have it locked then don't do anything */ 1245 int tdb_writelock(TDB_CONTEXT *tdb) 1246 { 1247 if (tdb == NULL) { 1248 #ifdef TDB_DEBUG 1249 printf("tdb_writelock() called with null context\n"); 1250 #endif 1251 return -1; 1252 } 1253 1254 return tdb_lock(tdb, -1); 1255 } 1256 1257 /* unlock the database. */ 1258 int tdb_writeunlock(TDB_CONTEXT *tdb) 1259 { 1260 if (tdb == NULL) { 1261 #ifdef TDB_DEBUG 1262 printf("tdb_writeunlock() called with null context\n"); 1263 #endif 1264 return -1; 1265 } 1266 1267 return tdb_unlock(tdb, -1); 1268 } 1269 1270 int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key) 1271 { 1272 if (tdb == NULL) { 1273 #ifdef TDB_DEBUG 1274 printf("tdb_lockchain() called with null context\n"); 1275 #endif 1276 return -1; 1277 } 1278 1279 return tdb_lock(tdb, BUCKET(tdb_hash(&key))); 1280 } 1281 1282 1283 int tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key) 1284 { 1285 if (tdb == NULL) { 1286 #ifdef TDB_DEBUG 1287 printf("tdb_unlockchain() called with null context\n"); 1288 #endif 1289 return -1; 1290 } 1291 1292 return tdb_unlock(tdb, BUCKET(tdb_hash(&key))); 1293 } 1294