xref: /netbsd-src/external/bsd/ppp/usr.sbin/pppd/tdb.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp 	*/
2 
3 /*
4  * Database functions
5  * Copyright (C) Andrew Tridgell 1999
6  *
7  * Redistribution and use in source and binary forms are permitted
8  * provided that the above copyright notice and this paragraph are
9  * duplicated in all such forms AND provided that this software or
10  * any derived work is only used as part of the PPP daemon (pppd)
11  * and related utilities.
12  * The name of the author may not be used to endorse or promote products
13  * derived from this software without specific prior written permission.
14  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
16  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
17  *
18  * Note: this software is also available under the Gnu Public License
19  * version 2 or later.
20  */
21 
22 #include <sys/cdefs.h>
23 #ifndef lint
24 __RCSID("NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp ");
25 #endif
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <fcntl.h>
30 #include <unistd.h>
31 #include <string.h>
32 #include <errno.h>
33 #include <sys/mman.h>
34 #include <sys/stat.h>
35 #include "tdb.h"
36 
37 #define TDB_VERSION (0x26011967 + 1)
38 #define TDB_MAGIC (0x26011999U)
39 #define TDB_FREE_MAGIC (~TDB_MAGIC)
40 #define TDB_ALIGN 4
41 #define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGN)
42 #define DEFAULT_HASH_SIZE 128
43 #define TDB_PAGE_SIZE 0x2000
44 #define TDB_LEN_MULTIPLIER 10
45 #define FREELIST_TOP (sizeof(struct tdb_header))
46 
47 #define LOCK_SET 1
48 #define LOCK_CLEAR 0
49 
50 /* lock offsets */
51 #define GLOBAL_LOCK 0
52 #define ACTIVE_LOCK 4
53 #define LIST_LOCK_BASE 1024
54 
55 #define BUCKET(hash) ((hash) % tdb->header.hash_size)
56 
57 #ifndef MAP_FILE
58 #define MAP_FILE 0
59 #endif
60 
61 /* the body of the database is made of one list_struct for the free space
62    plus a separate data list for each hash value */
63 struct list_struct {
64 	tdb_len rec_len; /* total byte length of record */
65 	tdb_off next; /* offset of the next record in the list */
66 	tdb_len key_len; /* byte length of key */
67 	tdb_len data_len; /* byte length of data */
68 	unsigned full_hash; /* the full 32 bit hash of the key */
69 	unsigned magic;   /* try to catch errors */
70 	/*
71 	   the following union is implied
72 	   union {
73               char record[rec_len];
74 	      struct {
75 	        char key[key_len];
76 		char data[data_len];
77 	      }
78            }
79 	*/
80 };
81 
82 /* a null data record - useful for error returns */
83 static TDB_DATA null_data;
84 
85 static int tdb_update __P((TDB_CONTEXT *, TDB_DATA, TDB_DATA));
86 #ifdef notyet
87 static int tdb_lockchain __P((TDB_CONTEXT *, TDB_DATA));
88 static int tdb_unlockchain __P((TDB_CONTEXT *, TDB_DATA));
89 #endif
90 
91 /* a byte range locking function - return 0 on success
92    this functions locks/unlocks 1 byte at the specified offset */
93 static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset,
94 		      int set, int rw_type, int lck_type)
95 {
96 #if NOLOCK
97 	return 0;
98 #else
99 	struct flock fl;
100 
101         if (tdb->fd == -1) return 0;   /* for in memory tdb */
102 
103 	if (tdb->read_only) return -1;
104 
105 	fl.l_type = set==LOCK_SET?rw_type:F_UNLCK;
106 	fl.l_whence = SEEK_SET;
107 	fl.l_start = offset;
108 	fl.l_len = 1;
109 	fl.l_pid = 0;
110 
111 	if (fcntl(tdb->fd, lck_type, &fl) != 0) {
112 #if TDB_DEBUG
113 		if (lck_type == F_SETLKW) {
114 			printf("lock %d failed at %d (%s)\n",
115 			       set, offset, strerror(errno));
116 		}
117 #endif
118 		tdb->ecode = TDB_ERR_LOCK;
119 		return -1;
120 	}
121 	return 0;
122 #endif
123 }
124 
125 /* lock a list in the database. list -1 is the alloc list */
126 static int tdb_lock(TDB_CONTEXT *tdb, int list)
127 {
128 	if (list < -1 || list >= (int)tdb->header.hash_size) {
129 #if TDB_DEBUG
130 		printf("bad list %d\n", list);
131 #endif
132 		return -1;
133 	}
134 	if (tdb->locked[list+1] == 0) {
135 		if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_SET,
136 			       F_WRLCK, F_SETLKW) != 0) {
137 			return -1;
138 		}
139 	}
140 	tdb->locked[list+1]++;
141 	return 0;
142 }
143 
144 /* unlock the database. */
145 static int tdb_unlock(TDB_CONTEXT *tdb, int list)
146 {
147 	if (list < -1 || list >= (int)tdb->header.hash_size) {
148 #if TDB_DEBUG
149 		printf("bad unlock list %d\n", list);
150 #endif
151 		return -1;
152 	}
153 
154 	if (tdb->locked[list+1] == 0) {
155 #if TDB_DEBUG
156 		printf("not locked %d\n", list);
157 #endif
158 		tdb->ecode = TDB_ERR_LOCK;
159 		return -1;
160 	}
161 	if (tdb->locked[list+1] == 1) {
162 		if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_CLEAR,
163 			       F_WRLCK, F_SETLKW) != 0) {
164 			return -1;
165 		}
166 	}
167 	tdb->locked[list+1]--;
168 	return 0;
169 }
170 
171 /* the hash algorithm - turn a key into an integer
172    This is based on the hash agorithm from gdbm */
173 static unsigned tdb_hash(TDB_DATA *key)
174 {
175 	unsigned value;	/* Used to compute the hash value.  */
176 	unsigned   i;	/* Used to cycle through random values. */
177 
178 	/* Set the initial value from the key size. */
179 	value = 0x238F13AF * key->dsize;
180 	for (i=0; i < key->dsize; i++) {
181 		value = (value + (key->dptr[i] << (i*5 % 24)));
182 	}
183 
184 	value = (1103515243 * value + 12345);
185 
186 	return value;
187 }
188 
189 /* find the top of the hash chain for an open database */
190 static tdb_off tdb_hash_top(TDB_CONTEXT *tdb, unsigned hash)
191 {
192 	tdb_off ret;
193 	hash = BUCKET(hash);
194 	ret = FREELIST_TOP + (hash+1)*sizeof(tdb_off);
195 	return ret;
196 }
197 
198 
199 /* check for an out of bounds access - if it is out of bounds then
200    see if the database has been expanded by someone else and expand
201    if necessary */
202 static int tdb_oob(TDB_CONTEXT *tdb, tdb_off offset)
203 {
204 	struct stat st;
205 	if ((offset <= tdb->map_size) || (tdb->fd == -1)) return 0;
206 
207 	fstat(tdb->fd, &st);
208 	if (st.st_size <= (ssize_t)offset) {
209 		tdb->ecode = TDB_ERR_IO;
210 		return -1;
211 	}
212 
213 #if HAVE_MMAP
214 	if (tdb->map_ptr) {
215 		munmap(tdb->map_ptr, tdb->map_size);
216 		tdb->map_ptr = NULL;
217 	}
218 #endif
219 
220 	tdb->map_size = st.st_size;
221 #if HAVE_MMAP
222 	tdb->map_ptr = (void *)mmap(NULL, tdb->map_size,
223 				    tdb->read_only?PROT_READ:PROT_READ|PROT_WRITE,
224 				    MAP_SHARED | MAP_FILE, tdb->fd, 0);
225 #endif
226 	return 0;
227 }
228 
229 
230 /* write a lump of data at a specified offset */
231 static int tdb_write(TDB_CONTEXT *tdb, tdb_off offset, const char *buf, tdb_len len)
232 {
233 	if (tdb_oob(tdb, offset + len) != 0) {
234 		/* oops - trying to write beyond the end of the database! */
235 		return -1;
236 	}
237 
238 	if (tdb->map_ptr) {
239 		memcpy(offset + (char *)tdb->map_ptr, buf, len);
240 	} else {
241 		if (lseek(tdb->fd, offset, SEEK_SET) != offset ||
242 		    write(tdb->fd, buf, len) != (ssize_t)len) {
243 			tdb->ecode = TDB_ERR_IO;
244 			return -1;
245 		}
246 	}
247 	return 0;
248 }
249 
250 /* read a lump of data at a specified offset */
251 static int tdb_read(TDB_CONTEXT *tdb, tdb_off offset, char *buf, tdb_len len)
252 {
253 	if (tdb_oob(tdb, offset + len) != 0) {
254 		/* oops - trying to read beyond the end of the database! */
255 		return -1;
256 	}
257 
258 	if (tdb->map_ptr) {
259 		memcpy(buf, offset + (char *)tdb->map_ptr, len);
260 	} else {
261 		if (lseek(tdb->fd, offset, SEEK_SET) != offset ||
262 		    read(tdb->fd, buf, len) != (ssize_t)len) {
263 			tdb->ecode = TDB_ERR_IO;
264 			return -1;
265 		}
266 	}
267 	return 0;
268 }
269 
270 
271 /* read a lump of data, allocating the space for it */
272 static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len)
273 {
274 	char *buf;
275 
276 	buf = (char *)malloc(len);
277 
278 	if (!buf) {
279 		tdb->ecode = TDB_ERR_OOM;
280 		return NULL;
281 	}
282 
283 	if (tdb_read(tdb, offset, buf, len) == -1) {
284 		free(buf);
285 		return NULL;
286 	}
287 
288 	return buf;
289 }
290 
291 /* convenience routine for writing a record */
292 static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
293 {
294 	return tdb_write(tdb, offset, (char *)rec, sizeof(*rec));
295 }
296 
297 /* convenience routine for writing a tdb_off */
298 static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
299 {
300 	return tdb_write(tdb, offset, (char *)d, sizeof(*d));
301 }
302 
303 /* read a tdb_off from the store */
304 static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
305 {
306 	return tdb_read(tdb, offset, (char *)d, sizeof(*d));
307 }
308 
309 /* read a record and check for simple errors */
310 static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
311 {
312 	if (tdb_read(tdb, offset, (char *)rec, sizeof(*rec)) == -1) return -1;
313 	if (rec->magic != TDB_MAGIC) {
314 #if TDB_DEBUG
315 		printf("bad magic 0x%08x at offset %d\n",
316 		       rec->magic, offset);
317 #endif
318 		tdb->ecode = TDB_ERR_CORRUPT;
319 		return -1;
320 	}
321 	if (tdb_oob(tdb, rec->next) != 0) {
322 		return -1;
323 	}
324 	return 0;
325 }
326 
327 /* expand the database at least length bytes by expanding the
328    underlying file and doing the mmap again if necessary */
329 static int tdb_expand(TDB_CONTEXT *tdb, tdb_off length)
330 {
331 	struct list_struct rec;
332 	tdb_off offset, ptr;
333 	char b = 0;
334 
335 	tdb_lock(tdb,-1);
336 
337 	/* make sure we know about any previous expansions by another
338            process */
339 	tdb_oob(tdb,tdb->map_size + 1);
340 
341 	/* always make room for at least 10 more records */
342 	length *= TDB_LEN_MULTIPLIER;
343 
344 	/* and round the database up to a multiple of TDB_PAGE_SIZE */
345 	length = ((tdb->map_size + length + TDB_PAGE_SIZE) & ~(TDB_PAGE_SIZE - 1)) - tdb->map_size;
346 
347 	/* expand the file itself */
348         if (tdb->fd != -1) {
349             lseek(tdb->fd, tdb->map_size + length - 1, SEEK_SET);
350             if (write(tdb->fd, &b, 1) != 1) goto fail;
351         }
352 
353 	/* form a new freelist record */
354 	offset = FREELIST_TOP;
355 	rec.rec_len = length - sizeof(rec);
356 	rec.magic = TDB_FREE_MAGIC;
357 	if (ofs_read(tdb, offset, &rec.next) == -1) {
358 		goto fail;
359 	}
360 
361 #if HAVE_MMAP
362 	if (tdb->fd != -1 && tdb->map_ptr) {
363 		munmap(tdb->map_ptr, tdb->map_size);
364 		tdb->map_ptr = NULL;
365 	}
366 #endif
367 
368 	tdb->map_size += length;
369 
370         if (tdb->fd == -1) {
371 		void *n;
372 		n = realloc(tdb->map_ptr, tdb->map_size);
373 		if (!n)
374 			goto fail;
375 		tdb->map_ptr = n;
376         }
377 
378 	/* write it out */
379 	if (rec_write(tdb, tdb->map_size - length, &rec) == -1) {
380 		goto fail;
381 	}
382 
383 	/* link it into the free list */
384 	ptr = tdb->map_size - length;
385 	if (ofs_write(tdb, offset, &ptr) == -1) goto fail;
386 
387 #if HAVE_MMAP
388         if (tdb->fd != -1) {
389             tdb->map_ptr = (void *)mmap(NULL, tdb->map_size,
390                                         PROT_READ|PROT_WRITE,
391                                         MAP_SHARED | MAP_FILE, tdb->fd, 0);
392         }
393 #endif
394 
395 	tdb_unlock(tdb, -1);
396 	return 0;
397 
398  fail:
399 	tdb_unlock(tdb,-1);
400 	return -1;
401 }
402 
403 /* allocate some space from the free list. The offset returned points
404    to a unconnected list_struct within the database with room for at
405    least length bytes of total data
406 
407    0 is returned if the space could not be allocated
408  */
409 static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length)
410 {
411 	tdb_off offset, rec_ptr, last_ptr;
412 	struct list_struct rec, lastrec, newrec;
413 
414 	tdb_lock(tdb, -1);
415 
416  again:
417 	last_ptr = 0;
418 	offset = FREELIST_TOP;
419 
420 	/* read in the freelist top */
421 	if (ofs_read(tdb, offset, &rec_ptr) == -1) {
422 		goto fail;
423 	}
424 
425 	/* keep looking until we find a freelist record that is big
426            enough */
427 	while (rec_ptr) {
428 		if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) {
429 			goto fail;
430 		}
431 
432 		if (rec.magic != TDB_FREE_MAGIC) {
433 #if TDB_DEBUG
434 			printf("bad magic 0x%08x in free list\n", rec.magic);
435 #endif
436 			goto fail;
437 		}
438 
439 		if (rec.rec_len >= length) {
440 			/* found it - now possibly split it up  */
441 			if (rec.rec_len > length + MIN_REC_SIZE) {
442 				length = (length + TDB_ALIGN) & ~(TDB_ALIGN-1);
443 
444 				newrec.rec_len = rec.rec_len - (sizeof(rec) + length);
445 				newrec.next = rec.next;
446 				newrec.magic = TDB_FREE_MAGIC;
447 
448 				rec.rec_len = length;
449 				rec.next = rec_ptr + sizeof(rec) + rec.rec_len;
450 
451 				if (rec_write(tdb, rec.next, &newrec) == -1) {
452 					goto fail;
453 				}
454 
455 				if (rec_write(tdb, rec_ptr, &rec) == -1) {
456 					goto fail;
457 				}
458 			}
459 
460 			/* remove it from the list */
461 			if (last_ptr == 0) {
462 				offset = FREELIST_TOP;
463 
464 				if (ofs_write(tdb, offset, &rec.next) == -1) {
465 					goto fail;
466 				}
467 			} else {
468 				lastrec.next = rec.next;
469 				if (rec_write(tdb, last_ptr, &lastrec) == -1) {
470 					goto fail;
471 				}
472 			}
473 
474 			/* all done - return the new record offset */
475 			tdb_unlock(tdb, -1);
476 			return rec_ptr;
477 		}
478 
479 		/* move to the next record */
480 		lastrec = rec;
481 		last_ptr = rec_ptr;
482 		rec_ptr = rec.next;
483 	}
484 
485 	/* we didn't find enough space. See if we can expand the
486 	   database and if we can then try again */
487 	if (tdb_expand(tdb, length + sizeof(rec)) == 0) goto again;
488 
489  fail:
490 #if TDB_DEBUG
491 	printf("tdb_allocate failed for size %u\n", length);
492 #endif
493 	tdb_unlock(tdb, -1);
494 	return 0;
495 }
496 
497 /* initialise a new database with a specified hash size */
498 static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size)
499 {
500 	struct tdb_header header;
501 	int i, size = 0;
502 	tdb_off buf[16];
503 
504         /* create the header */
505         memset(&header, 0, sizeof(header));
506         memcpy(header.magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
507         header.version = TDB_VERSION;
508         header.hash_size = hash_size;
509         lseek(tdb->fd, 0, SEEK_SET);
510         ftruncate(tdb->fd, 0);
511 
512         if (tdb->fd != -1 && write(tdb->fd, &header, sizeof(header)) !=
513             sizeof(header)) {
514             tdb->ecode = TDB_ERR_IO;
515             return -1;
516         } else size += sizeof(header);
517 
518         /* the freelist and hash pointers */
519         memset(buf, 0, sizeof(buf));
520 
521         for (i=0;(hash_size+1)-i >= 16; i += 16) {
522             if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(buf)) !=
523                 sizeof(buf)) {
524                 tdb->ecode = TDB_ERR_IO;
525                 return -1;
526             } else size += sizeof(buf);
527         }
528 
529         for (;i<hash_size+1; i++) {
530             if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(tdb_off)) !=
531                 sizeof(tdb_off)) {
532                 tdb->ecode = TDB_ERR_IO;
533                 return -1;
534             } else size += sizeof(tdb_off);
535         }
536 
537         if (tdb->fd == -1) {
538             tdb->map_ptr = calloc(size, 1);
539             tdb->map_size = size;
540             if (tdb->map_ptr == NULL) {
541                 tdb->ecode = TDB_ERR_IO;
542                 return -1;
543             }
544             memcpy(&tdb->header, &header, sizeof(header));
545         }
546 
547 #if TDB_DEBUG
548 	printf("initialised database of hash_size %u\n",
549 	       hash_size);
550 #endif
551 	return 0;
552 }
553 
554 /* Returns 0 on fail.  On success, return offset of record, and fills
555    in rec */
556 static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, unsigned int hash,
557 			struct list_struct *rec)
558 {
559 	tdb_off offset, rec_ptr;
560 
561 	/* find the top of the hash chain */
562 	offset = tdb_hash_top(tdb, hash);
563 
564 	/* read in the hash top */
565 	if (ofs_read(tdb, offset, &rec_ptr) == -1)
566 		return 0;
567 
568 	/* keep looking until we find the right record */
569 	while (rec_ptr) {
570 		if (rec_read(tdb, rec_ptr, rec) == -1)
571 			return 0;
572 
573 		if (hash == rec->full_hash && key.dsize == rec->key_len) {
574 			char *k;
575 			/* a very likely hit - read the key */
576 			k = tdb_alloc_read(tdb, rec_ptr + sizeof(*rec),
577 					   rec->key_len);
578 
579 			if (!k)
580 				return 0;
581 
582 			if (memcmp(key.dptr, k, key.dsize) == 0) {
583 				free(k);
584 				return rec_ptr;
585 			}
586 			free(k);
587 		}
588 
589 		/* move to the next record */
590 		rec_ptr = rec->next;
591 	}
592 	return 0;
593 }
594 
595 /*
596    return an error string for the last tdb error
597 */
598 char *tdb_errorstr(TDB_CONTEXT *tdb)
599 {
600 	int i;
601 	static struct {
602 		enum TDB_ERROR ecode;
603 		char *estring;
604 	} emap[] = {
605 		{TDB_SUCCESS, "Success"},
606 		{TDB_ERR_CORRUPT, "Corrupt database"},
607 		{TDB_ERR_IO, "IO Error"},
608 		{TDB_ERR_LOCK, "Locking error"},
609 		{TDB_ERR_OOM, "Out of memory"},
610 		{TDB_ERR_EXISTS, "Record exists"},
611 		{-1, NULL}};
612         if (tdb != NULL) {
613             for (i=0;emap[i].estring;i++) {
614 		if (tdb->ecode == emap[i].ecode) return emap[i].estring;
615             }
616         } else {
617             return "Invalid tdb context";
618         }
619 	return "Invalid error code";
620 }
621 
622 
623 /* update an entry in place - this only works if the new data size
624    is <= the old data size and the key exists.
625    on failure return -1
626 */
627 static int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf)
628 {
629 	unsigned hash;
630 	struct list_struct rec;
631 	tdb_off rec_ptr;
632 	int ret = -1;
633 
634         if (tdb == NULL) {
635 #ifdef TDB_DEBUG
636             printf("tdb_update() called with null context\n");
637 #endif
638             return -1;
639         }
640 
641 	/* find which hash bucket it is in */
642 	hash = tdb_hash(&key);
643 
644 	tdb_lock(tdb, BUCKET(hash));
645 	rec_ptr = tdb_find(tdb, key, hash, &rec);
646 
647 	if (!rec_ptr)
648 		goto out;
649 
650 	/* must be long enough */
651 	if (rec.rec_len < key.dsize + dbuf.dsize)
652 		goto out;
653 
654 	if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
655 		      dbuf.dptr, dbuf.dsize) == -1)
656 		goto out;
657 
658 	if (dbuf.dsize != rec.data_len) {
659 		/* update size */
660 		rec.data_len = dbuf.dsize;
661 		ret = rec_write(tdb, rec_ptr, &rec);
662 	} else
663 		ret = 0;
664 
665  out:
666 	tdb_unlock(tdb, BUCKET(hash));
667 	return ret;
668 }
669 
670 /* find an entry in the database given a key */
671 TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
672 {
673 	unsigned hash;
674 	tdb_off rec_ptr;
675 	struct list_struct rec;
676 	TDB_DATA ret = null_data;
677 
678         if (tdb == NULL) {
679 #ifdef TDB_DEBUG
680             printf("tdb_fetch() called with null context\n");
681 #endif
682             return null_data;
683         }
684 
685 	/* find which hash bucket it is in */
686 	hash = tdb_hash(&key);
687 
688 	tdb_lock(tdb, BUCKET(hash));
689 	rec_ptr = tdb_find(tdb, key, hash, &rec);
690 
691 	if (rec_ptr) {
692 		ret.dptr = tdb_alloc_read(tdb,
693 					  rec_ptr + sizeof(rec) + rec.key_len,
694 					  rec.data_len);
695 		ret.dsize = rec.data_len;
696 	}
697 
698 	tdb_unlock(tdb, BUCKET(hash));
699 	return ret;
700 }
701 
702 /* check if an entry in the database exists
703 
704    note that 1 is returned if the key is found and 0 is returned if not found
705    this doesn't match the conventions in the rest of this module, but is
706    compatible with gdbm
707 */
708 int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key)
709 {
710 	unsigned hash;
711 	tdb_off rec_ptr;
712 	struct list_struct rec;
713 
714         if (tdb == NULL) {
715 #ifdef TDB_DEBUG
716             printf("tdb_exists() called with null context\n");
717 #endif
718             return 0;
719         }
720 
721 	/* find which hash bucket it is in */
722 	hash = tdb_hash(&key);
723 
724 	tdb_lock(tdb, BUCKET(hash));
725 	rec_ptr = tdb_find(tdb, key, hash, &rec);
726 	tdb_unlock(tdb, BUCKET(hash));
727 
728 	return rec_ptr != 0;
729 }
730 
731 /* traverse the entire database - calling fn(tdb, key, data) on each element.
732    return -1 on error or the record count traversed
733    if fn is NULL then it is not called
734    a non-zero return value from fn() indicates that the traversal should stop
735   */
736 int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void* state), void* state)
737 {
738 	int count = 0;
739 	unsigned h;
740 	tdb_off offset, rec_ptr;
741 	struct list_struct rec;
742 	char *data;
743 	TDB_DATA key, dbuf;
744 
745         if (tdb == NULL) {
746 #ifdef TDB_DEBUG
747             printf("tdb_traverse() called with null context\n");
748 #endif
749             return -1;
750         }
751 
752 	/* loop over all hash chains */
753 	for (h = 0; h < tdb->header.hash_size; h++) {
754 		tdb_lock(tdb, BUCKET(h));
755 
756 		/* read in the hash top */
757 		offset = tdb_hash_top(tdb, h);
758 		if (ofs_read(tdb, offset, &rec_ptr) == -1) {
759 			goto fail;
760 		}
761 
762 		/* traverse all records for this hash */
763 		while (rec_ptr) {
764 			if (rec_read(tdb, rec_ptr, &rec) == -1) {
765 				goto fail;
766 			}
767 
768 			/* now read the full record */
769 			data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec),
770 					     rec.key_len + rec.data_len);
771 			if (!data) {
772 				goto fail;
773 			}
774 
775 			key.dptr = data;
776 			key.dsize = rec.key_len;
777 			dbuf.dptr = data + rec.key_len;
778 			dbuf.dsize = rec.data_len;
779 			count++;
780 
781 			if (fn && fn(tdb, key, dbuf, state) != 0) {
782 				/* they want us to stop traversing */
783 				free(data);
784 				tdb_unlock(tdb, BUCKET(h));
785 				return count;
786 			}
787 
788 			/* a miss - drat */
789 			free(data);
790 
791 			/* move to the next record */
792 			rec_ptr = rec.next;
793 		}
794 		tdb_unlock(tdb, BUCKET(h));
795 	}
796 
797 	/* return the number traversed */
798 	return count;
799 
800  fail:
801 	tdb_unlock(tdb, BUCKET(h));
802 	return -1;
803 }
804 
805 
806 /* find the first entry in the database and return its key */
807 TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb)
808 {
809 	tdb_off offset, rec_ptr;
810 	struct list_struct rec;
811 	unsigned hash;
812 	TDB_DATA ret;
813 
814         if (tdb == NULL) {
815 #ifdef TDB_DEBUG
816             printf("tdb_firstkey() called with null context\n");
817 #endif
818             return null_data;
819         }
820 
821 	/* look for a non-empty hash chain */
822 	for (hash = 0, rec_ptr = 0;
823 	     hash < tdb->header.hash_size;
824 	     hash++) {
825 		/* find the top of the hash chain */
826 		offset = tdb_hash_top(tdb, hash);
827 
828 		tdb_lock(tdb, BUCKET(hash));
829 
830 		/* read in the hash top */
831 		if (ofs_read(tdb, offset, &rec_ptr) == -1) {
832 			goto fail;
833 		}
834 
835 		if (rec_ptr) break;
836 
837 		tdb_unlock(tdb, BUCKET(hash));
838 	}
839 
840 	if (rec_ptr == 0) return null_data;
841 
842 	/* we've found a non-empty chain, now read the record */
843 	if (rec_read(tdb, rec_ptr, &rec) == -1) {
844 		goto fail;
845 	}
846 
847 	/* allocate and read the key space */
848 	ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len);
849 	ret.dsize = rec.key_len;
850 	tdb_unlock(tdb, BUCKET(hash));
851 	return ret;
852 
853  fail:
854 	tdb_unlock(tdb, BUCKET(hash));
855 	return null_data;
856 }
857 
858 /* find the next entry in the database, returning its key */
859 TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA key)
860 {
861 	unsigned hash, hbucket;
862 	tdb_off rec_ptr, offset;
863 	struct list_struct rec;
864 	TDB_DATA ret;
865 
866         if (tdb == NULL) {
867 #ifdef TDB_DEBUG
868             printf("tdb_nextkey() called with null context\n");
869 #endif
870             return null_data;
871         }
872 
873 	/* find which hash bucket it is in */
874 	hash = tdb_hash(&key);
875 	hbucket = BUCKET(hash);
876 
877 	tdb_lock(tdb, hbucket);
878 	rec_ptr = tdb_find(tdb, key, hash, &rec);
879 	if (rec_ptr) {
880 		/* we want the next record after this one */
881 		rec_ptr = rec.next;
882 	}
883 
884 	/* not found or last in hash: look for next non-empty hash chain */
885 	while (rec_ptr == 0) {
886 		tdb_unlock(tdb, hbucket);
887 
888 		if (++hbucket >= tdb->header.hash_size - 1)
889 			return null_data;
890 
891 		offset = tdb_hash_top(tdb, hbucket);
892 		tdb_lock(tdb, hbucket);
893 		/* read in the hash top */
894 		if (ofs_read(tdb, offset, &rec_ptr) == -1) {
895 			tdb_unlock(tdb, hbucket);
896 			return null_data;
897 		}
898 	}
899 
900 	/* Read the record. */
901 	if (rec_read(tdb, rec_ptr, &rec) == -1) {
902 		tdb_unlock(tdb, hbucket);
903 		return null_data;
904 	}
905 	/* allocate and read the key */
906 	ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len);
907 	ret.dsize = rec.key_len;
908 	tdb_unlock(tdb, hbucket);
909 
910 	return ret;
911 }
912 
913 /* delete an entry in the database given a key */
914 int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key)
915 {
916 	unsigned hash;
917 	tdb_off offset, rec_ptr, last_ptr;
918 	struct list_struct rec, lastrec;
919 	char *data = NULL;
920 
921         if (tdb == NULL) {
922 #ifdef TDB_DEBUG
923             printf("tdb_delete() called with null context\n");
924 #endif
925             return -1;
926         }
927 
928 	/* find which hash bucket it is in */
929 	hash = tdb_hash(&key);
930 
931 	tdb_lock(tdb, BUCKET(hash));
932 
933 	/* find the top of the hash chain */
934 	offset = tdb_hash_top(tdb, hash);
935 
936 	/* read in the hash top */
937 	if (ofs_read(tdb, offset, &rec_ptr) == -1) {
938 		goto fail;
939 	}
940 
941 	last_ptr = 0;
942 
943 	/* keep looking until we find the right record */
944 	while (rec_ptr) {
945 		if (rec_read(tdb, rec_ptr, &rec) == -1) {
946 			goto fail;
947 		}
948 
949 		if (hash == rec.full_hash && key.dsize == rec.key_len) {
950 			/* a very likely hit - read the record and full key */
951 			data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec),
952 					     rec.key_len);
953 			if (!data) {
954 				goto fail;
955 			}
956 
957 			if (memcmp(key.dptr, data, key.dsize) == 0) {
958 				/* a definite match - delete it */
959 				if (last_ptr == 0) {
960 					offset = tdb_hash_top(tdb, hash);
961 					if (ofs_write(tdb, offset, &rec.next) == -1) {
962 						goto fail;
963 					}
964 				} else {
965 					lastrec.next = rec.next;
966 					if (rec_write(tdb, last_ptr, &lastrec) == -1) {
967 						goto fail;
968 					}
969 				}
970 				tdb_unlock(tdb, BUCKET(hash));
971 				tdb_lock(tdb, -1);
972 				/* and recover the space */
973 				offset = FREELIST_TOP;
974 				if (ofs_read(tdb, offset, &rec.next) == -1) {
975 					goto fail2;
976 				}
977 				rec.magic = TDB_FREE_MAGIC;
978 				if (rec_write(tdb, rec_ptr, &rec) == -1) {
979 					goto fail2;
980 				}
981 				if (ofs_write(tdb, offset, &rec_ptr) == -1) {
982 					goto fail2;
983 				}
984 
985 				/* yipee - all done */
986 				free(data);
987 				tdb_unlock(tdb, -1);
988 				return 0;
989 			}
990 
991 			/* a miss - drat */
992 			free(data);
993 			data = NULL;
994 		}
995 
996 		/* move to the next record */
997 		last_ptr = rec_ptr;
998 		lastrec = rec;
999 		rec_ptr = rec.next;
1000 	}
1001 
1002  fail:
1003 	if (data) free(data);
1004 	tdb_unlock(tdb, BUCKET(hash));
1005 	return -1;
1006 
1007  fail2:
1008 	if (data) free(data);
1009 	tdb_unlock(tdb, -1);
1010 	return -1;
1011 }
1012 
1013 
1014 /* store an element in the database, replacing any existing element
1015    with the same key
1016 
1017    return 0 on success, -1 on failure
1018 */
1019 int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
1020 {
1021 	struct list_struct rec;
1022 	unsigned hash;
1023 	tdb_off rec_ptr, offset;
1024 	char *p = NULL;
1025 
1026         if (tdb == NULL) {
1027 #ifdef TDB_DEBUG
1028             printf("tdb_store() called with null context\n");
1029 #endif
1030             return -1;
1031         }
1032 
1033 	/* find which hash bucket it is in */
1034 	hash = tdb_hash(&key);
1035 
1036 	/* check for it existing */
1037 	if (flag == TDB_INSERT && tdb_exists(tdb, key)) {
1038 		tdb->ecode = TDB_ERR_EXISTS;
1039 		return -1;
1040 	}
1041 
1042 	/* first try in-place update */
1043 	if (flag != TDB_INSERT && tdb_update(tdb, key, dbuf) == 0) {
1044 		return 0;
1045 	}
1046 
1047 	rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize);
1048 	if (rec_ptr == 0) {
1049 		return -1;
1050 	}
1051 
1052 	tdb_lock(tdb, BUCKET(hash));
1053 
1054 	/* delete any existing record - if it doesn't exist we don't care */
1055 	if (flag != TDB_INSERT) {
1056 		tdb_delete(tdb, key);
1057 	}
1058 
1059 	/* read the newly created record */
1060 	if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) {
1061 		goto fail;
1062 	}
1063 
1064 	if (rec.magic != TDB_FREE_MAGIC) goto fail;
1065 
1066 	/* find the top of the hash chain */
1067 	offset = tdb_hash_top(tdb, hash);
1068 
1069 	/* read in the hash top diretcly into our next pointer */
1070 	if (ofs_read(tdb, offset, &rec.next) == -1) {
1071 		goto fail;
1072 	}
1073 
1074 	rec.key_len = key.dsize;
1075 	rec.data_len = dbuf.dsize;
1076 	rec.full_hash = hash;
1077 	rec.magic = TDB_MAGIC;
1078 
1079 	p = (char *)malloc(sizeof(rec) + key.dsize + dbuf.dsize);
1080 	if (!p) {
1081 		tdb->ecode = TDB_ERR_OOM;
1082 		goto fail;
1083 	}
1084 
1085 	memcpy(p, &rec, sizeof(rec));
1086 	memcpy(p+sizeof(rec), key.dptr, key.dsize);
1087 	memcpy(p+sizeof(rec)+key.dsize, dbuf.dptr, dbuf.dsize);
1088 
1089 	if (tdb_write(tdb, rec_ptr, p, sizeof(rec)+key.dsize+dbuf.dsize) == -1)
1090 		goto fail;
1091 
1092 	free(p);
1093 	p = NULL;
1094 
1095 	/* and point the top of the hash chain at it */
1096 	if (ofs_write(tdb, offset, &rec_ptr) == -1) goto fail;
1097 
1098 	tdb_unlock(tdb, BUCKET(hash));
1099 	return 0;
1100 
1101  fail:
1102 #if TDB_DEBUG
1103 	printf("store failed for hash 0x%08x in bucket %u\n", hash, BUCKET(hash));
1104 #endif
1105 	if (p) free(p);
1106 	tdb_unlock(tdb, BUCKET(hash));
1107 	return -1;
1108 }
1109 
1110 
1111 /* open the database, creating it if necessary
1112 
1113    The open_flags and mode are passed straight to the open call on the database
1114    file. A flags value of O_WRONLY is invalid
1115 
1116    The hash size is advisory, use zero for a default value.
1117 
1118    return is NULL on error
1119 */
1120 TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags,
1121 		      int open_flags, mode_t mode)
1122 {
1123 	TDB_CONTEXT tdb, *ret;
1124 	struct stat st;
1125 
1126 	memset(&tdb, 0, sizeof(tdb));
1127 
1128 	tdb.fd = -1;
1129 	tdb.name = NULL;
1130 	tdb.map_ptr = NULL;
1131 
1132 	if ((open_flags & O_ACCMODE) == O_WRONLY) {
1133 		goto fail;
1134 	}
1135 
1136 	if (hash_size == 0) hash_size = DEFAULT_HASH_SIZE;
1137 
1138 	tdb.read_only = ((open_flags & O_ACCMODE) == O_RDONLY);
1139 
1140         if (name != NULL) {
1141             tdb.fd = open(name, open_flags, mode);
1142             if (tdb.fd == -1) {
1143 		goto fail;
1144             }
1145         }
1146 
1147 	/* ensure there is only one process initialising at once */
1148 	tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_SET, F_WRLCK, F_SETLKW);
1149 
1150 	if (tdb_flags & TDB_CLEAR_IF_FIRST) {
1151 		/* we need to zero the database if we are the only
1152 		   one with it open */
1153 		if (tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_WRLCK, F_SETLK) == 0) {
1154 			ftruncate(tdb.fd, 0);
1155 			tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLK);
1156 		}
1157 	}
1158 
1159 	/* leave this lock in place */
1160 	tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_RDLCK, F_SETLKW);
1161 
1162 	if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header) ||
1163 	    strcmp(tdb.header.magic_food, TDB_MAGIC_FOOD) != 0 ||
1164 	    tdb.header.version != TDB_VERSION) {
1165 		/* its not a valid database - possibly initialise it */
1166 		if (!(open_flags & O_CREAT)) {
1167 			goto fail;
1168 		}
1169 		if (tdb_new_database(&tdb, hash_size) == -1) goto fail;
1170 
1171 		lseek(tdb.fd, 0, SEEK_SET);
1172 		if (tdb.fd != -1 && read(tdb.fd, &tdb.header,
1173                                          sizeof(tdb.header)) !=
1174                                          sizeof(tdb.header))
1175                     goto fail;
1176 	}
1177 
1178         if (tdb.fd != -1) {
1179             fstat(tdb.fd, &st);
1180 
1181             /* map the database and fill in the return structure */
1182             tdb.name = name ? strdup(name) : NULL;
1183             tdb.map_size = st.st_size;
1184         }
1185 
1186         tdb.locked = (int *)calloc(tdb.header.hash_size+1,
1187                                    sizeof(tdb.locked[0]));
1188         if (!tdb.locked) {
1189             goto fail;
1190         }
1191 
1192 #if HAVE_MMAP
1193         if (tdb.fd != -1) {
1194             tdb.map_ptr = (void *)mmap(NULL, st.st_size,
1195                                        tdb.read_only? PROT_READ : PROT_READ|PROT_WRITE,
1196                                        MAP_SHARED | MAP_FILE, tdb.fd, 0);
1197         }
1198 #endif
1199 
1200 	ret = (TDB_CONTEXT *)malloc(sizeof(tdb));
1201 	if (!ret) goto fail;
1202 
1203 	*ret = tdb;
1204 
1205 #if TDB_DEBUG
1206 	printf("mapped database of hash_size %u map_size=%u\n",
1207 	       hash_size, tdb.map_size);
1208 #endif
1209 
1210 	tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLKW);
1211 	return ret;
1212 
1213  fail:
1214         if (tdb.name) free(tdb.name);
1215 	if (tdb.fd != -1) close(tdb.fd);
1216 	if (tdb.map_ptr) munmap(tdb.map_ptr, tdb.map_size);
1217 
1218 	return NULL;
1219 }
1220 
1221 /* close a database */
1222 int tdb_close(TDB_CONTEXT *tdb)
1223 {
1224 	if (!tdb) return -1;
1225 
1226 	if (tdb->name) free(tdb->name);
1227 	if (tdb->fd != -1) close(tdb->fd);
1228 	if (tdb->locked) free(tdb->locked);
1229 
1230 	if (tdb->map_ptr) {
1231             if (tdb->fd != -1) {
1232                 munmap(tdb->map_ptr, tdb->map_size);
1233             } else {
1234                 free(tdb->map_ptr);
1235             }
1236         }
1237 
1238 	memset(tdb, 0, sizeof(*tdb));
1239 	free(tdb);
1240 
1241 	return 0;
1242 }
1243 
1244 /* lock the database. If we already have it locked then don't do anything */
1245 int tdb_writelock(TDB_CONTEXT *tdb)
1246 {
1247         if (tdb == NULL) {
1248 #ifdef TDB_DEBUG
1249             printf("tdb_writelock() called with null context\n");
1250 #endif
1251             return -1;
1252         }
1253 
1254 	return tdb_lock(tdb, -1);
1255 }
1256 
1257 /* unlock the database. */
1258 int tdb_writeunlock(TDB_CONTEXT *tdb)
1259 {
1260         if (tdb == NULL) {
1261 #ifdef TDB_DEBUG
1262             printf("tdb_writeunlock() called with null context\n");
1263 #endif
1264             return -1;
1265         }
1266 
1267 	return tdb_unlock(tdb, -1);
1268 }
1269 
1270 int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key)
1271 {
1272         if (tdb == NULL) {
1273 #ifdef TDB_DEBUG
1274             printf("tdb_lockchain() called with null context\n");
1275 #endif
1276             return -1;
1277         }
1278 
1279 	return tdb_lock(tdb, BUCKET(tdb_hash(&key)));
1280 }
1281 
1282 
1283 int tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key)
1284 {
1285         if (tdb == NULL) {
1286 #ifdef TDB_DEBUG
1287             printf("tdb_unlockchain() called with null context\n");
1288 #endif
1289             return -1;
1290         }
1291 
1292 	return tdb_unlock(tdb, BUCKET(tdb_hash(&key)));
1293 }
1294