xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/ext/SDBM_File/sdbm/sdbm.c (revision 0:68f95e015346)
1 /*
2  * sdbm - ndbm work-alike hashed database library
3  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4  * author: oz@nexus.yorku.ca
5  * status: public domain.
6  *
7  * core routines
8  */
9 
10 #include "INTERN.h"
11 #include "config.h"
12 #ifdef WIN32
13 #include "io.h"
14 #endif
15 #include "sdbm.h"
16 #include "tune.h"
17 #include "pair.h"
18 
19 #ifdef I_FCNTL
20 # include <fcntl.h>
21 #endif
22 #ifdef I_SYS_FILE
23 # include <sys/file.h>
24 #endif
25 
26 #ifdef I_STRING
27 # ifndef __ultrix__
28 #  include <string.h>
29 # endif
30 #else
31 # include <strings.h>
32 #endif
33 
34 /*
35  * externals
36  */
37 #ifndef WIN32
38 #ifndef sun
39 extern int errno;
40 #endif
41 #endif
42 
43 extern Malloc_t malloc proto((MEM_SIZE));
44 extern Free_t free proto((Malloc_t));
45 
46 /*
47  * forward
48  */
49 static int getdbit proto((DBM *, long));
50 static int setdbit proto((DBM *, long));
51 static int getpage proto((DBM *, long));
52 static datum getnext proto((DBM *));
53 static int makroom proto((DBM *, long, int));
54 
55 /*
56  * useful macros
57  */
/* true when a datum has no data pointer or a negative size */
#define bad(x)		((x).dptr == NULL || (x).dsize < 0)
/* hash value of a datum, computed by sdbm_hash() */
#define exhash(item)	sdbm_hash((item).dptr, (item).dsize)
/* record an i/o error in the database flags */
#define ioerr(db)	((db)->flags |= DBM_IOERR)

/*
 * byte offset of page / directory block number "off".
 * The whole expansion is parenthesized so the macros behave as a single
 * operand next to any operator (for example % or unary minus).
 */
#define OFF_PAG(off)	((long) (off) * PBLKSIZ)
#define OFF_DIR(off)	((long) (off) * DBLKSIZ)
64 
/*
 * masks[n] has the n low-order bits set (2^n - 1), for n = 0 .. 31.
 */
static long masks[] = {
	0x00000000L, 0x00000001L, 0x00000003L, 0x00000007L,
	0x0000000FL, 0x0000001FL, 0x0000003FL, 0x0000007FL,
	0x000000FFL, 0x000001FFL, 0x000003FFL, 0x000007FFL,
	0x00000FFFL, 0x00001FFFL, 0x00003FFFL, 0x00007FFFL,
	0x0000FFFFL, 0x0001FFFFL, 0x0003FFFFL, 0x0007FFFFL,
	0x000FFFFFL, 0x001FFFFFL, 0x003FFFFFL, 0x007FFFFFL,
	0x00FFFFFFL, 0x01FFFFFFL, 0x03FFFFFFL, 0x07FFFFFFL,
	0x0FFFFFFFL, 0x1FFFFFFFL, 0x3FFFFFFFL, 0x7FFFFFFFL
};
75 
76 DBM *
sdbm_open(register char * file,register int flags,register int mode)77 sdbm_open(register char *file, register int flags, register int mode)
78 {
79 	register DBM *db;
80 	register char *dirname;
81 	register char *pagname;
82 	register int n;
83 
84 	if (file == NULL || !*file)
85 		return errno = EINVAL, (DBM *) NULL;
86 /*
87  * need space for two seperate filenames
88  */
89 	n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
90 
91 	if ((dirname = (char *) malloc((unsigned) n)) == NULL)
92 		return errno = ENOMEM, (DBM *) NULL;
93 /*
94  * build the file names
95  */
96 	dirname = strcat(strcpy(dirname, file), DIRFEXT);
97 	pagname = strcpy(dirname + strlen(dirname) + 1, file);
98 	pagname = strcat(pagname, PAGFEXT);
99 
100 	db = sdbm_prep(dirname, pagname, flags, mode);
101 	free((char *) dirname);
102 	return db;
103 }
104 
105 DBM *
sdbm_prep(char * dirname,char * pagname,int flags,int mode)106 sdbm_prep(char *dirname, char *pagname, int flags, int mode)
107 {
108 	register DBM *db;
109 	struct stat dstat;
110 
111 	if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
112 		return errno = ENOMEM, (DBM *) NULL;
113 
114         db->flags = 0;
115         db->hmask = 0;
116         db->blkptr = 0;
117         db->keyptr = 0;
118 /*
119  * adjust user flags so that WRONLY becomes RDWR,
120  * as required by this package. Also set our internal
121  * flag for RDONLY if needed.
122  */
123 	if (flags & O_WRONLY)
124 		flags = (flags & ~O_WRONLY) | O_RDWR;
125 
126 	else if ((flags & 03) == O_RDONLY)
127 		db->flags = DBM_RDONLY;
128 /*
129  * open the files in sequence, and stat the dirfile.
130  * If we fail anywhere, undo everything, return NULL.
131  */
132 #if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
133 	flags |= O_BINARY;
134 #	endif
135 	if ((db->pagf = open(pagname, flags, mode)) > -1) {
136 		if ((db->dirf = open(dirname, flags, mode)) > -1) {
137 /*
138  * need the dirfile size to establish max bit number.
139  */
140 			if (fstat(db->dirf, &dstat) == 0) {
141 /*
142  * zero size: either a fresh database, or one with a single,
143  * unsplit data page: dirpage is all zeros.
144  */
145 				db->dirbno = (!dstat.st_size) ? 0 : -1;
146 				db->pagbno = -1;
147 				db->maxbno = dstat.st_size * BYTESIZ;
148 
149 				(void) memset(db->pagbuf, 0, PBLKSIZ);
150 				(void) memset(db->dirbuf, 0, DBLKSIZ);
151 			/*
152 			 * success
153 			 */
154 				return db;
155 			}
156 			(void) close(db->dirf);
157 		}
158 		(void) close(db->pagf);
159 	}
160 	free((char *) db);
161 	return (DBM *) NULL;
162 }
163 
164 void
sdbm_close(register DBM * db)165 sdbm_close(register DBM *db)
166 {
167 	if (db == NULL)
168 		errno = EINVAL;
169 	else {
170 		(void) close(db->dirf);
171 		(void) close(db->pagf);
172 		free((char *) db);
173 	}
174 }
175 
176 datum
sdbm_fetch(register DBM * db,datum key)177 sdbm_fetch(register DBM *db, datum key)
178 {
179 	if (db == NULL || bad(key))
180 		return errno = EINVAL, nullitem;
181 
182 	if (getpage(db, exhash(key)))
183 		return getpair(db->pagbuf, key);
184 
185 	return ioerr(db), nullitem;
186 }
187 
188 int
sdbm_exists(register DBM * db,datum key)189 sdbm_exists(register DBM *db, datum key)
190 {
191 	if (db == NULL || bad(key))
192 		return errno = EINVAL, -1;
193 
194 	if (getpage(db, exhash(key)))
195 		return exipair(db->pagbuf, key);
196 
197 	return ioerr(db), -1;
198 }
199 
200 int
sdbm_delete(register DBM * db,datum key)201 sdbm_delete(register DBM *db, datum key)
202 {
203 	if (db == NULL || bad(key))
204 		return errno = EINVAL, -1;
205 	if (sdbm_rdonly(db))
206 		return errno = EPERM, -1;
207 
208 	if (getpage(db, exhash(key))) {
209 		if (!delpair(db->pagbuf, key))
210 			return -1;
211 /*
212  * update the page file
213  */
214 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
215 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
216 			return ioerr(db), -1;
217 
218 		return 0;
219 	}
220 
221 	return ioerr(db), -1;
222 }
223 
224 int
sdbm_store(register DBM * db,datum key,datum val,int flags)225 sdbm_store(register DBM *db, datum key, datum val, int flags)
226 {
227 	int need;
228 	register long hash;
229 
230 	if (db == NULL || bad(key))
231 		return errno = EINVAL, -1;
232 	if (sdbm_rdonly(db))
233 		return errno = EPERM, -1;
234 
235 	need = key.dsize + val.dsize;
236 /*
237  * is the pair too big (or too small) for this database ??
238  */
239 	if (need < 0 || need > PAIRMAX)
240 		return errno = EINVAL, -1;
241 
242 	if (getpage(db, (hash = exhash(key)))) {
243 /*
244  * if we need to replace, delete the key/data pair
245  * first. If it is not there, ignore.
246  */
247 		if (flags == DBM_REPLACE)
248 			(void) delpair(db->pagbuf, key);
249 #ifdef SEEDUPS
250 		else if (duppair(db->pagbuf, key))
251 			return 1;
252 #endif
253 /*
254  * if we do not have enough room, we have to split.
255  */
256 		if (!fitpair(db->pagbuf, need))
257 			if (!makroom(db, hash, need))
258 				return ioerr(db), -1;
259 /*
260  * we have enough room or split is successful. insert the key,
261  * and update the page file.
262  */
263 		(void) putpair(db->pagbuf, key, val);
264 
265 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
266 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
267 			return ioerr(db), -1;
268 	/*
269 	 * success
270 	 */
271 		return 0;
272 	}
273 
274 	return ioerr(db), -1;
275 }
276 
277 /*
278  * makroom - make room by splitting the overfull page
279  * this routine will attempt to make room for SPLTMAX times before
280  * giving up.
281  */
282 static int
makroom(register DBM * db,long int hash,int need)283 makroom(register DBM *db, long int hash, int need)
284 {
285 	long newp;
286 	char twin[PBLKSIZ];
287 #if defined(DOSISH) || defined(WIN32)
288 	char zer[PBLKSIZ];
289 	long oldtail;
290 #endif
291 	char *pag = db->pagbuf;
292 	char *New = twin;
293 	register int smax = SPLTMAX;
294 
295 	do {
296 /*
297  * split the current page
298  */
299 		(void) splpage(pag, New, db->hmask + 1);
300 /*
301  * address of the new page
302  */
303 		newp = (hash & db->hmask) | (db->hmask + 1);
304 
305 /*
306  * write delay, read avoidence/cache shuffle:
307  * select the page for incoming pair: if key is to go to the new page,
308  * write out the previous one, and copy the new one over, thus making
309  * it the current page. If not, simply write the new page, and we are
310  * still looking at the page of interest. current page is not updated
311  * here, as sdbm_store will do so, after it inserts the incoming pair.
312  */
313 
314 #if defined(DOSISH) || defined(WIN32)
315 		/*
316 		 * Fill hole with 0 if made it.
317 		 * (hole is NOT read as 0)
318 		 */
319 		oldtail = lseek(db->pagf, 0L, SEEK_END);
320 		memset(zer, 0, PBLKSIZ);
321 		while (OFF_PAG(newp) > oldtail) {
322 			if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
323 			    write(db->pagf, zer, PBLKSIZ) < 0) {
324 
325 				return 0;
326 			}
327 			oldtail += PBLKSIZ;
328 		}
329 #endif
330 		if (hash & (db->hmask + 1)) {
331 			if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
332 			    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
333 				return 0;
334 			db->pagbno = newp;
335 			(void) memcpy(pag, New, PBLKSIZ);
336 		}
337 		else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
338 			 || write(db->pagf, New, PBLKSIZ) < 0)
339 			return 0;
340 
341 		if (!setdbit(db, db->curbit))
342 			return 0;
343 /*
344  * see if we have enough room now
345  */
346 		if (fitpair(pag, need))
347 			return 1;
348 /*
349  * try again... update curbit and hmask as getpage would have
350  * done. because of our update of the current page, we do not
351  * need to read in anything. BUT we have to write the current
352  * [deferred] page out, as the window of failure is too great.
353  */
354 		db->curbit = 2 * db->curbit +
355 			((hash & (db->hmask + 1)) ? 2 : 1);
356 		db->hmask |= db->hmask + 1;
357 
358 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
359 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
360 			return 0;
361 
362 	} while (--smax);
363 /*
364  * if we are here, this is real bad news. After SPLTMAX splits,
365  * we still cannot fit the key. say goodnight.
366  */
367 #ifdef BADMESS
368 	(void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
369 #endif
370 	return 0;
371 
372 }
373 
374 /*
375  * the following two routines will break if
376  * deletions aren't taken into account. (ndbm bug)
377  */
378 datum
sdbm_firstkey(register DBM * db)379 sdbm_firstkey(register DBM *db)
380 {
381 	if (db == NULL)
382 		return errno = EINVAL, nullitem;
383 /*
384  * start at page 0
385  */
386 	if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
387 	    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
388 		return ioerr(db), nullitem;
389 	db->pagbno = 0;
390 	db->blkptr = 0;
391 	db->keyptr = 0;
392 
393 	return getnext(db);
394 }
395 
396 datum
sdbm_nextkey(register DBM * db)397 sdbm_nextkey(register DBM *db)
398 {
399 	if (db == NULL)
400 		return errno = EINVAL, nullitem;
401 	return getnext(db);
402 }
403 
404 /*
405  * all important binary trie traversal
406  */
407 static int
getpage(register DBM * db,register long int hash)408 getpage(register DBM *db, register long int hash)
409 {
410 	register int hbit;
411 	register long dbit;
412 	register long pagb;
413 
414 	dbit = 0;
415 	hbit = 0;
416 	while (dbit < db->maxbno && getdbit(db, dbit))
417 		dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
418 
419 	debug(("dbit: %d...", dbit));
420 
421 	db->curbit = dbit;
422 	db->hmask = masks[hbit];
423 
424 	pagb = hash & db->hmask;
425 /*
426  * see if the block we need is already in memory.
427  * note: this lookaside cache has about 10% hit rate.
428  */
429 	if (pagb != db->pagbno) {
430 /*
431  * note: here, we assume a "hole" is read as 0s.
432  * if not, must zero pagbuf first.
433  */
434 		if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
435 		    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
436 			return 0;
437 		if (!chkpage(db->pagbuf))
438 			return 0;
439 		db->pagbno = pagb;
440 
441 		debug(("pag read: %d\n", pagb));
442 	}
443 	return 1;
444 }
445 
446 static int
getdbit(register DBM * db,register long int dbit)447 getdbit(register DBM *db, register long int dbit)
448 {
449 	register long c;
450 	register long dirb;
451 
452 	c = dbit / BYTESIZ;
453 	dirb = c / DBLKSIZ;
454 
455 	if (dirb != db->dirbno) {
456 		int got;
457 		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
458 		    || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
459 			return 0;
460 		if (got==0)
461 			memset(db->dirbuf,0,DBLKSIZ);
462 		db->dirbno = dirb;
463 
464 		debug(("dir read: %d\n", dirb));
465 	}
466 
467 	return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
468 }
469 
470 static int
setdbit(register DBM * db,register long int dbit)471 setdbit(register DBM *db, register long int dbit)
472 {
473 	register long c;
474 	register long dirb;
475 
476 	c = dbit / BYTESIZ;
477 	dirb = c / DBLKSIZ;
478 
479 	if (dirb != db->dirbno) {
480 		int got;
481 		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
482 		    || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
483 			return 0;
484 		if (got==0)
485 			memset(db->dirbuf,0,DBLKSIZ);
486 		db->dirbno = dirb;
487 
488 		debug(("dir read: %d\n", dirb));
489 	}
490 
491 	db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
492 
493 #if 0
494 	if (dbit >= db->maxbno)
495 		db->maxbno += DBLKSIZ * BYTESIZ;
496 #else
497 	if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
498 		db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
499 #endif
500 
501 	if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
502 	    || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
503 		return 0;
504 
505 	return 1;
506 }
507 
508 /*
509  * getnext - get the next key in the page, and if done with
510  * the page, try the next page in sequence
511  */
512 static datum
getnext(register DBM * db)513 getnext(register DBM *db)
514 {
515 	datum key;
516 
517 	for (;;) {
518 		db->keyptr++;
519 		key = getnkey(db->pagbuf, db->keyptr);
520 		if (key.dptr != NULL)
521 			return key;
522 /*
523  * we either run out, or there is nothing on this page..
524  * try the next one... If we lost our position on the
525  * file, we will have to seek.
526  */
527 		db->keyptr = 0;
528 		if (db->pagbno != db->blkptr++)
529 			if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
530 				break;
531 		db->pagbno = db->blkptr;
532 		if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
533 			break;
534 		if (!chkpage(db->pagbuf))
535 			break;
536 	}
537 
538 	return ioerr(db), nullitem;
539 }
540 
541