xref: /openbsd-src/gnu/usr.bin/perl/ext/SDBM_File/sdbm.c (revision c90a81c56dcebd6a1b73fe4aff9b03385b8e63b3)
1 /*
2  * sdbm - ndbm work-alike hashed database library
3  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4  * author: oz@nexus.yorku.ca
5  * status: public domain.
6  *
7  * core routines
8  */
9 
10 #include "INTERN.h"
11 #include "config.h"
12 #ifdef WIN32
13 #include "io.h"
14 #endif
15 #include "sdbm.h"
16 #include "tune.h"
17 #include "pair.h"
18 
19 #ifdef I_FCNTL
20 # include <fcntl.h>
21 #endif
22 #ifdef I_SYS_FILE
23 # include <sys/file.h>
24 #endif
25 
26 #ifdef I_STRING
27 # ifndef __ultrix__
28 #  include <string.h>
29 # endif
30 #else
31 # include <strings.h>
32 #endif
33 
34 /*
35  * externals
36  */
37 
38 #include <errno.h> /* See notes in perl.h about avoiding
39 			extern int errno; */
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
44 extern Malloc_t malloc proto((MEM_SIZE));
45 extern Free_t free proto((Malloc_t));
46 
47 #ifdef __cplusplus
48 }
49 #endif
50 
51 const datum nullitem = {0, 0};
52 
53 /*
54  * forward
55  */
56 static int getdbit proto((DBM *, long));
57 static int setdbit proto((DBM *, long));
58 static int getpage proto((DBM *, long));
59 static datum getnext proto((DBM *));
60 static int makroom proto((DBM *, long, int));
61 
62 /*
63  * useful macros
64  */
65 #define bad(x)		((x).dptr == NULL || (x).dsize < 0)
66 #define exhash(item)	sdbm_hash((item).dptr, (item).dsize)
67 #define ioerr(db)	((db)->flags |= DBM_IOERR)
68 
69 #define OFF_PAG(off)	(long) (off) * PBLKSIZ
70 #define OFF_DIR(off)	(long) (off) * DBLKSIZ
71 
72 static const long masks[] = {
73 	000000000000, 000000000001, 000000000003, 000000000007,
74 	000000000017, 000000000037, 000000000077, 000000000177,
75 	000000000377, 000000000777, 000000001777, 000000003777,
76 	000000007777, 000000017777, 000000037777, 000000077777,
77 	000000177777, 000000377777, 000000777777, 000001777777,
78 	000003777777, 000007777777, 000017777777, 000037777777,
79 	000077777777, 000177777777, 000377777777, 000777777777,
80 	001777777777, 003777777777, 007777777777, 017777777777
81 };
82 
83 DBM *
84 sdbm_open(char *file, int flags, int mode)
85 {
86 	DBM *db;
87 	char *dirname;
88 	char *pagname;
89 	size_t filelen;
90 	const size_t dirfext_size = sizeof(DIRFEXT "");
91 	const size_t pagfext_size = sizeof(PAGFEXT "");
92 
93 	if (file == NULL || !*file)
94 		return errno = EINVAL, (DBM *) NULL;
95 /*
96  * need space for two separate filenames
97  */
98 	filelen = strlen(file);
99 
100 	if ((dirname = (char *) malloc(filelen + dirfext_size
101 				       + filelen + pagfext_size)) == NULL)
102 		return errno = ENOMEM, (DBM *) NULL;
103 /*
104  * build the file names
105  */
106 	memcpy(dirname, file, filelen);
107 	memcpy(dirname + filelen, DIRFEXT, dirfext_size);
108 	pagname = dirname + filelen + dirfext_size;
109 	memcpy(pagname, file, filelen);
110 	memcpy(pagname + filelen, PAGFEXT, pagfext_size);
111 
112 	db = sdbm_prep(dirname, pagname, flags, mode);
113 	free((char *) dirname);
114 	return db;
115 }
116 
117 DBM *
118 sdbm_prep(char *dirname, char *pagname, int flags, int mode)
119 {
120 	DBM *db;
121 	struct stat dstat;
122 
123 	if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
124 		return errno = ENOMEM, (DBM *) NULL;
125 
126         db->flags = 0;
127         db->hmask = 0;
128         db->blkptr = 0;
129         db->keyptr = 0;
130 /*
131  * adjust user flags so that WRONLY becomes RDWR,
132  * as required by this package. Also set our internal
133  * flag for RDONLY if needed.
134  */
135 	if (flags & O_WRONLY)
136 		flags = (flags & ~O_WRONLY) | O_RDWR;
137 
138 	else if ((flags & 03) == O_RDONLY)
139 		db->flags = DBM_RDONLY;
140 /*
141  * open the files in sequence, and stat the dirfile.
142  * If we fail anywhere, undo everything, return NULL.
143  */
144 #if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
145 	flags |= O_BINARY;
146 #	endif
147 	if ((db->pagf = open(pagname, flags, mode)) > -1) {
148 		if ((db->dirf = open(dirname, flags, mode)) > -1) {
149 /*
150  * need the dirfile size to establish max bit number.
151  */
152 			if (fstat(db->dirf, &dstat) == 0) {
153 /*
154  * zero size: either a fresh database, or one with a single,
155  * unsplit data page: dirpage is all zeros.
156  */
157 				db->dirbno = (!dstat.st_size) ? 0 : -1;
158 				db->pagbno = -1;
159 				db->maxbno = dstat.st_size * BYTESIZ;
160 
161 				(void) memset(db->pagbuf, 0, PBLKSIZ);
162 				(void) memset(db->dirbuf, 0, DBLKSIZ);
163 			/*
164 			 * success
165 			 */
166 				return db;
167 			}
168 			(void) close(db->dirf);
169 		}
170 		(void) close(db->pagf);
171 	}
172 	free((char *) db);
173 	return (DBM *) NULL;
174 }
175 
176 void
177 sdbm_close(DBM *db)
178 {
179 	if (db == NULL)
180 		errno = EINVAL;
181 	else {
182 		(void) close(db->dirf);
183 		(void) close(db->pagf);
184 		free((char *) db);
185 	}
186 }
187 
188 datum
189 sdbm_fetch(DBM *db, datum key)
190 {
191 	if (db == NULL || bad(key))
192 		return errno = EINVAL, nullitem;
193 
194 	if (getpage(db, exhash(key)))
195 		return getpair(db->pagbuf, key);
196 
197 	return ioerr(db), nullitem;
198 }
199 
200 int
201 sdbm_exists(DBM *db, datum key)
202 {
203 	if (db == NULL || bad(key))
204 		return errno = EINVAL, -1;
205 
206 	if (getpage(db, exhash(key)))
207 		return exipair(db->pagbuf, key);
208 
209 	return ioerr(db), -1;
210 }
211 
212 int
213 sdbm_delete(DBM *db, datum key)
214 {
215 	if (db == NULL || bad(key))
216 		return errno = EINVAL, -1;
217 	if (sdbm_rdonly(db))
218 		return errno = EPERM, -1;
219 
220 	if (getpage(db, exhash(key))) {
221 		if (!delpair(db->pagbuf, key))
222 			return -1;
223 /*
224  * update the page file
225  */
226 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
227 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
228 			return ioerr(db), -1;
229 
230 		return 0;
231 	}
232 
233 	return ioerr(db), -1;
234 }
235 
236 int
237 sdbm_store(DBM *db, datum key, datum val, int flags)
238 {
239 	int need;
240 	long hash;
241 
242 	if (db == NULL || bad(key))
243 		return errno = EINVAL, -1;
244 	if (sdbm_rdonly(db))
245 		return errno = EPERM, -1;
246 
247 	need = key.dsize + val.dsize;
248 /*
249  * is the pair too big (or too small) for this database ??
250  */
251 	if (need < 0 || need > PAIRMAX)
252 		return errno = EINVAL, -1;
253 
254 	if (getpage(db, (hash = exhash(key)))) {
255 /*
256  * if we need to replace, delete the key/data pair
257  * first. If it is not there, ignore.
258  */
259 		if (flags == DBM_REPLACE)
260 			(void) delpair(db->pagbuf, key);
261 #ifdef SEEDUPS
262 		else if (duppair(db->pagbuf, key))
263 			return 1;
264 #endif
265 /*
266  * if we do not have enough room, we have to split.
267  */
268 		if (!fitpair(db->pagbuf, need))
269 			if (!makroom(db, hash, need))
270 				return ioerr(db), -1;
271 /*
272  * we have enough room or split is successful. insert the key,
273  * and update the page file.
274  */
275 		(void) putpair(db->pagbuf, key, val);
276 
277 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
278 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
279 			return ioerr(db), -1;
280 	/*
281 	 * success
282 	 */
283 		return 0;
284 	}
285 
286 	return ioerr(db), -1;
287 }
288 
289 /*
290  * makroom - make room by splitting the overfull page
291  * this routine will attempt to make room for SPLTMAX times before
292  * giving up.
293  */
294 static int
295 makroom(DBM *db, long int hash, int need)
296 {
297 	long newp;
298 	char twin[PBLKSIZ];
299 #if defined(DOSISH) || defined(WIN32)
300 	char zer[PBLKSIZ];
301 	long oldtail;
302 #endif
303 	char *pag = db->pagbuf;
304 	char *New = twin;
305 	int smax = SPLTMAX;
306 #ifdef BADMESS
307 	int rc;
308 #endif
309 
310 	do {
311 /*
312  * split the current page
313  */
314 		(void) splpage(pag, New, db->hmask + 1);
315 /*
316  * address of the new page
317  */
318 		newp = (hash & db->hmask) | (db->hmask + 1);
319 
320 /*
321  * write delay, read avoidance/cache shuffle:
322  * select the page for incoming pair: if key is to go to the new page,
323  * write out the previous one, and copy the new one over, thus making
324  * it the current page. If not, simply write the new page, and we are
325  * still looking at the page of interest. current page is not updated
326  * here, as sdbm_store will do so, after it inserts the incoming pair.
327  */
328 
329 #if defined(DOSISH) || defined(WIN32)
330 		/*
331 		 * Fill hole with 0 if made it.
332 		 * (hole is NOT read as 0)
333 		 */
334 		oldtail = lseek(db->pagf, 0L, SEEK_END);
335 		memset(zer, 0, PBLKSIZ);
336 		while (OFF_PAG(newp) > oldtail) {
337 			if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
338 			    write(db->pagf, zer, PBLKSIZ) < 0) {
339 
340 				return 0;
341 			}
342 			oldtail += PBLKSIZ;
343 		}
344 #endif
345 		if (hash & (db->hmask + 1)) {
346 			if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
347 			    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
348 				return 0;
349 			db->pagbno = newp;
350 			(void) memcpy(pag, New, PBLKSIZ);
351 		}
352 		else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
353 			 || write(db->pagf, New, PBLKSIZ) < 0)
354 			return 0;
355 
356 		if (!setdbit(db, db->curbit))
357 			return 0;
358 /*
359  * see if we have enough room now
360  */
361 		if (fitpair(pag, need))
362 			return 1;
363 /*
364  * try again... update curbit and hmask as getpage would have
365  * done. because of our update of the current page, we do not
366  * need to read in anything. BUT we have to write the current
367  * [deferred] page out, as the window of failure is too great.
368  */
369 		db->curbit = 2 * db->curbit +
370 			((hash & (db->hmask + 1)) ? 2 : 1);
371 		db->hmask |= db->hmask + 1;
372 
373 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
374 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
375 			return 0;
376 
377 	} while (--smax);
378 /*
379  * if we are here, this is real bad news. After SPLTMAX splits,
380  * we still cannot fit the key. say goodnight.
381  */
382 #ifdef BADMESS
383 	rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
384 	/* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be
385 	 * useful here but that would mean pulling in perl.h */
386 	(void)rc;
387 #endif
388 	return 0;
389 
390 }
391 
392 /*
393  * the following two routines will break if
394  * deletions aren't taken into account. (ndbm bug)
395  */
396 datum
397 sdbm_firstkey(DBM *db)
398 {
399 	if (db == NULL)
400 		return errno = EINVAL, nullitem;
401 /*
402  * start at page 0
403  */
404 	if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
405 	    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
406 		return ioerr(db), nullitem;
407 	db->pagbno = 0;
408 	db->blkptr = 0;
409 	db->keyptr = 0;
410 
411 	return getnext(db);
412 }
413 
414 datum
415 sdbm_nextkey(DBM *db)
416 {
417 	if (db == NULL)
418 		return errno = EINVAL, nullitem;
419 	return getnext(db);
420 }
421 
422 /*
423  * all important binary trie traversal
424  */
425 static int
426 getpage(DBM *db, long int hash)
427 {
428 	int hbit;
429 	long dbit;
430 	long pagb;
431 
432 	dbit = 0;
433 	hbit = 0;
434 	while (dbit < db->maxbno && getdbit(db, dbit))
435 		dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
436 
437 	debug(("dbit: %d...", dbit));
438 
439 	db->curbit = dbit;
440 	db->hmask = masks[hbit];
441 
442 	pagb = hash & db->hmask;
443 /*
444  * see if the block we need is already in memory.
445  * note: this lookaside cache has about 10% hit rate.
446  */
447 	if (pagb != db->pagbno) {
448 /*
449  * note: here, we assume a "hole" is read as 0s.
450  * if not, must zero pagbuf first.
451  */
452 		if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
453 		    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
454 			return 0;
455 		if (!chkpage(db->pagbuf))
456 			return 0;
457 		db->pagbno = pagb;
458 
459 		debug(("pag read: %d\n", pagb));
460 	}
461 	return 1;
462 }
463 
464 static int
465 getdbit(DBM *db, long int dbit)
466 {
467 	long c;
468 	long dirb;
469 
470 	c = dbit / BYTESIZ;
471 	dirb = c / DBLKSIZ;
472 
473 	if (dirb != db->dirbno) {
474 		int got;
475 		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
476 		    || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
477 			return 0;
478 		if (got==0)
479 			memset(db->dirbuf,0,DBLKSIZ);
480 		db->dirbno = dirb;
481 
482 		debug(("dir read: %d\n", dirb));
483 	}
484 
485 	return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
486 }
487 
488 static int
489 setdbit(DBM *db, long int dbit)
490 {
491 	long c;
492 	long dirb;
493 
494 	c = dbit / BYTESIZ;
495 	dirb = c / DBLKSIZ;
496 
497 	if (dirb != db->dirbno) {
498 		int got;
499 		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
500 		    || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
501 			return 0;
502 		if (got==0)
503 			memset(db->dirbuf,0,DBLKSIZ);
504 		db->dirbno = dirb;
505 
506 		debug(("dir read: %d\n", dirb));
507 	}
508 
509 	db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
510 
511 #if 0
512 	if (dbit >= db->maxbno)
513 		db->maxbno += DBLKSIZ * BYTESIZ;
514 #else
515 	if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
516 		db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
517 #endif
518 
519 	if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
520 	    || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
521 		return 0;
522 
523 	return 1;
524 }
525 
526 /*
527  * getnext - get the next key in the page, and if done with
528  * the page, try the next page in sequence
529  */
530 static datum
531 getnext(DBM *db)
532 {
533 	datum key;
534 
535 	for (;;) {
536 		db->keyptr++;
537 		key = getnkey(db->pagbuf, db->keyptr);
538 		if (key.dptr != NULL)
539 			return key;
540 /*
541  * we either run out, or there is nothing on this page..
542  * try the next one... If we lost our position on the
543  * file, we will have to seek.
544  */
545 		db->keyptr = 0;
546 		if (db->pagbno != db->blkptr++)
547 			if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
548 				break;
549 		db->pagbno = db->blkptr;
550 		if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
551 			break;
552 		if (!chkpage(db->pagbuf))
553 			break;
554 	}
555 
556 	return ioerr(db), nullitem;
557 }
558 
559