xref: /openbsd-src/gnu/usr.bin/perl/ext/SDBM_File/sdbm.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 /*
2  * sdbm - ndbm work-alike hashed database library
3  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4  * author: oz@nexus.yorku.ca
5  * status: public domain.
6  *
7  * core routines
8  */
9 
10 #include "config.h"
11 #ifdef WIN32
12 #include "io.h"
13 #endif
14 #include "sdbm.h"
15 #include "tune.h"
16 #include "pair.h"
17 
18 #ifdef I_FCNTL
19 # include <fcntl.h>
20 #endif
21 #ifdef I_SYS_FILE
22 # include <sys/file.h>
23 #endif
24 
25 #include <string.h>
26 
27 /*
28  * externals
29  */
30 
31 #include <errno.h> /* See notes in perl.h about avoiding
32                         extern int errno; */
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 extern Malloc_t malloc(MEM_SIZE);
38 extern Free_t free(Malloc_t);
39 
40 #ifdef __cplusplus
41 }
42 #endif
43 
44 const datum nullitem = {0, 0};
45 
46 /*
47  * forward
48  */
49 static int getdbit(DBM *, long);
50 static int setdbit(DBM *, long);
51 static int getpage(DBM *, long);
52 static datum getnext(DBM *);
53 static int makroom(DBM *, long, int);
54 
55 /*
56  * useful macros
57  */
58 #define bad(x)		((x).dptr == NULL || (x).dsize < 0)
59 #define exhash(item)	sdbm_hash((item).dptr, (item).dsize)
60 #define ioerr(db)	((db)->flags |= DBM_IOERR)
61 
62 #define OFF_PAG(off)	(long) (off) * PBLKSIZ
63 #define OFF_DIR(off)	(long) (off) * DBLKSIZ
64 
65 static const long masks[] = {
66         000000000000, 000000000001, 000000000003, 000000000007,
67         000000000017, 000000000037, 000000000077, 000000000177,
68         000000000377, 000000000777, 000000001777, 000000003777,
69         000000007777, 000000017777, 000000037777, 000000077777,
70         000000177777, 000000377777, 000000777777, 000001777777,
71         000003777777, 000007777777, 000017777777, 000037777777,
72         000077777777, 000177777777, 000377777777, 000777777777,
73         001777777777, 003777777777, 007777777777, 017777777777
74 };
75 
76 DBM *
77 sdbm_open(char *file, int flags, int mode)
78 {
79         DBM *db;
80         char *dirname;
81         char *pagname;
82         size_t filelen;
83         const size_t dirfext_size = sizeof(DIRFEXT "");
84         const size_t pagfext_size = sizeof(PAGFEXT "");
85 
86         if (file == NULL || !*file)
87                 return errno = EINVAL, (DBM *) NULL;
88 /*
89  * need space for two separate filenames
90  */
91         filelen = strlen(file);
92 
93         if ((dirname = (char *) malloc(filelen + dirfext_size
94                                        + filelen + pagfext_size)) == NULL)
95                 return errno = ENOMEM, (DBM *) NULL;
96 /*
97  * build the file names
98  */
99         memcpy(dirname, file, filelen);
100         memcpy(dirname + filelen, DIRFEXT, dirfext_size);
101         pagname = dirname + filelen + dirfext_size;
102         memcpy(pagname, file, filelen);
103         memcpy(pagname + filelen, PAGFEXT, pagfext_size);
104 
105         db = sdbm_prep(dirname, pagname, flags, mode);
106         free((char *) dirname);
107         return db;
108 }
109 
110 DBM *
111 sdbm_prep(char *dirname, char *pagname, int flags, int mode)
112 {
113         DBM *db;
114         struct stat dstat;
115 
116         if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
117                 return errno = ENOMEM, (DBM *) NULL;
118 
119         db->flags = 0;
120         db->hmask = 0;
121         db->blkptr = 0;
122         db->keyptr = 0;
123 /*
124  * adjust user flags so that WRONLY becomes RDWR,
125  * as required by this package. Also set our internal
126  * flag for RDONLY if needed.
127  */
128         if (flags & O_WRONLY)
129                 flags = (flags & ~O_WRONLY) | O_RDWR;
130 
131         else if ((flags & 03) == O_RDONLY)
132                 db->flags = DBM_RDONLY;
133 /*
134  * open the files in sequence, and stat the dirfile.
135  * If we fail anywhere, undo everything, return NULL.
136  */
137 #if defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
138         flags |= O_BINARY;
139 #	endif
140         if ((db->pagf = open(pagname, flags, mode)) > -1) {
141                 if ((db->dirf = open(dirname, flags, mode)) > -1) {
142 /*
143  * need the dirfile size to establish max bit number.
144  */
145                         if (fstat(db->dirf, &dstat) == 0) {
146 /*
147  * zero size: either a fresh database, or one with a single,
148  * unsplit data page: dirpage is all zeros.
149  */
150                                 db->dirbno = (!dstat.st_size) ? 0 : -1;
151                                 db->pagbno = -1;
152                                 db->maxbno = dstat.st_size * BYTESIZ;
153 
154                                 (void) memset(db->pagbuf, 0, PBLKSIZ);
155                                 (void) memset(db->dirbuf, 0, DBLKSIZ);
156                         /*
157                          * success
158                          */
159                                 return db;
160                         }
161                         (void) close(db->dirf);
162                 }
163                 (void) close(db->pagf);
164         }
165         free((char *) db);
166         return (DBM *) NULL;
167 }
168 
169 void
170 sdbm_close(DBM *db)
171 {
172         if (db == NULL)
173                 errno = EINVAL;
174         else {
175                 (void) close(db->dirf);
176                 (void) close(db->pagf);
177                 free((char *) db);
178         }
179 }
180 
181 datum
182 sdbm_fetch(DBM *db, datum key)
183 {
184         if (db == NULL || bad(key))
185                 return errno = EINVAL, nullitem;
186 
187         if (getpage(db, exhash(key)))
188                 return getpair(db->pagbuf, key);
189 
190         return ioerr(db), nullitem;
191 }
192 
193 int
194 sdbm_exists(DBM *db, datum key)
195 {
196         if (db == NULL || bad(key))
197                 return errno = EINVAL, -1;
198 
199         if (getpage(db, exhash(key)))
200                 return exipair(db->pagbuf, key);
201 
202         return ioerr(db), -1;
203 }
204 
205 int
206 sdbm_delete(DBM *db, datum key)
207 {
208         if (db == NULL || bad(key))
209                 return errno = EINVAL, -1;
210         if (sdbm_rdonly(db))
211                 return errno = EPERM, -1;
212 
213         if (getpage(db, exhash(key))) {
214                 if (!delpair(db->pagbuf, key))
215                         return -1;
216 /*
217  * update the page file
218  */
219                 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
220                     || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
221                         return ioerr(db), -1;
222 
223                 return 0;
224         }
225 
226         return ioerr(db), -1;
227 }
228 
229 int
230 sdbm_store(DBM *db, datum key, datum val, int flags)
231 {
232         int need;
233         long hash;
234 
235         if (db == NULL || bad(key))
236                 return errno = EINVAL, -1;
237         if (sdbm_rdonly(db))
238                 return errno = EPERM, -1;
239 
240         need = key.dsize + val.dsize;
241 /*
242  * is the pair too big (or too small) for this database ??
243  */
244         if (need < 0 || need > PAIRMAX)
245                 return errno = EINVAL, -1;
246 
247         if (getpage(db, (hash = exhash(key)))) {
248 /*
249  * if we need to replace, delete the key/data pair
250  * first. If it is not there, ignore.
251  */
252                 if (flags == DBM_REPLACE)
253                         (void) delpair(db->pagbuf, key);
254 #ifdef SEEDUPS
255                 else if (duppair(db->pagbuf, key))
256                         return 1;
257 #endif
258 /*
259  * if we do not have enough room, we have to split.
260  */
261                 if (!fitpair(db->pagbuf, need))
262                         if (!makroom(db, hash, need))
263                                 return ioerr(db), -1;
264 /*
265  * we have enough room or split is successful. insert the key,
266  * and update the page file.
267  */
268                 (void) putpair(db->pagbuf, key, val);
269 
270                 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
271                     || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
272                         return ioerr(db), -1;
273         /*
274          * success
275          */
276                 return 0;
277         }
278 
279         return ioerr(db), -1;
280 }
281 
282 /*
283  * makroom - make room by splitting the overfull page
284  * this routine will attempt to make room for SPLTMAX times before
285  * giving up.
286  */
287 static int
288 makroom(DBM *db, long int hash, int need)
289 {
290         long newp;
291         char twin[PBLKSIZ];
292 #if defined(DOSISH) || defined(WIN32)
293         char zer[PBLKSIZ];
294         long oldtail;
295 #endif
296         char *pag = db->pagbuf;
297         char *New = twin;
298         int smax = SPLTMAX;
299 #ifdef BADMESS
300         int rc;
301 #endif
302 
303         do {
304 /*
305  * split the current page
306  */
307                 (void) splpage(pag, New, db->hmask + 1);
308 /*
309  * address of the new page
310  */
311                 newp = (hash & db->hmask) | (db->hmask + 1);
312 
313 /*
314  * write delay, read avoidance/cache shuffle:
315  * select the page for incoming pair: if key is to go to the new page,
316  * write out the previous one, and copy the new one over, thus making
317  * it the current page. If not, simply write the new page, and we are
318  * still looking at the page of interest. current page is not updated
319  * here, as sdbm_store will do so, after it inserts the incoming pair.
320  */
321 
322 #if defined(DOSISH) || defined(WIN32)
323                 /*
324                  * Fill hole with 0 if made it.
325                  * (hole is NOT read as 0)
326                  */
327                 oldtail = lseek(db->pagf, 0L, SEEK_END);
328                 memset(zer, 0, PBLKSIZ);
329                 while (OFF_PAG(newp) > oldtail) {
330                         if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
331                             write(db->pagf, zer, PBLKSIZ) < 0) {
332 
333                                 return 0;
334                         }
335                         oldtail += PBLKSIZ;
336                 }
337 #endif
338                 if (hash & (db->hmask + 1)) {
339                         if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
340                             || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
341                                 return 0;
342                         db->pagbno = newp;
343                         (void) memcpy(pag, New, PBLKSIZ);
344                 }
345                 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
346                          || write(db->pagf, New, PBLKSIZ) < 0)
347                         return 0;
348 
349                 if (!setdbit(db, db->curbit))
350                         return 0;
351 /*
352  * see if we have enough room now
353  */
354                 if (fitpair(pag, need))
355                         return 1;
356 /*
357  * try again... update curbit and hmask as getpage would have
358  * done. because of our update of the current page, we do not
359  * need to read in anything. BUT we have to write the current
360  * [deferred] page out, as the window of failure is too great.
361  */
362                 db->curbit = 2 * db->curbit +
363                         ((hash & (db->hmask + 1)) ? 2 : 1);
364                 db->hmask |= db->hmask + 1;
365 
366                 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
367                     || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
368                         return 0;
369 
370         } while (--smax);
371 /*
372  * if we are here, this is real bad news. After SPLTMAX splits,
373  * we still cannot fit the key. say goodnight.
374  */
375 #ifdef BADMESS
376         rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
377         /* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be
378          * useful here but that would mean pulling in perl.h */
379         (void)rc;
380 #endif
381         return 0;
382 
383 }
384 
385 /*
386  * the following two routines will break if
387  * deletions aren't taken into account. (ndbm bug)
388  */
389 datum
390 sdbm_firstkey(DBM *db)
391 {
392         if (db == NULL)
393                 return errno = EINVAL, nullitem;
394 /*
395  * start at page 0
396  */
397         if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
398             || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
399                 return ioerr(db), nullitem;
400         if (!chkpage(db->pagbuf)) {
401             errno = EINVAL;
402             ioerr(db);
403             db->pagbno = -1;
404             return nullitem;
405         }
406         db->pagbno = 0;
407         db->blkptr = 0;
408         db->keyptr = 0;
409 
410         return getnext(db);
411 }
412 
413 datum
414 sdbm_nextkey(DBM *db)
415 {
416         if (db == NULL)
417                 return errno = EINVAL, nullitem;
418         return getnext(db);
419 }
420 
421 /*
422  * all important binary trie traversal
423  */
424 static int
425 getpage(DBM *db, long int hash)
426 {
427         int hbit;
428         long dbit;
429         long pagb;
430 
431         dbit = 0;
432         hbit = 0;
433         while (dbit < db->maxbno && getdbit(db, dbit))
434                 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
435 
436         debug(("dbit: %d...", dbit));
437 
438         db->curbit = dbit;
439         db->hmask = masks[hbit];
440 
441         pagb = hash & db->hmask;
442 /*
443  * see if the block we need is already in memory.
444  * note: this lookaside cache has about 10% hit rate.
445  */
446         if (pagb != db->pagbno) {
447 /*
448  * note: here, we assume a "hole" is read as 0s.
449  * if not, must zero pagbuf first.
450  */
451                 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
452                     || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
453                         return 0;
454                 if (!chkpage(db->pagbuf)) {
455                     errno = EINVAL;
456                     db->pagbno = -1;
457                     ioerr(db);
458                     return 0;
459                 }
460                 db->pagbno = pagb;
461 
462                 debug(("pag read: %d\n", pagb));
463         }
464         return 1;
465 }
466 
467 static int
468 getdbit(DBM *db, long int dbit)
469 {
470         long c;
471         long dirb;
472 
473         c = dbit / BYTESIZ;
474         dirb = c / DBLKSIZ;
475 
476         if (dirb != db->dirbno) {
477                 int got;
478                 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
479                     || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
480                         return 0;
481                 if (got==0)
482                         memset(db->dirbuf,0,DBLKSIZ);
483                 db->dirbno = dirb;
484 
485                 debug(("dir read: %d\n", dirb));
486         }
487 
488         return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
489 }
490 
491 static int
492 setdbit(DBM *db, long int dbit)
493 {
494         long c;
495         long dirb;
496 
497         c = dbit / BYTESIZ;
498         dirb = c / DBLKSIZ;
499 
500         if (dirb != db->dirbno) {
501                 int got;
502                 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
503                     || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
504                         return 0;
505                 if (got==0)
506                         memset(db->dirbuf,0,DBLKSIZ);
507                 db->dirbno = dirb;
508 
509                 debug(("dir read: %d\n", dirb));
510         }
511 
512         db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
513 
514 #if 0
515         if (dbit >= db->maxbno)
516                 db->maxbno += DBLKSIZ * BYTESIZ;
517 #else
518         if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
519                 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
520 #endif
521 
522         if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
523             || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
524                 return 0;
525 
526         return 1;
527 }
528 
529 /*
530  * getnext - get the next key in the page, and if done with
531  * the page, try the next page in sequence
532  */
533 static datum
534 getnext(DBM *db)
535 {
536         datum key;
537 
538         for (;;) {
539                 db->keyptr++;
540                 key = getnkey(db->pagbuf, db->keyptr);
541                 if (key.dptr != NULL)
542                         return key;
543 /*
544  * we either run out, or there is nothing on this page..
545  * try the next one... If we lost our position on the
546  * file, we will have to seek.
547  */
548                 db->keyptr = 0;
549                 if (db->pagbno != db->blkptr++)
550                         if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
551                                 break;
552                 db->pagbno = db->blkptr;
553                 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
554                         break;
555                 if (!chkpage(db->pagbuf)) {
556                     errno = EINVAL;
557                     db->pagbno = -1;
558                     ioerr(db);
559                     break;
560                 }
561         }
562 
563         return ioerr(db), nullitem;
564 }
565 
566