1 /* $NetBSD: mdb_load.c,v 1.3 2021/08/14 16:14:57 christos Exp $ */
2
3 /* mdb_load.c - memory-mapped database load tool */
4 /*
5 * Copyright 2011-2021 Howard Chu, Symas Corp.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted only as authorized by the OpenLDAP
10 * Public License.
11 *
12 * A copy of this license is available in the file LICENSE in the
13 * top-level directory of the distribution or, alternatively, at
14 * <http://www.OpenLDAP.org/license.html>.
15 */
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <errno.h>
19 #include <string.h>
20 #include <ctype.h>
21 #include <unistd.h>
22 #include "lmdb.h"
23
24 #define PRINT 1
25 #define NOHDR 2
26 static int mode;
27
28 static char *subname = NULL;
29
30 static size_t lineno;
31 static int version;
32
33 static int flags;
34
35 static char *prog;
36
37 static int Eof;
38
39 static MDB_envinfo info;
40
41 static MDB_val kbuf, dbuf;
42 static MDB_val k0buf;
43
44 #ifdef _WIN32
45 #define Z "I"
46 #else
47 #define Z "z"
48 #endif
49
50 #define STRLENOF(s) (sizeof(s)-1)
51
52 typedef struct flagbit {
53 int bit;
54 char *name;
55 int len;
56 } flagbit;
57
58 #define S(s) s, STRLENOF(s)
59
60 flagbit dbflags[] = {
61 { MDB_REVERSEKEY, S("reversekey") },
62 { MDB_DUPSORT, S("dupsort") },
63 { MDB_INTEGERKEY, S("integerkey") },
64 { MDB_DUPFIXED, S("dupfixed") },
65 { MDB_INTEGERDUP, S("integerdup") },
66 { MDB_REVERSEDUP, S("reversedup") },
67 { 0, NULL, 0 }
68 };
69
readhdr(void)70 static void readhdr(void)
71 {
72 char *ptr;
73
74 flags = 0;
75 while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) {
76 lineno++;
77 if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) {
78 version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION="));
79 if (version > 3) {
80 fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n",
81 prog, lineno, version);
82 exit(EXIT_FAILURE);
83 }
84 } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) {
85 break;
86 } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) {
87 if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print")))
88 mode |= PRINT;
89 else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) {
90 fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n",
91 prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT="));
92 exit(EXIT_FAILURE);
93 }
94 } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) {
95 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
96 if (ptr) *ptr = '\0';
97 if (subname) free(subname);
98 subname = strdup((char *)dbuf.mv_data+STRLENOF("database="));
99 } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) {
100 if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) {
101 fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n",
102 prog, lineno, (char *)dbuf.mv_data+STRLENOF("type="));
103 exit(EXIT_FAILURE);
104 }
105 } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) {
106 int i;
107 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
108 if (ptr) *ptr = '\0';
109 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr);
110 if (i != 1) {
111 fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n",
112 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr="));
113 exit(EXIT_FAILURE);
114 }
115 } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) {
116 int i;
117 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
118 if (ptr) *ptr = '\0';
119 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize);
120 if (i != 1) {
121 fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n",
122 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize="));
123 exit(EXIT_FAILURE);
124 }
125 } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) {
126 int i;
127 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size);
128 if (ptr) *ptr = '\0';
129 i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders);
130 if (i != 1) {
131 fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n",
132 prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders="));
133 exit(EXIT_FAILURE);
134 }
135 } else {
136 int i;
137 for (i=0; dbflags[i].bit; i++) {
138 if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) &&
139 ((char *)dbuf.mv_data)[dbflags[i].len] == '=') {
140 flags |= dbflags[i].bit;
141 break;
142 }
143 }
144 if (!dbflags[i].bit) {
145 ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size);
146 if (!ptr) {
147 fprintf(stderr, "%s: line %" Z "d: unexpected format\n",
148 prog, lineno);
149 exit(EXIT_FAILURE);
150 } else {
151 *ptr = '\0';
152 fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n",
153 prog, lineno, (char *)dbuf.mv_data);
154 }
155 }
156 }
157 }
158 }
159
badend(void)160 static void badend(void)
161 {
162 fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n",
163 prog, lineno);
164 }
165
/* unhex - convert the two hex digits at c2[0] and c2[1] to a byte value.
 *
 * Masking with 0x4f folds lower-case letters onto upper-case ones and
 * reduces '0'..'9' to 0..9; letters keep bit 0x40 set and are shifted
 * down by 55 so that 'A' becomes 10.  The digits are not validated here.
 */
static int unhex(unsigned char *c2)
{
	int value = 0;
	int pos;

	for (pos = 0; pos < 2; pos++) {
		int nibble = c2[pos] & 0x4f;

		if (nibble & 0x40)
			nibble -= 55;
		value = (value << 4) | nibble;
	}
	return value;
}
179
/* readline - read one key or data line from stdin and decode it in place.
 *
 * out: receives pointer and length of the decoded bytes; the pointer
 *      refers to buf's storage and is only valid until buf is reused.
 * buf: line buffer; grown by doubling when a line does not fit, so both
 *      buf->mv_data and buf->mv_size may change.
 *
 * Unless NOHDR is set, each record line must start with a single space;
 * any other first character ends the record section.  A "DATA=END" line
 * returns EOF without setting Eof; anything else is reported via badend().
 *
 * The payload is decoded according to mode: with PRINT, "\\" is a
 * backslash and "\xx" a hex-encoded byte; otherwise the whole line is
 * pairs of hex digits.
 *
 * Returns 0 on success, EOF otherwise.  Eof is set whenever no further
 * input should be read (end of file, malformed input, out of memory).
 */
static int readline(MDB_val *out, MDB_val *buf)
{
	unsigned char *c1, *c2, *end;
	size_t len;
	int c;

	if (!(mode & NOHDR)) {
		c = fgetc(stdin);
		if (c == EOF) {
			Eof = 1;
			return EOF;
		}
		if (c != ' ') {
			lineno++;
			/* The 'D' was consumed above, hence "ATA=END" */
			if (fgets(buf->mv_data, buf->mv_size, stdin) != NULL &&
				c == 'D' &&
				!strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END")))
				return EOF;
			Eof = 1;
			badend();
			return EOF;
		}
	}
	if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) {
		Eof = 1;
		return EOF;
	}
	lineno++;

	c1 = buf->mv_data;
	len = strlen((char *)c1);

	/* fgets can hand back an empty string when the line starts with a
	 * NUL byte; indexing c1[len-1] would then read before the buffer.
	 */
	if (len == 0) {
		Eof = 1;
		badend();
		return EOF;
	}

	/* Is buffer too short? Keep doubling until the newline is seen. */
	while (c1[len-1] != '\n') {
		size_t oldsize = buf->mv_size;
		void *tmp = NULL;

		if (oldsize <= (size_t)-1 / 2)
			tmp = realloc(buf->mv_data, oldsize * 2);
		if (!tmp) {
			/* buf still owns its old storage */
			Eof = 1;
			fprintf(stderr, "%s: line %" Z "u: out of memory, line too long\n",
				prog, lineno);
			return EOF;
		}
		buf->mv_data = tmp;
		buf->mv_size = oldsize * 2;
		c1 = tmp;

		/* len <= oldsize-1, so oldsize+1 bytes fit behind it */
		if (fgets((char *)c1 + len, oldsize + 1, stdin) == NULL) {
			Eof = 1;
			badend();
			return EOF;
		}
		len += strlen((char *)c1 + len);
	}

	/* Drop the newline; decoding writes at c1 while reading from c2 */
	c2 = c1;
	c1[--len] = '\0';
	end = c1 + len;

	if (mode & PRINT) {
		while (c2 < end) {
			if (*c2 == '\\') {
				if (c2[1] == '\\') {
					*c1++ = *c2;
				} else {
					if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) {
						Eof = 1;
						badend();
						return EOF;
					}
					*c1++ = unhex(++c2);
				}
				c2 += 2;
			} else {
				/* copies are redundant when no escapes were used */
				*c1++ = *c2++;
			}
		}
	} else {
		/* odd length not allowed */
		if (len & 1) {
			Eof = 1;
			badend();
			return EOF;
		}
		while (c2 < end) {
			if (!isxdigit(*c2) || !isxdigit(c2[1])) {
				Eof = 1;
				badend();
				return EOF;
			}
			*c1++ = unhex(c2);
			c2 += 2;
		}
	}
	c2 = out->mv_data = buf->mv_data;
	out->mv_size = c1 - c2;

	return 0;
}
281
usage(void)282 static void usage(void)
283 {
284 fprintf(stderr, "usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog);
285 exit(EXIT_FAILURE);
286 }
287
/* greater - comparison callback that ignores both arguments and always
 * returns 1.  main() installs it as the key (and duplicate) comparator
 * when -a is given; presumably so records are accepted in input order.
 */
static int greater(const MDB_val *a, const MDB_val *b)
{
	(void)a;
	(void)b;

	return 1;
}
292
main(int argc,char * argv[])293 int main(int argc, char *argv[])
294 {
295 int i, rc;
296 MDB_env *env;
297 MDB_txn *txn;
298 MDB_cursor *mc;
299 MDB_dbi dbi;
300 char *envname;
301 int envflags = MDB_NOSYNC, putflags = 0;
302 int dohdr = 0, append = 0;
303 MDB_val prevk;
304
305 prog = argv[0];
306
307 if (argc < 2) {
308 usage();
309 }
310
311 /* -a: append records in input order
312 * -f: load file instead of stdin
313 * -n: use NOSUBDIR flag on env_open
314 * -s: load into named subDB
315 * -N: use NOOVERWRITE on puts
316 * -T: read plaintext
317 * -V: print version and exit
318 */
319 while ((i = getopt(argc, argv, "af:ns:NTV")) != EOF) {
320 switch(i) {
321 case 'V':
322 printf("%s\n", MDB_VERSION_STRING);
323 exit(0);
324 break;
325 case 'a':
326 append = 1;
327 break;
328 case 'f':
329 if (freopen(optarg, "r", stdin) == NULL) {
330 fprintf(stderr, "%s: %s: reopen: %s\n",
331 prog, optarg, strerror(errno));
332 exit(EXIT_FAILURE);
333 }
334 break;
335 case 'n':
336 envflags |= MDB_NOSUBDIR;
337 break;
338 case 's':
339 subname = strdup(optarg);
340 break;
341 case 'N':
342 putflags = MDB_NOOVERWRITE|MDB_NODUPDATA;
343 break;
344 case 'T':
345 mode |= NOHDR | PRINT;
346 break;
347 default:
348 usage();
349 }
350 }
351
352 if (optind != argc - 1)
353 usage();
354
355 dbuf.mv_size = 4096;
356 dbuf.mv_data = malloc(dbuf.mv_size);
357
358 if (!(mode & NOHDR))
359 readhdr();
360
361 envname = argv[optind];
362 rc = mdb_env_create(&env);
363 if (rc) {
364 fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc));
365 return EXIT_FAILURE;
366 }
367
368 mdb_env_set_maxdbs(env, 2);
369
370 if (info.me_maxreaders)
371 mdb_env_set_maxreaders(env, info.me_maxreaders);
372
373 if (info.me_mapsize)
374 mdb_env_set_mapsize(env, info.me_mapsize);
375
376 if (info.me_mapaddr)
377 envflags |= MDB_FIXEDMAP;
378
379 rc = mdb_env_open(env, envname, envflags, 0664);
380 if (rc) {
381 fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc));
382 goto env_close;
383 }
384
385 kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2;
386 kbuf.mv_data = malloc(kbuf.mv_size * 2);
387 k0buf.mv_size = kbuf.mv_size;
388 k0buf.mv_data = (char *)kbuf.mv_data + kbuf.mv_size;
389 prevk.mv_data = k0buf.mv_data;
390
391 while(!Eof) {
392 MDB_val key, data;
393 int batch = 0;
394 flags = 0;
395 int appflag;
396
397 if (!dohdr) {
398 dohdr = 1;
399 } else if (!(mode & NOHDR))
400 readhdr();
401
402 rc = mdb_txn_begin(env, NULL, 0, &txn);
403 if (rc) {
404 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
405 goto env_close;
406 }
407
408 rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi);
409 if (rc) {
410 fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc));
411 goto txn_abort;
412 }
413 prevk.mv_size = 0;
414 if (append) {
415 mdb_set_compare(txn, dbi, greater);
416 if (flags & MDB_DUPSORT)
417 mdb_set_dupsort(txn, dbi, greater);
418 }
419
420 rc = mdb_cursor_open(txn, dbi, &mc);
421 if (rc) {
422 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
423 goto txn_abort;
424 }
425
426 while(1) {
427 rc = readline(&key, &kbuf);
428 if (rc) /* rc == EOF */
429 break;
430
431 rc = readline(&data, &dbuf);
432 if (rc) {
433 fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno);
434 goto txn_abort;
435 }
436
437 if (append) {
438 appflag = MDB_APPEND;
439 if (flags & MDB_DUPSORT) {
440 if (prevk.mv_size == key.mv_size && !memcmp(prevk.mv_data, key.mv_data, key.mv_size))
441 appflag = MDB_CURRENT|MDB_APPENDDUP;
442 else {
443 memcpy(prevk.mv_data, key.mv_data, key.mv_size);
444 prevk.mv_size = key.mv_size;
445 }
446 }
447 } else {
448 appflag = 0;
449 }
450 rc = mdb_cursor_put(mc, &key, &data, putflags|appflag);
451 if (rc == MDB_KEYEXIST && putflags)
452 continue;
453 if (rc) {
454 fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc));
455 goto txn_abort;
456 }
457 batch++;
458 if (batch == 100) {
459 rc = mdb_txn_commit(txn);
460 if (rc) {
461 fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
462 prog, lineno, mdb_strerror(rc));
463 goto env_close;
464 }
465 rc = mdb_txn_begin(env, NULL, 0, &txn);
466 if (rc) {
467 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc));
468 goto env_close;
469 }
470 rc = mdb_cursor_open(txn, dbi, &mc);
471 if (rc) {
472 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc));
473 goto txn_abort;
474 }
475 if (appflag & MDB_APPENDDUP) {
476 MDB_val k, d;
477 mdb_cursor_get(mc, &k, &d, MDB_LAST);
478 }
479 batch = 0;
480 }
481 }
482 rc = mdb_txn_commit(txn);
483 txn = NULL;
484 if (rc) {
485 fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n",
486 prog, lineno, mdb_strerror(rc));
487 goto env_close;
488 }
489 mdb_dbi_close(env, dbi);
490 }
491
492 txn_abort:
493 mdb_txn_abort(txn);
494 env_close:
495 mdb_env_close(env);
496
497 return rc ? EXIT_FAILURE : EXIT_SUCCESS;
498 }
499