1 /* $NetBSD: mdb_load.c,v 1.3 2021/08/14 16:14:57 christos Exp $ */ 2 3 /* mdb_load.c - memory-mapped database load tool */ 4 /* 5 * Copyright 2011-2021 Howard Chu, Symas Corp. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted only as authorized by the OpenLDAP 10 * Public License. 11 * 12 * A copy of this license is available in the file LICENSE in the 13 * top-level directory of the distribution or, alternatively, at 14 * <http://www.OpenLDAP.org/license.html>. 15 */ 16 #include <stdio.h> 17 #include <stdlib.h> 18 #include <errno.h> 19 #include <string.h> 20 #include <ctype.h> 21 #include <unistd.h> 22 #include "lmdb.h" 23 24 #define PRINT 1 25 #define NOHDR 2 26 static int mode; 27 28 static char *subname = NULL; 29 30 static size_t lineno; 31 static int version; 32 33 static int flags; 34 35 static char *prog; 36 37 static int Eof; 38 39 static MDB_envinfo info; 40 41 static MDB_val kbuf, dbuf; 42 static MDB_val k0buf; 43 44 #ifdef _WIN32 45 #define Z "I" 46 #else 47 #define Z "z" 48 #endif 49 50 #define STRLENOF(s) (sizeof(s)-1) 51 52 typedef struct flagbit { 53 int bit; 54 char *name; 55 int len; 56 } flagbit; 57 58 #define S(s) s, STRLENOF(s) 59 60 flagbit dbflags[] = { 61 { MDB_REVERSEKEY, S("reversekey") }, 62 { MDB_DUPSORT, S("dupsort") }, 63 { MDB_INTEGERKEY, S("integerkey") }, 64 { MDB_DUPFIXED, S("dupfixed") }, 65 { MDB_INTEGERDUP, S("integerdup") }, 66 { MDB_REVERSEDUP, S("reversedup") }, 67 { 0, NULL, 0 } 68 }; 69 70 static void readhdr(void) 71 { 72 char *ptr; 73 74 flags = 0; 75 while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) { 76 lineno++; 77 if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) { 78 version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION=")); 79 if (version > 3) { 80 fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n", 81 prog, lineno, version); 82 exit(EXIT_FAILURE); 83 } 84 } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) { 85 break; 86 } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) { 87 if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print"))) 88 mode |= PRINT; 89 else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) { 90 fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n", 91 prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT=")); 92 exit(EXIT_FAILURE); 93 } 94 } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) { 95 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 96 if (ptr) *ptr = '\0'; 97 if (subname) free(subname); 98 subname = strdup((char *)dbuf.mv_data+STRLENOF("database=")); 99 } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) { 100 if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) { 101 fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n", 102 prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=")); 103 exit(EXIT_FAILURE); 104 } 105 } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) { 106 int i; 107 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 108 if (ptr) *ptr = '\0'; 109 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr); 110 if (i != 1) { 111 fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n", 112 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=")); 113 exit(EXIT_FAILURE); 114 } 115 } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) { 116 int i; 117 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 118 if (ptr) *ptr = '\0'; 119 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); 120 if (i != 1) { 121 fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", 122 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); 123 exit(EXIT_FAILURE); 124 } 125 } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) { 126 int i; 127 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 128 if (ptr) *ptr = '\0'; 129 i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders); 130 if (i != 1) { 131 fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n", 132 prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=")); 133 exit(EXIT_FAILURE); 134 } 135 } else { 136 int i; 137 for (i=0; dbflags[i].bit; i++) { 138 if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) && 139 ((char *)dbuf.mv_data)[dbflags[i].len] == '=') { 140 flags |= dbflags[i].bit; 141 break; 142 } 143 } 144 if (!dbflags[i].bit) { 145 ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size); 146 if (!ptr) { 147 fprintf(stderr, "%s: line %" Z "d: unexpected format\n", 148 prog, lineno); 149 exit(EXIT_FAILURE); 150 } else { 151 *ptr = '\0'; 152 fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n", 153 prog, lineno, (char *)dbuf.mv_data); 154 } 155 } 156 } 157 } 158 } 159 160 static void badend(void) 161 { 162 fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n", 163 prog, lineno); 164 } 165 166 static int unhex(unsigned char *c2) 167 { 168 int x, c; 169 x = *c2++ & 0x4f; 170 if (x & 0x40) 171 x -= 55; 172 c = x << 4; 173 x = *c2 & 0x4f; 174 if (x & 0x40) 175 x -= 55; 176 c |= x; 177 return c; 178 } 179 180 static int readline(MDB_val *out, MDB_val *buf) 181 { 182 unsigned char *c1, *c2, *end; 183 size_t len, l2; 184 int c; 185 186 if (!(mode & NOHDR)) { 187 c = fgetc(stdin); 188 if (c == EOF) { 189 Eof = 1; 190 return EOF; 191 } 192 if (c != ' ') { 193 lineno++; 194 if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { 195 badend: 196 Eof = 1; 197 badend(); 198 return EOF; 199 } 200 if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END"))) 201 return EOF; 202 goto badend; 203 } 204 } 205 if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { 206 Eof = 1; 207 return EOF; 208 } 209 lineno++; 210 211 c1 = buf->mv_data; 212 len = strlen((char *)c1); 213 l2 = len; 214 215 /* Is buffer too short? */ 216 while (c1[len-1] != '\n') { 217 buf->mv_data = realloc(buf->mv_data, buf->mv_size*2); 218 if (!buf->mv_data) { 219 Eof = 1; 220 fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n", 221 prog, lineno); 222 return EOF; 223 } 224 c1 = buf->mv_data; 225 c1 += l2; 226 if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) { 227 Eof = 1; 228 badend(); 229 return EOF; 230 } 231 buf->mv_size *= 2; 232 len = strlen((char *)c1); 233 l2 += len; 234 } 235 c1 = c2 = buf->mv_data; 236 len = l2; 237 c1[--len] = '\0'; 238 end = c1 + len; 239 240 if (mode & PRINT) { 241 while (c2 < end) { 242 if (*c2 == '\\') { 243 if (c2[1] == '\\') { 244 *c1++ = *c2; 245 } else { 246 if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) { 247 Eof = 1; 248 badend(); 249 return EOF; 250 } 251 *c1++ = unhex(++c2); 252 } 253 c2 += 2; 254 } else { 255 /* copies are redundant when no escapes were used */ 256 *c1++ = *c2++; 257 } 258 } 259 } else { 260 /* odd length not allowed */ 261 if (len & 1) { 262 Eof = 1; 263 badend(); 264 return EOF; 265 } 266 while (c2 < end) { 267 if (!isxdigit(*c2) || !isxdigit(c2[1])) { 268 Eof = 1; 269 badend(); 270 return EOF; 271 } 272 *c1++ = unhex(c2); 273 c2 += 2; 274 } 275 } 276 c2 = out->mv_data = buf->mv_data; 277 out->mv_size = c1 - c2; 278 279 return 0; 280 } 281 282 static void usage(void) 283 { 284 fprintf(stderr, "usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog); 285 exit(EXIT_FAILURE); 286 } 287 288 static int greater(const MDB_val *a, const MDB_val *b) 289 { 290 return 1; 291 } 292 293 int main(int argc, char *argv[]) 294 { 295 int i, rc; 296 MDB_env *env; 297 MDB_txn *txn; 298 MDB_cursor *mc; 299 MDB_dbi dbi; 300 char *envname; 301 int envflags = MDB_NOSYNC, putflags = 0; 302 int dohdr = 0, append = 0; 303 MDB_val prevk; 304 305 prog = argv[0]; 306 307 if (argc < 2) { 308 usage(); 309 } 310 311 /* -a: append records in input order 312 * -f: load file instead of stdin 313 * -n: use NOSUBDIR flag on env_open 314 * -s: load into named subDB 315 * -N: use NOOVERWRITE on puts 316 * -T: read plaintext 317 * -V: print version and exit 318 */ 319 while ((i = getopt(argc, argv, "af:ns:NTV")) != EOF) { 320 switch(i) { 321 case 'V': 322 printf("%s\n", MDB_VERSION_STRING); 323 exit(0); 324 break; 325 case 'a': 326 append = 1; 327 break; 328 case 'f': 329 if (freopen(optarg, "r", stdin) == NULL) { 330 fprintf(stderr, "%s: %s: reopen: %s\n", 331 prog, optarg, strerror(errno)); 332 exit(EXIT_FAILURE); 333 } 334 break; 335 case 'n': 336 envflags |= MDB_NOSUBDIR; 337 break; 338 case 's': 339 subname = strdup(optarg); 340 break; 341 case 'N': 342 putflags = MDB_NOOVERWRITE|MDB_NODUPDATA; 343 break; 344 case 'T': 345 mode |= NOHDR | PRINT; 346 break; 347 default: 348 usage(); 349 } 350 } 351 352 if (optind != argc - 1) 353 usage(); 354 355 dbuf.mv_size = 4096; 356 dbuf.mv_data = malloc(dbuf.mv_size); 357 358 if (!(mode & NOHDR)) 359 readhdr(); 360 361 envname = argv[optind]; 362 rc = mdb_env_create(&env); 363 if (rc) { 364 fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc)); 365 return EXIT_FAILURE; 366 } 367 368 mdb_env_set_maxdbs(env, 2); 369 370 if (info.me_maxreaders) 371 mdb_env_set_maxreaders(env, info.me_maxreaders); 372 373 if (info.me_mapsize) 374 mdb_env_set_mapsize(env, info.me_mapsize); 375 376 if (info.me_mapaddr) 377 envflags |= MDB_FIXEDMAP; 378 379 rc = mdb_env_open(env, envname, envflags, 0664); 380 if (rc) { 381 fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); 382 goto env_close; 383 } 384 385 kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; 386 kbuf.mv_data = malloc(kbuf.mv_size * 2); 387 k0buf.mv_size = kbuf.mv_size; 388 k0buf.mv_data = (char *)kbuf.mv_data + kbuf.mv_size; 389 prevk.mv_data = k0buf.mv_data; 390 391 while(!Eof) { 392 MDB_val key, data; 393 int batch = 0; 394 flags = 0; 395 int appflag; 396 397 if (!dohdr) { 398 dohdr = 1; 399 } else if (!(mode & NOHDR)) 400 readhdr(); 401 402 rc = mdb_txn_begin(env, NULL, 0, &txn); 403 if (rc) { 404 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); 405 goto env_close; 406 } 407 408 rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi); 409 if (rc) { 410 fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); 411 goto txn_abort; 412 } 413 prevk.mv_size = 0; 414 if (append) { 415 mdb_set_compare(txn, dbi, greater); 416 if (flags & MDB_DUPSORT) 417 mdb_set_dupsort(txn, dbi, greater); 418 } 419 420 rc = mdb_cursor_open(txn, dbi, &mc); 421 if (rc) { 422 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); 423 goto txn_abort; 424 } 425 426 while(1) { 427 rc = readline(&key, &kbuf); 428 if (rc) /* rc == EOF */ 429 break; 430 431 rc = readline(&data, &dbuf); 432 if (rc) { 433 fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno); 434 goto txn_abort; 435 } 436 437 if (append) { 438 appflag = MDB_APPEND; 439 if (flags & MDB_DUPSORT) { 440 if (prevk.mv_size == key.mv_size && !memcmp(prevk.mv_data, key.mv_data, key.mv_size)) 441 appflag = MDB_CURRENT|MDB_APPENDDUP; 442 else { 443 memcpy(prevk.mv_data, key.mv_data, key.mv_size); 444 prevk.mv_size = key.mv_size; 445 } 446 } 447 } else { 448 appflag = 0; 449 } 450 rc = mdb_cursor_put(mc, &key, &data, putflags|appflag); 451 if (rc == MDB_KEYEXIST && putflags) 452 continue; 453 if (rc) { 454 fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc)); 455 goto txn_abort; 456 } 457 batch++; 458 if (batch == 100) { 459 rc = mdb_txn_commit(txn); 460 if (rc) { 461 fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", 462 prog, lineno, mdb_strerror(rc)); 463 goto env_close; 464 } 465 rc = mdb_txn_begin(env, NULL, 0, &txn); 466 if (rc) { 467 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); 468 goto env_close; 469 } 470 rc = mdb_cursor_open(txn, dbi, &mc); 471 if (rc) { 472 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); 473 goto txn_abort; 474 } 475 if (appflag & MDB_APPENDDUP) { 476 MDB_val k, d; 477 mdb_cursor_get(mc, &k, &d, MDB_LAST); 478 } 479 batch = 0; 480 } 481 } 482 rc = mdb_txn_commit(txn); 483 txn = NULL; 484 if (rc) { 485 fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", 486 prog, lineno, mdb_strerror(rc)); 487 goto env_close; 488 } 489 mdb_dbi_close(env, dbi); 490 } 491 492 txn_abort: 493 mdb_txn_abort(txn); 494 env_close: 495 mdb_env_close(env); 496 497 return rc ? EXIT_FAILURE : EXIT_SUCCESS; 498 } 499