1 /* $NetBSD: mdb_load.c,v 1.2 2020/08/11 13:15:38 christos Exp $ */ 2 3 /* mdb_load.c - memory-mapped database load tool */ 4 /* 5 * Copyright 2011-2020 Howard Chu, Symas Corp. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted only as authorized by the OpenLDAP 10 * Public License. 11 * 12 * A copy of this license is available in the file LICENSE in the 13 * top-level directory of the distribution or, alternatively, at 14 * <http://www.OpenLDAP.org/license.html>. 15 */ 16 #include <stdio.h> 17 #include <stdlib.h> 18 #include <errno.h> 19 #include <string.h> 20 #include <ctype.h> 21 #include <unistd.h> 22 #include "lmdb.h" 23 24 #define PRINT 1 25 #define NOHDR 2 26 static int mode; 27 28 static char *subname = NULL; 29 30 static size_t lineno; 31 static int version; 32 33 static int flags; 34 35 static char *prog; 36 37 static int Eof; 38 39 static MDB_envinfo info; 40 41 static MDB_val kbuf, dbuf; 42 43 #ifdef _WIN32 44 #define Z "I" 45 #else 46 #define Z "z" 47 #endif 48 49 #define STRLENOF(s) (sizeof(s)-1) 50 51 typedef struct flagbit { 52 int bit; 53 char *name; 54 int len; 55 } flagbit; 56 57 #define S(s) s, STRLENOF(s) 58 59 flagbit dbflags[] = { 60 { MDB_REVERSEKEY, S("reversekey") }, 61 { MDB_DUPSORT, S("dupsort") }, 62 { MDB_INTEGERKEY, S("integerkey") }, 63 { MDB_DUPFIXED, S("dupfixed") }, 64 { MDB_INTEGERDUP, S("integerdup") }, 65 { MDB_REVERSEDUP, S("reversedup") }, 66 { 0, NULL, 0 } 67 }; 68 69 static void readhdr(void) 70 { 71 char *ptr; 72 73 flags = 0; 74 while (fgets(dbuf.mv_data, dbuf.mv_size, stdin) != NULL) { 75 lineno++; 76 if (!strncmp(dbuf.mv_data, "VERSION=", STRLENOF("VERSION="))) { 77 version=atoi((char *)dbuf.mv_data+STRLENOF("VERSION=")); 78 if (version > 3) { 79 fprintf(stderr, "%s: line %" Z "d: unsupported VERSION %d\n", 80 prog, lineno, version); 81 exit(EXIT_FAILURE); 82 } 83 } else if (!strncmp(dbuf.mv_data, "HEADER=END", STRLENOF("HEADER=END"))) { 84 break; 85 } else if (!strncmp(dbuf.mv_data, "format=", STRLENOF("format="))) { 86 if (!strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "print", STRLENOF("print"))) 87 mode |= PRINT; 88 else if (strncmp((char *)dbuf.mv_data+STRLENOF("FORMAT="), "bytevalue", STRLENOF("bytevalue"))) { 89 fprintf(stderr, "%s: line %" Z "d: unsupported FORMAT %s\n", 90 prog, lineno, (char *)dbuf.mv_data+STRLENOF("FORMAT=")); 91 exit(EXIT_FAILURE); 92 } 93 } else if (!strncmp(dbuf.mv_data, "database=", STRLENOF("database="))) { 94 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 95 if (ptr) *ptr = '\0'; 96 if (subname) free(subname); 97 subname = strdup((char *)dbuf.mv_data+STRLENOF("database=")); 98 } else if (!strncmp(dbuf.mv_data, "type=", STRLENOF("type="))) { 99 if (strncmp((char *)dbuf.mv_data+STRLENOF("type="), "btree", STRLENOF("btree"))) { 100 fprintf(stderr, "%s: line %" Z "d: unsupported type %s\n", 101 prog, lineno, (char *)dbuf.mv_data+STRLENOF("type=")); 102 exit(EXIT_FAILURE); 103 } 104 } else if (!strncmp(dbuf.mv_data, "mapaddr=", STRLENOF("mapaddr="))) { 105 int i; 106 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 107 if (ptr) *ptr = '\0'; 108 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapaddr="), "%p", &info.me_mapaddr); 109 if (i != 1) { 110 fprintf(stderr, "%s: line %" Z "d: invalid mapaddr %s\n", 111 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapaddr=")); 112 exit(EXIT_FAILURE); 113 } 114 } else if (!strncmp(dbuf.mv_data, "mapsize=", STRLENOF("mapsize="))) { 115 int i; 116 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 117 if (ptr) *ptr = '\0'; 118 i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); 119 if (i != 1) { 120 fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", 121 prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); 122 exit(EXIT_FAILURE); 123 } 124 } else if (!strncmp(dbuf.mv_data, "maxreaders=", STRLENOF("maxreaders="))) { 125 int i; 126 ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); 127 if (ptr) *ptr = '\0'; 128 i = sscanf((char *)dbuf.mv_data+STRLENOF("maxreaders="), "%u", &info.me_maxreaders); 129 if (i != 1) { 130 fprintf(stderr, "%s: line %" Z "d: invalid maxreaders %s\n", 131 prog, lineno, (char *)dbuf.mv_data+STRLENOF("maxreaders=")); 132 exit(EXIT_FAILURE); 133 } 134 } else { 135 int i; 136 for (i=0; dbflags[i].bit; i++) { 137 if (!strncmp(dbuf.mv_data, dbflags[i].name, dbflags[i].len) && 138 ((char *)dbuf.mv_data)[dbflags[i].len] == '=') { 139 flags |= dbflags[i].bit; 140 break; 141 } 142 } 143 if (!dbflags[i].bit) { 144 ptr = memchr(dbuf.mv_data, '=', dbuf.mv_size); 145 if (!ptr) { 146 fprintf(stderr, "%s: line %" Z "d: unexpected format\n", 147 prog, lineno); 148 exit(EXIT_FAILURE); 149 } else { 150 *ptr = '\0'; 151 fprintf(stderr, "%s: line %" Z "d: unrecognized keyword ignored: %s\n", 152 prog, lineno, (char *)dbuf.mv_data); 153 } 154 } 155 } 156 } 157 } 158 159 static void badend(void) 160 { 161 fprintf(stderr, "%s: line %" Z "d: unexpected end of input\n", 162 prog, lineno); 163 } 164 165 static int unhex(unsigned char *c2) 166 { 167 int x, c; 168 x = *c2++ & 0x4f; 169 if (x & 0x40) 170 x -= 55; 171 c = x << 4; 172 x = *c2 & 0x4f; 173 if (x & 0x40) 174 x -= 55; 175 c |= x; 176 return c; 177 } 178 179 static int readline(MDB_val *out, MDB_val *buf) 180 { 181 unsigned char *c1, *c2, *end; 182 size_t len, l2; 183 int c; 184 185 if (!(mode & NOHDR)) { 186 c = fgetc(stdin); 187 if (c == EOF) { 188 Eof = 1; 189 return EOF; 190 } 191 if (c != ' ') { 192 lineno++; 193 if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { 194 badend: 195 Eof = 1; 196 badend(); 197 return EOF; 198 } 199 if (c == 'D' && !strncmp(buf->mv_data, "ATA=END", STRLENOF("ATA=END"))) 200 return EOF; 201 goto badend; 202 } 203 } 204 if (fgets(buf->mv_data, buf->mv_size, stdin) == NULL) { 205 Eof = 1; 206 return EOF; 207 } 208 lineno++; 209 210 c1 = buf->mv_data; 211 len = strlen((char *)c1); 212 l2 = len; 213 214 /* Is buffer too short? */ 215 while (c1[len-1] != '\n') { 216 buf->mv_data = realloc(buf->mv_data, buf->mv_size*2); 217 if (!buf->mv_data) { 218 Eof = 1; 219 fprintf(stderr, "%s: line %" Z "d: out of memory, line too long\n", 220 prog, lineno); 221 return EOF; 222 } 223 c1 = buf->mv_data; 224 c1 += l2; 225 if (fgets((char *)c1, buf->mv_size+1, stdin) == NULL) { 226 Eof = 1; 227 badend(); 228 return EOF; 229 } 230 buf->mv_size *= 2; 231 len = strlen((char *)c1); 232 l2 += len; 233 } 234 c1 = c2 = buf->mv_data; 235 len = l2; 236 c1[--len] = '\0'; 237 end = c1 + len; 238 239 if (mode & PRINT) { 240 while (c2 < end) { 241 if (*c2 == '\\') { 242 if (c2[1] == '\\') { 243 *c1++ = *c2; 244 } else { 245 if (c2+3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) { 246 Eof = 1; 247 badend(); 248 return EOF; 249 } 250 *c1++ = unhex(++c2); 251 } 252 c2 += 2; 253 } else { 254 /* copies are redundant when no escapes were used */ 255 *c1++ = *c2++; 256 } 257 } 258 } else { 259 /* odd length not allowed */ 260 if (len & 1) { 261 Eof = 1; 262 badend(); 263 return EOF; 264 } 265 while (c2 < end) { 266 if (!isxdigit(*c2) || !isxdigit(c2[1])) { 267 Eof = 1; 268 badend(); 269 return EOF; 270 } 271 *c1++ = unhex(c2); 272 c2 += 2; 273 } 274 } 275 c2 = out->mv_data = buf->mv_data; 276 out->mv_size = c1 - c2; 277 278 return 0; 279 } 280 281 static void usage(void) 282 { 283 fprintf(stderr, "usage: %s [-V] [-f input] [-n] [-s name] [-N] [-T] dbpath\n", prog); 284 exit(EXIT_FAILURE); 285 } 286 287 int main(int argc, char *argv[]) 288 { 289 int i, rc; 290 MDB_env *env; 291 MDB_txn *txn; 292 MDB_cursor *mc; 293 MDB_dbi dbi; 294 char *envname; 295 int envflags = 0, putflags = 0; 296 int dohdr = 0; 297 298 prog = argv[0]; 299 300 if (argc < 2) { 301 usage(); 302 } 303 304 /* -f: load file instead of stdin 305 * -n: use NOSUBDIR flag on env_open 306 * -s: load into named subDB 307 * -N: use NOOVERWRITE on puts 308 * -T: read plaintext 309 * -V: print version and exit 310 */ 311 while ((i = getopt(argc, argv, "f:ns:NTV")) != EOF) { 312 switch(i) { 313 case 'V': 314 printf("%s\n", MDB_VERSION_STRING); 315 exit(0); 316 break; 317 case 'f': 318 if (freopen(optarg, "r", stdin) == NULL) { 319 fprintf(stderr, "%s: %s: reopen: %s\n", 320 prog, optarg, strerror(errno)); 321 exit(EXIT_FAILURE); 322 } 323 break; 324 case 'n': 325 envflags |= MDB_NOSUBDIR; 326 break; 327 case 's': 328 subname = strdup(optarg); 329 break; 330 case 'N': 331 putflags = MDB_NOOVERWRITE|MDB_NODUPDATA; 332 break; 333 case 'T': 334 mode |= NOHDR | PRINT; 335 break; 336 default: 337 usage(); 338 } 339 } 340 341 if (optind != argc - 1) 342 usage(); 343 344 dbuf.mv_size = 4096; 345 dbuf.mv_data = malloc(dbuf.mv_size); 346 347 if (!(mode & NOHDR)) 348 readhdr(); 349 350 envname = argv[optind]; 351 rc = mdb_env_create(&env); 352 if (rc) { 353 fprintf(stderr, "mdb_env_create failed, error %d %s\n", rc, mdb_strerror(rc)); 354 return EXIT_FAILURE; 355 } 356 357 mdb_env_set_maxdbs(env, 2); 358 359 if (info.me_maxreaders) 360 mdb_env_set_maxreaders(env, info.me_maxreaders); 361 362 if (info.me_mapsize) 363 mdb_env_set_mapsize(env, info.me_mapsize); 364 365 if (info.me_mapaddr) 366 envflags |= MDB_FIXEDMAP; 367 368 rc = mdb_env_open(env, envname, envflags, 0664); 369 if (rc) { 370 fprintf(stderr, "mdb_env_open failed, error %d %s\n", rc, mdb_strerror(rc)); 371 goto env_close; 372 } 373 374 kbuf.mv_size = mdb_env_get_maxkeysize(env) * 2 + 2; 375 kbuf.mv_data = malloc(kbuf.mv_size); 376 377 while(!Eof) { 378 MDB_val key, data; 379 int batch = 0; 380 381 if (!dohdr) { 382 dohdr = 1; 383 } else if (!(mode & NOHDR)) 384 readhdr(); 385 386 rc = mdb_txn_begin(env, NULL, 0, &txn); 387 if (rc) { 388 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); 389 goto env_close; 390 } 391 392 rc = mdb_open(txn, subname, flags|MDB_CREATE, &dbi); 393 if (rc) { 394 fprintf(stderr, "mdb_open failed, error %d %s\n", rc, mdb_strerror(rc)); 395 goto txn_abort; 396 } 397 398 rc = mdb_cursor_open(txn, dbi, &mc); 399 if (rc) { 400 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); 401 goto txn_abort; 402 } 403 404 while(1) { 405 rc = readline(&key, &kbuf); 406 if (rc) /* rc == EOF */ 407 break; 408 409 rc = readline(&data, &dbuf); 410 if (rc) { 411 fprintf(stderr, "%s: line %" Z "d: failed to read key value\n", prog, lineno); 412 goto txn_abort; 413 } 414 415 rc = mdb_cursor_put(mc, &key, &data, putflags); 416 if (rc == MDB_KEYEXIST && putflags) 417 continue; 418 if (rc) { 419 fprintf(stderr, "mdb_cursor_put failed, error %d %s\n", rc, mdb_strerror(rc)); 420 goto txn_abort; 421 } 422 batch++; 423 if (batch == 100) { 424 rc = mdb_txn_commit(txn); 425 if (rc) { 426 fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", 427 prog, lineno, mdb_strerror(rc)); 428 goto env_close; 429 } 430 rc = mdb_txn_begin(env, NULL, 0, &txn); 431 if (rc) { 432 fprintf(stderr, "mdb_txn_begin failed, error %d %s\n", rc, mdb_strerror(rc)); 433 goto env_close; 434 } 435 rc = mdb_cursor_open(txn, dbi, &mc); 436 if (rc) { 437 fprintf(stderr, "mdb_cursor_open failed, error %d %s\n", rc, mdb_strerror(rc)); 438 goto txn_abort; 439 } 440 batch = 0; 441 } 442 } 443 rc = mdb_txn_commit(txn); 444 txn = NULL; 445 if (rc) { 446 fprintf(stderr, "%s: line %" Z "d: txn_commit: %s\n", 447 prog, lineno, mdb_strerror(rc)); 448 goto env_close; 449 } 450 mdb_dbi_close(env, dbi); 451 } 452 453 txn_abort: 454 mdb_txn_abort(txn); 455 env_close: 456 mdb_env_close(env); 457 458 return rc ? EXIT_FAILURE : EXIT_SUCCESS; 459 } 460