/*	$NetBSD: slmdb.c,v 1.3 2020/03/18 19:05:22 christos Exp $	*/

/*++
/* NAME
/*	slmdb 3
/* SUMMARY
/*	Simplified LMDB API
/* SYNOPSIS
/*	#include <slmdb.h>
/*
/*	int slmdb_init(slmdb, curr_limit, size_incr, hard_limit)
/*	SLMDB *slmdb;
/*	size_t curr_limit;
/*	int size_incr;
/*	size_t hard_limit;
/*
/*	int slmdb_open(slmdb, path, open_flags, lmdb_flags, slmdb_flags)
/*	SLMDB *slmdb;
/*	const char *path;
/*	int open_flags;
/*	int lmdb_flags;
/*	int slmdb_flags;
/*
/*	int slmdb_close(slmdb)
/*	SLMDB *slmdb;
/*
/*	int slmdb_get(slmdb, mdb_key, mdb_value)
/*	SLMDB *slmdb;
/*	MDB_val *mdb_key;
/*	MDB_val *mdb_value;
/*
/*	int slmdb_put(slmdb, mdb_key, mdb_value, flags)
/*	SLMDB *slmdb;
/*	MDB_val *mdb_key;
/*	MDB_val *mdb_value;
/*	int flags;
/*
/*	int slmdb_del(slmdb, mdb_key)
/*	SLMDB *slmdb;
/*	MDB_val *mdb_key;
/*
/*	int slmdb_cursor_get(slmdb, mdb_key, mdb_value, op)
/*	SLMDB *slmdb;
/*	MDB_val *mdb_key;
/*	MDB_val *mdb_value;
/*	MDB_cursor_op op;
/* AUXILIARY FUNCTIONS
/*	int slmdb_fd(slmdb)
/*	SLMDB *slmdb;
/*
/*	size_t slmdb_curr_limit(slmdb)
/*	SLMDB *slmdb;
/*
/*	int slmdb_control(slmdb, request, ...)
/*	SLMDB *slmdb;
/*	int request;
/* DESCRIPTION
/*	This module simplifies the LMDB API by hiding recoverable
/*	errors from the application. Details are given in the
/*	section "ERROR RECOVERY".
/*
/*	slmdb_init() performs mandatory initialization before opening
/*	an LMDB database. The result value is an LMDB status code
/*	(zero in case of success).
/*
/*	slmdb_open() opens an LMDB database. The result value is
/*	an LMDB status code (zero in case of success).
/*
/*	slmdb_close() finalizes an optional bulk-mode transaction
/*	and closes a successfully-opened LMDB database. The result
/*	value is an LMDB status code (zero in case of success).
/*
/*	slmdb_get() is an mdb_get() wrapper with automatic error
/*	recovery. The result value is an LMDB status code (zero
/*	in case of success).
/*
/*	slmdb_put() is an mdb_put() wrapper with automatic error
/*	recovery. The result value is an LMDB status code (zero
/*	in case of success). An MDB_KEYEXIST result does not
/*	terminate a bulk-mode transaction.
/*
/*	slmdb_del() is an mdb_del() wrapper with automatic error
/*	recovery. The result value is an LMDB status code (zero
/*	in case of success). An MDB_NOTFOUND result does not
/*	terminate a bulk-mode transaction.
/*
/*	slmdb_cursor_get() is an mdb_cursor_get() wrapper with
/*	automatic error recovery. The result value is an LMDB
/*	status code (zero in case of success). This wrapper supports
/*	only one cursor per database.
/*
/*	slmdb_fd() returns the file descriptor for the specified
/*	database. This may be used for file status queries or
/*	application-controlled locking.
/*
/*	slmdb_curr_limit() returns the current database size limit
/*	for the specified database.
/*
/*	slmdb_control() specifies optional features. The result is
/*	an LMDB status code (zero in case of success).
/*
/*	Arguments:
/* .IP slmdb
/*	Pointer to caller-provided storage.
/* .IP curr_limit
/*	The initial memory mapping size limit. This limit is
/*	automatically increased when the database becomes full.
/* .IP size_incr
/*	An integer factor by which the memory mapping size limit
/*	is increased when the database becomes full. With a factor
/*	less than 2, the limit is raised directly to hard_limit.
/* .IP hard_limit
/*	The upper bound for the memory mapping size limit.
/* .IP path
/*	LMDB database pathname.
/* .IP open_flags
/*	Flags that control file open operations. Do not specify
/*	locking flags here.
/* .IP lmdb_flags
/*	Flags that control the LMDB environment. If MDB_NOLOCK is
/*	specified, then each slmdb_get() or slmdb_cursor_get() call
/*	must be protected with a shared (or exclusive) external lock,
/*	and each slmdb_put() or slmdb_del() call must be protected
/*	with an exclusive external lock. A lock may be released
/*	after the call returns. A writer may atomically downgrade
/*	an exclusive lock to shared, but it must obtain an exclusive
/*	lock before making another slmdb(3) write request.
/* .sp
/*	Note: when a database is opened with MDB_NOLOCK, external
/*	locks such as fcntl() do not protect slmdb(3) requests
/*	within the same process against each other. If a program
/*	cannot avoid making simultaneous slmdb(3) requests, then
/*	it must synchronize these requests with in-process locks,
/*	in addition to the per-process fcntl(2) locks.
/* .IP slmdb_flags
/*	Bit-wise OR of zero or more of the following:
/* .RS
/* .IP SLMDB_FLAG_BULK
/*	Open the database and create a "bulk" transaction that is
/*	committed when the database is closed. If MDB_NOLOCK is
/*	specified, then the entire transaction must be protected
/*	with a persistent external lock. All slmdb_get(), slmdb_put()
/*	and slmdb_del() requests will be directed to the "bulk"
/*	transaction.
/* .RE
/* .IP mdb_key
/*	Pointer to caller-provided lookup key storage.
/* .IP mdb_value
/*	Pointer to caller-provided value storage.
/* .IP op
/*	LMDB cursor operation.
/* .IP request
/*	The start of a list of (name, value) pairs, terminated with
/*	CA_SLMDB_CTL_END. The following text enumerates the symbolic
/*	request names and the corresponding argument types.
/* .RS
/* .IP "CA_SLMDB_CTL_LONGJMP_FN(void (*)(void *, int))"
/*	Call-back function pointer. The function is called to repeat
/*	a failed bulk-mode transaction from the start. The arguments
/*	are the application context and the setjmp() or sigsetjmp()
/*	result value.
/* .IP "CA_SLMDB_CTL_NOTIFY_FN(void (*)(void *, int, ...))"
/*	Call-back function pointer. The function is called to report
/*	successful error recovery. The arguments are the application
/*	context, the MDB error code, and additional arguments that
/*	depend on the error code. Details are given in the section
/*	"ERROR RECOVERY".
/* .IP "CA_SLMDB_CTL_ASSERT_FN(void (*)(void *, const char *))"
/*	Call-back function pointer. The function is called to
/*	report an LMDB internal assertion failure. The arguments
/*	are the application context, and text that describes the
/*	problem.
/* .IP "CA_SLMDB_CTL_CB_CONTEXT(void *)"
/*	Application context that is passed in call-back function
/*	calls.
/* .IP "CA_SLMDB_CTL_API_RETRY_LIMIT(int)"
/*	How many times to recover from LMDB errors within the
/*	execution of a single slmdb(3) API call before giving up.
/* .IP "CA_SLMDB_CTL_BULK_RETRY_LIMIT(int)"
/*	How many times to recover from a bulk-mode transaction
/*	before giving up.
/* .RE
/* ERROR RECOVERY
/* .ad
/* .fi
/*	This module automatically repeats failed requests after
/*	recoverable errors, up to the limits specified with
/*	slmdb_control().
/*
/*	Recoverable errors are reported through an optional
/*	notification function specified with slmdb_control(). With
/*	recoverable MDB_MAP_FULL and MDB_MAP_RESIZED errors, the
/*	additional argument is a size_t value with the updated
/*	current database size limit; with recoverable MDB_READERS_FULL
/*	errors there is no additional argument.
/* BUGS
/*	Recovery from MDB_MAP_FULL involves resizing the database
/*	memory mapping. According to LMDB documentation this
/*	requires that there is no concurrent activity in the same
/*	database by other threads in the same memory address space.
/* SEE ALSO
/*	lmdb(3) API manpage (currently, non-existent).
/* AUTHOR(S)
/*	Howard Chu
/*	Symas Corporation
/*
/*	Wietse Venema
/*	IBM T.J. Watson Research
/*	P.O. Box 704
/*	Yorktown Heights, NY 10598, USA
/*--*/

 /*
  * DO NOT include other Postfix-specific header files. This LMDB wrapper
  * must be usable outside Postfix.
  */

#ifdef HAS_LMDB

/* System library. */

#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>

/* Application-specific. */

#include <slmdb.h>

 /*
  * Minimum LMDB patchlevel.
  * 
  * LMDB 0.9.11 allows Postfix daemons to log an LMDB error message instead of
  * falling out of the sky without any explanation. Without such logging,
  * Postfix with LMDB would be too hard to support.
  * 
  * LMDB 0.9.10 fixes an information leak where LMDB wrote chunks of up to 4096
  * bytes of uninitialized heap memory to a database. This was a security
  * violation because it made information persistent that was not meant to be
  * persisted, or it was sharing information that was not meant to be shared.
  * 
  * LMDB 0.9.9 allows Postfix to use external (fcntl()-based) locks, instead of
  * having to use world-writable LMDB lock files.
  * 
  * LMDB 0.9.8 allows Postfix to update the database size limit on-the-fly, so
  * that it can recover from an MDB_MAP_FULL error without having to close
  * the database. It also allows an application to "pick up" a new database
  * size limit on-the-fly, so that it can recover from an MDB_MAP_RESIZED
  * error without having to close the database.
  * 
  * The database size limit that remains is imposed by the hardware memory
  * address space (31 or 47 bits, typically) or file system. The LMDB
  * implementation is supposed to handle databases larger than physical
  * memory. However, this is not necessarily guaranteed for (bulk)
  * transactions larger than physical memory.
  */
#if MDB_VERSION_FULL < MDB_VERINT(0, 9, 11)
#error "This Postfix version requires LMDB version 0.9.11 or later"
#endif

 /*
  * Error recovery.
  * 
  * The purpose of the slmdb(3) API is to hide LMDB quirks (recoverable
  * MAP_FULL, MAP_RESIZED, or MDB_READERS_FULL errors). With these out of the
  * way, applications can pretend that those quirks don't exist, and focus on
  * their own job.
  * 
  * - To recover from a single-transaction LMDB error, each wrapper function
  * uses tail recursion instead of goto. Since LMDB errors are rare, code
  * clarity is more important than speed.
  * 
  * - To recover from a bulk-transaction LMDB error, the error-recovery code
  * triggers a long jump back into the caller to some pre-arranged point (the
  * closest thing that C has to exception handling). The application is then
  * expected to repeat the bulk transaction from scratch.
  */

 /*
  * Our default retry attempt limits. We allow a few retries per slmdb(3) API
  * call for non-bulk transactions. We allow a number of bulk-transaction
  * retries that is proportional to the memory address space.
  */
#define SLMDB_DEF_API_RETRY_LIMIT 30	/* Retries per slmdb(3) API call */
#define SLMDB_DEF_BULK_RETRY_LIMIT \
        (2 * sizeof(size_t) * CHAR_BIT)	/* Retries per bulk-mode transaction */

 /*
  * We increment the recursion counter each time we try to recover from
  * error, and reset the recursion counter when returning to the application
  * from the slmdb(3) API.
  */
#define SLMDB_API_RETURN(slmdb, status) do { \
        (slmdb)->api_retry_count = 0; \
        return (status); \
    } while (0)

 /*
  * With MDB_NOLOCK, the application uses an external lock for inter-process
  * synchronization. Because the caller may release the external lock after
  * an SLMDB API call, each SLMDB API function must use a short-lived
  * transaction unless the transaction is a bulk-mode transaction.
  */

/* slmdb_cursor_close - close cursor and its read transaction */

static void slmdb_cursor_close(SLMDB *slmdb)
{
    MDB_txn *txn;

    /*
     * Close the cursor and its read transaction. We can restore it later
     * from the saved key information. The transaction handle must be
     * looked up before the cursor is closed.
     */
    txn = mdb_cursor_txn(slmdb->cursor);
    mdb_cursor_close(slmdb->cursor);
    slmdb->cursor = 0;
    mdb_txn_abort(txn);
}

/* slmdb_saved_key_init - initialize saved key info */

static void slmdb_saved_key_init(SLMDB *slmdb)
{
    slmdb->saved_key.mv_data = 0;
    slmdb->saved_key.mv_size = 0;
    slmdb->saved_key_size = 0;
}

/* slmdb_saved_key_free - destroy saved key info */

static void slmdb_saved_key_free(SLMDB *slmdb)
{
    free(slmdb->saved_key.mv_data);
    slmdb_saved_key_init(slmdb);
}

#define HAVE_SLMDB_SAVED_KEY(s) ((s)->saved_key.mv_data != 0)

/* slmdb_saved_key_assign - copy the saved key */

static int slmdb_saved_key_assign(SLMDB *slmdb, MDB_val *key_val)
{
    void   *new_data;

    /*
     * Extend the buffer to fit the key, so that we can avoid malloc()
     * overhead most of the time. realloc() with a null pointer behaves like
     * malloc(). Keep the old pointer until the allocation has succeeded, so
     * that the old buffer can be released instead of leaked on failure. The
     * result is ENOMEM and an empty saved key when allocation fails.
     */
    if (slmdb->saved_key_size < key_val->mv_size) {
        new_data = realloc(slmdb->saved_key.mv_data, key_val->mv_size);
        if (new_data == 0) {
            slmdb_saved_key_free(slmdb);
            return (ENOMEM);
        }
        slmdb->saved_key.mv_data = new_data;
        slmdb->saved_key_size = key_val->mv_size;
    }

    /*
     * Copy the key under the cursor. Skip the copy for an empty key: the
     * destination may still be a null pointer in that case.
     */
    if (key_val->mv_size > 0)
        memcpy(slmdb->saved_key.mv_data, key_val->mv_data, key_val->mv_size);
    slmdb->saved_key.mv_size = key_val->mv_size;
    return (0);
}

/* slmdb_prepare - LMDB-specific (re)initialization before actual access */

static int slmdb_prepare(SLMDB *slmdb)
{
    int     status = 0;

    /*
     * This is called before accessing the database, or after recovery from
     * an LMDB error. Note: this code cannot recover from errors itself.
     * slmdb->txn is either the database open() transaction or a
     * freshly-created bulk-mode transaction.
     * 
     * - With O_TRUNC we make a "drop" request before updating the database.
     * 
     * - With a bulk-mode transaction we commit when the database is closed.
     * 
     * On error return, the transaction has been disposed of and slmdb->txn is
     * null, so that the caller can safely close the environment.
     */
    if (slmdb->open_flags & O_TRUNC) {
        if ((status = mdb_drop(slmdb->txn, slmdb->dbi, 0)) != 0) {
            mdb_txn_abort(slmdb->txn);
            slmdb->txn = 0;
            return (status);
        }
        if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
            /* The handle is freed by mdb_txn_commit(), even on failure. */
            status = mdb_txn_commit(slmdb->txn);
            slmdb->txn = 0;
            if (status != 0)
                return (status);
        }
    } else if ((slmdb->lmdb_flags & MDB_RDONLY) != 0
               || (slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
        mdb_txn_abort(slmdb->txn);
        slmdb->txn = 0;
    }
    slmdb->api_retry_count = 0;
    return (status);
}

/* slmdb_recover - recover from LMDB errors */

static int slmdb_recover(SLMDB *slmdb, int status)
{
    MDB_envinfo info;
    int     orig_status = status;

    /*
     * This may be needed in non-MDB_NOLOCK mode. Recovery is rare enough
     * that we don't care about a few wasted cycles.
     */
    if (slmdb->cursor != 0)
        slmdb_cursor_close(slmdb);

    /*
     * Recover bulk transactions only if they can be restarted. Limit the
     * number of recovery attempts per slmdb(3) API request.
     */
    if ((slmdb->txn != 0 && slmdb->longjmp_fn == 0)
        || ((slmdb->api_retry_count += 1) >= slmdb->api_retry_limit))
        return (status);

    /*
     * If we can recover from the error, we clear the error condition and the
     * caller should retry the failed operation immediately. Otherwise, the
     * caller should terminate with a fatal run-time error and the program
     * should be re-run later.
     * 
     * slmdb->txn must be either null (non-bulk transaction error), or an
     * aborted bulk-mode transaction.
     */
    switch (status) {

        /*
         * As of LMDB 0.9.8 when a non-bulk update runs into a "map full"
         * error, we can resize the environment's memory map and clear the
         * error condition. The caller should retry immediately.
         */
    case MDB_MAP_FULL:
        /*
         * Can we increase the memory map? Give up if we can't. A growth
         * factor below 2 cannot make progress (and zero would divide by
         * zero), so in that case go straight to the hard limit.
         */
        if (slmdb->size_incr > 1
            && slmdb->curr_limit < slmdb->hard_limit / slmdb->size_incr) {
            slmdb->curr_limit = slmdb->curr_limit * slmdb->size_incr;
        } else if (slmdb->curr_limit < slmdb->hard_limit) {
            slmdb->curr_limit = slmdb->hard_limit;
        } else {
            /* Sorry, we are already maxed out. */
            break;
        }
        if (slmdb->notify_fn)
            slmdb->notify_fn(slmdb->cb_context, MDB_MAP_FULL,
                             slmdb->curr_limit);
        status = mdb_env_set_mapsize(slmdb->env, slmdb->curr_limit);
        break;

        /*
         * When a writer resizes the database, read-only applications must
         * increase their LMDB memory map size limit, too. Otherwise, they
         * won't be able to read a table after it grows.
         * 
         * As of LMDB 0.9.8 we can import the new memory map size limit into the
         * database environment by calling mdb_env_set_mapsize() with a zero
         * size argument. Then we extract the map size limit for later use.
         * The caller should retry immediately.
         */
    case MDB_MAP_RESIZED:
        if ((status = mdb_env_set_mapsize(slmdb->env, 0)) == 0
            && (status = mdb_env_info(slmdb->env, &info)) == 0) {
            /* Do not panic. Maps may shrink after bulk update. */
            slmdb->curr_limit = info.me_mapsize;
            if (slmdb->notify_fn)
                slmdb->notify_fn(slmdb->cb_context, MDB_MAP_RESIZED,
                                 slmdb->curr_limit);
        }
        break;

        /*
         * What is it with these built-in hard limits that cause systems to
         * stop when demand is at its highest? When the system is under
         * stress it should slow down and keep making progress.
         */
    case MDB_READERS_FULL:
        if (slmdb->notify_fn)
            slmdb->notify_fn(slmdb->cb_context, MDB_READERS_FULL);
        sleep(1);
        status = 0;
        break;

        /*
         * We can't solve this problem. The application should terminate with
         * a fatal run-time error and the program should be re-run later.
         */
    default:
        break;
    }

    /*
     * If a bulk-transaction error is recoverable, build a new bulk
     * transaction from scratch, by making a long jump back into the caller
     * at some pre-arranged point. In MDB_NOLOCK mode, there is no need to
     * upgrade the lock to "exclusive", because the failed write transaction
     * has no side effects.
     * 
     * When the bulk retry limit is exhausted, report the original error. A
     * zero result would tell the caller to retry the request with the
     * already-aborted bulk transaction.
     */
    if (slmdb->txn != 0 && status == 0 && slmdb->longjmp_fn != 0) {
        if ((slmdb->bulk_retry_count += 1) > slmdb->bulk_retry_limit)
            return (orig_status);
        if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0,
                                    slmdb->lmdb_flags & MDB_RDONLY,
                                    &slmdb->txn)) == 0
            && (status = slmdb_prepare(slmdb)) == 0)
            slmdb->longjmp_fn(slmdb->cb_context, 1);
    }
    return (status);
}

/* slmdb_txn_begin - mdb_txn_begin() wrapper with LMDB error recovery */

static int slmdb_txn_begin(SLMDB *slmdb, int rdonly, MDB_txn **txn)
{
    int     status;

    /*
     * Retry after successful recovery. The recursion depth is bounded by
     * the per-call retry limit that slmdb_recover() enforces.
     */
    if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0, rdonly, txn)) != 0
        && (status = slmdb_recover(slmdb, status)) == 0)
        status = slmdb_txn_begin(slmdb, rdonly, txn);

    return (status);
}

/* slmdb_get - mdb_get() wrapper with LMDB error recovery */

int     slmdb_get(SLMDB *slmdb, MDB_val *mdb_key, MDB_val *mdb_value)
{
    MDB_txn *txn;
    int     status;

    /*
     * Start a read transaction if there's no bulk-mode txn.
     */
    if (slmdb->txn)
        txn = slmdb->txn;
    else if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
        SLMDB_API_RETURN(slmdb, status);

    /*
     * Do the lookup. MDB_NOTFOUND is a normal result, not an error that
     * requires recovery.
     */
    if ((status = mdb_get(txn, slmdb->dbi, mdb_key, mdb_value)) != 0
        && status != MDB_NOTFOUND) {
        mdb_txn_abort(txn);
        if ((status = slmdb_recover(slmdb, status)) == 0)
            status = slmdb_get(slmdb, mdb_key, mdb_value);
        SLMDB_API_RETURN(slmdb, status);
    }

    /*
     * Close the read txn if it's not the bulk-mode txn.
     */
    if (slmdb->txn == 0)
        mdb_txn_abort(txn);

    SLMDB_API_RETURN(slmdb, status);
}

/* slmdb_put - mdb_put() wrapper with LMDB error recovery */

int     slmdb_put(SLMDB *slmdb, MDB_val *mdb_key,
                          MDB_val *mdb_value, int flags)
{
    MDB_txn *txn;
    int     status;

    /*
     * Start a write transaction if there's no bulk-mode txn.
     */
    if (slmdb->txn)
        txn = slmdb->txn;
    else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
        SLMDB_API_RETURN(slmdb, status);

    /*
     * Do the update. MDB_KEYEXIST is a normal result that is passed on to
     * the caller; any other error goes through error recovery.
     */
    if ((status = mdb_put(txn, slmdb->dbi, mdb_key, mdb_value, flags)) != 0) {
        if (status != MDB_KEYEXIST) {
            mdb_txn_abort(txn);
            if ((status = slmdb_recover(slmdb, status)) == 0)
                status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
            SLMDB_API_RETURN(slmdb, status);
        } else {
            /*
             * Abort a short-lived transaction only. The bulk-mode
             * transaction must survive a duplicate-key result, because
             * later requests and slmdb_close() still use it.
             */
            if (slmdb->txn == 0)
                mdb_txn_abort(txn);
        }
    }

    /*
     * Commit the transaction if it's not the bulk-mode txn.
     */
    if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
        && (status = slmdb_recover(slmdb, status)) == 0)
        status = slmdb_put(slmdb, mdb_key, mdb_value, flags);

    SLMDB_API_RETURN(slmdb, status);
}

/* slmdb_del - mdb_del() wrapper with LMDB error recovery */

int     slmdb_del(SLMDB *slmdb, MDB_val *mdb_key)
{
    MDB_txn *txn;
    int     status;

    /*
     * Start a write transaction if there's no bulk-mode txn.
     */
    if (slmdb->txn)
        txn = slmdb->txn;
    else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
        SLMDB_API_RETURN(slmdb, status);

    /*
     * Do the update. MDB_NOTFOUND is a normal result that is passed on to
     * the caller; any other error goes through error recovery.
     */
    if ((status = mdb_del(txn, slmdb->dbi, mdb_key, (MDB_val *) 0)) != 0) {
        if (status != MDB_NOTFOUND) {
            mdb_txn_abort(txn);
            if ((status = slmdb_recover(slmdb, status)) == 0)
                status = slmdb_del(slmdb, mdb_key);
            SLMDB_API_RETURN(slmdb, status);
        } else {
            /*
             * Abort a short-lived transaction only. The bulk-mode
             * transaction must survive a not-found result, because later
             * requests and slmdb_close() still use it.
             */
            if (slmdb->txn == 0)
                mdb_txn_abort(txn);
        }
    }

    /*
     * Commit the transaction if it's not the bulk-mode txn.
     */
    if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
        && (status = slmdb_recover(slmdb, status)) == 0)
        status = slmdb_del(slmdb, mdb_key);

    SLMDB_API_RETURN(slmdb, status);
}

/* slmdb_cursor_get - mdb_cursor_get() wrapper with LMDB error recovery */

int     slmdb_cursor_get(SLMDB *slmdb, MDB_val *mdb_key,
                                 MDB_val *mdb_value, MDB_cursor_op op)
{
    MDB_txn *txn;
    int     status = 0;

    /*
     * Open a read transaction and cursor if needed.
     */
    if (slmdb->cursor == 0) {
        if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
            SLMDB_API_RETURN(slmdb, status);
        if ((status = mdb_cursor_open(txn, slmdb->dbi, &slmdb->cursor)) != 0) {
            mdb_txn_abort(txn);
            if ((status = slmdb_recover(slmdb, status)) == 0)
                status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
            SLMDB_API_RETURN(slmdb, status);
        }

        /*
         * Restore the cursor position from the saved key information.
         */
        if (HAVE_SLMDB_SAVED_KEY(slmdb) && op != MDB_FIRST)
            status = mdb_cursor_get(slmdb->cursor, &slmdb->saved_key,
                                    (MDB_val *) 0, MDB_SET);
    }

    /*
     * Database lookup.
     */
    if (status == 0)
        status = mdb_cursor_get(slmdb->cursor, mdb_key, mdb_value, op);

    /*
     * Save the cursor position if successful. This can fail only with
     * ENOMEM.
     * 
     * Close the cursor read transaction if in MDB_NOLOCK mode, because the
     * caller may release the external lock after we return.
     */
    if (status == 0) {
        status = slmdb_saved_key_assign(slmdb, mdb_key);
        if (slmdb->lmdb_flags & MDB_NOLOCK)
            slmdb_cursor_close(slmdb);
    }

    /*
     * Handle end-of-database or other error.
     */
    else {
        /* Do not hand-optimize out the slmdb_cursor_close() calls below. */
        if (status == MDB_NOTFOUND) {
            slmdb_cursor_close(slmdb);
            if (HAVE_SLMDB_SAVED_KEY(slmdb))
                slmdb_saved_key_free(slmdb);
        } else {
            slmdb_cursor_close(slmdb);
            if ((status = slmdb_recover(slmdb, status)) == 0)
                status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
            SLMDB_API_RETURN(slmdb, status);
            /* Do not hand-optimize out the above return statement. */
        }
    }
    SLMDB_API_RETURN(slmdb, status);
}

/* slmdb_assert_cb - report LMDB assertion failure */

static void slmdb_assert_cb(MDB_env *env, const char *text)
{
    SLMDB  *slmdb = (SLMDB *) mdb_env_get_userctx(env);

    /*
     * The user context is the SLMDB handle that slmdb_control() registered
     * together with this call-back.
     */
    if (slmdb->assert_fn)
        slmdb->assert_fn(slmdb->cb_context, text);
}

/* slmdb_control - control optional settings */

int     slmdb_control(SLMDB *slmdb, int first,...)
{
    va_list ap;
    int     status = 0;
    int     reqno;
    int     rc;

    /*
     * Process (name, value) pairs until the terminator or the first error.
     * Stop right after an error: fetching another request name after an
     * unrecognized request could read past the end of the argument list.
     */
    va_start(ap, first);
    for (reqno = first; reqno != SLMDB_CTL_END; reqno = va_arg(ap, int)) {
        switch (reqno) {
        case SLMDB_CTL_LONGJMP_FN:
            slmdb->longjmp_fn = va_arg(ap, SLMDB_LONGJMP_FN);
            break;
        case SLMDB_CTL_NOTIFY_FN:
            slmdb->notify_fn = va_arg(ap, SLMDB_NOTIFY_FN);
            break;
        case SLMDB_CTL_ASSERT_FN:
            slmdb->assert_fn = va_arg(ap, SLMDB_ASSERT_FN);
            if ((rc = mdb_env_set_userctx(slmdb->env, (void *) slmdb)) != 0
                || (rc = mdb_env_set_assert(slmdb->env, slmdb_assert_cb)) != 0)
                status = rc;
            break;
        case SLMDB_CTL_CB_CONTEXT:
            slmdb->cb_context = va_arg(ap, void *);
            break;
        case SLMDB_CTL_API_RETRY_LIMIT:
            slmdb->api_retry_limit = va_arg(ap, int);
            break;
        case SLMDB_CTL_BULK_RETRY_LIMIT:
            slmdb->bulk_retry_limit = va_arg(ap, int);
            break;
        default:
            status = errno = EINVAL;
            break;
        }
        if (status != 0)
            break;
    }
    va_end(ap);
    return (status);
}

/* slmdb_close - wrapper with LMDB error recovery */

int     slmdb_close(SLMDB *slmdb)
{
    int     status = 0;

    /*
     * Finish an open bulk transaction. If slmdb_recover() returns after a
     * bulk-transaction error, then it was unable to recover.
     */
    if (slmdb->txn != 0
        && (status = mdb_txn_commit(slmdb->txn)) != 0)
        status = slmdb_recover(slmdb, status);

    /*
     * Clean up after an unfinished sequence() operation.
     */
    if (slmdb->cursor != 0)
        slmdb_cursor_close(slmdb);

    mdb_env_close(slmdb->env);

    /*
     * Clean up the saved key information.
     */
    if (HAVE_SLMDB_SAVED_KEY(slmdb))
        slmdb_saved_key_free(slmdb);

    SLMDB_API_RETURN(slmdb, status);
}

/* slmdb_init - mandatory initialization */

int     slmdb_init(SLMDB *slmdb, size_t curr_limit, int size_incr,
                           size_t hard_limit)
{

    /*
     * This is a separate operation to keep the slmdb_open() API simple.
     * Don't allocate resources here. Just store control information.
     */
    slmdb->curr_limit = curr_limit;
    slmdb->size_incr = size_incr;
    slmdb->hard_limit = hard_limit;

    return (MDB_SUCCESS);
}

/* slmdb_open - open wrapped LMDB database */

int     slmdb_open(SLMDB *slmdb, const char *path, int open_flags,
                           int lmdb_flags, int slmdb_flags)
{
    struct stat st;
    MDB_env *env;
    MDB_txn *txn = 0;
    MDB_dbi dbi;
    int     db_fd;
    int     status;

    /*
     * Create LMDB environment.
     */
    if ((status = mdb_env_create(&env)) != 0)
        return (status);

    /*
     * Make sure that the memory map has room to store and commit an initial
     * "drop" transaction as well as fixed database metadata. We have no way
     * to recover from errors before the first application-level I/O request.
     */
#define SLMDB_FUDGE      10240

    if (slmdb->curr_limit < SLMDB_FUDGE)
        slmdb->curr_limit = SLMDB_FUDGE;
    if (stat(path, &st) == 0
        && st.st_size > slmdb->curr_limit - SLMDB_FUDGE) {
        if (st.st_size > slmdb->hard_limit)
            slmdb->hard_limit = st.st_size;
        if (st.st_size < slmdb->hard_limit - SLMDB_FUDGE)
            slmdb->curr_limit = st.st_size + SLMDB_FUDGE;
        else
            slmdb->curr_limit = slmdb->hard_limit;
    }

    /*
     * mdb_open() requires a txn, but since the default DB always exists in
     * an LMDB environment, we usually don't need to do anything else with
     * the txn. It is currently used for truncate and for bulk transactions.
     * 
     * If a step fails after the transaction was started, abort the
     * transaction before closing the environment.
     */
    if ((status = mdb_env_set_mapsize(env, slmdb->curr_limit)) != 0
        || (status = mdb_env_open(env, path, lmdb_flags, 0644)) != 0
        || (status = mdb_txn_begin(env, (MDB_txn *) 0,
                                   lmdb_flags & MDB_RDONLY, &txn)) != 0
        || (status = mdb_open(txn, (const char *) 0, 0, &dbi)) != 0
        || (status = mdb_env_get_fd(env, &db_fd)) != 0) {
        if (txn != 0)
            mdb_txn_abort(txn);
        mdb_env_close(env);
        return (status);
    }

    /*
     * Bundle up.
     */
    slmdb->open_flags = open_flags;
    slmdb->lmdb_flags = lmdb_flags;
    slmdb->slmdb_flags = slmdb_flags;
    slmdb->env = env;
    slmdb->dbi = dbi;
    slmdb->db_fd = db_fd;
    slmdb->cursor = 0;
    slmdb_saved_key_init(slmdb);
    slmdb->api_retry_count = 0;
    slmdb->bulk_retry_count = 0;
    slmdb->api_retry_limit = SLMDB_DEF_API_RETRY_LIMIT;
    slmdb->bulk_retry_limit = SLMDB_DEF_BULK_RETRY_LIMIT;
    slmdb->longjmp_fn = 0;
    slmdb->notify_fn = 0;
    slmdb->assert_fn = 0;
    slmdb->cb_context = 0;
    slmdb->txn = txn;

    /*
     * slmdb_prepare() disposes of the transaction when it fails, so the
     * environment can be closed right away.
     */
    if ((status = slmdb_prepare(slmdb)) != 0)
        mdb_env_close(env);

    return (status);
}

#endif