xref: /netbsd-src/external/ibm-public/postfix/dist/src/util/slmdb.c (revision e670fd5c413e99c2f6a37901bb21c537fcd322d2)
1 /*	$NetBSD: slmdb.c,v 1.3 2020/03/18 19:05:22 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	slmdb 3
6 /* SUMMARY
7 /*	Simplified LMDB API
8 /* SYNOPSIS
9 /*	#include <slmdb.h>
10 /*
11 /*	int	slmdb_init(slmdb, curr_limit, size_incr, hard_limit)
12 /*	SLMDB	*slmdb;
13 /*	size_t	curr_limit;
14 /*	int	size_incr;
15 /*	size_t	hard_limit;
16 /*
17 /*	int	slmdb_open(slmdb, path, open_flags, lmdb_flags, slmdb_flags)
18 /*	SLMDB	*slmdb;
19 /*	const char *path;
20 /*	int	open_flags;
21 /*	int	lmdb_flags;
22 /*	int	slmdb_flags;
23 /*
24 /*	int	slmdb_close(slmdb)
25 /*	SLMDB	*slmdb;
26 /*
27 /*	int	slmdb_get(slmdb, mdb_key, mdb_value)
28 /*	SLMDB	*slmdb;
29 /*	MDB_val	*mdb_key;
30 /*	MDB_val	*mdb_value;
31 /*
32 /*	int	slmdb_put(slmdb, mdb_key, mdb_value, flags)
33 /*	SLMDB	*slmdb;
34 /*	MDB_val	*mdb_key;
35 /*	MDB_val	*mdb_value;
36 /*	int	flags;
37 /*
38 /*	int	slmdb_del(slmdb, mdb_key)
39 /*	SLMDB	*slmdb;
40 /*	MDB_val	*mdb_key;
41 /*
42 /*	int	slmdb_cursor_get(slmdb, mdb_key, mdb_value, op)
43 /*	SLMDB	*slmdb;
44 /*	MDB_val	*mdb_key;
45 /*	MDB_val	*mdb_value;
46 /*	MDB_cursor_op op;
47 /* AUXILIARY FUNCTIONS
48 /*	int	slmdb_fd(slmdb)
49 /*	SLMDB	*slmdb;
50 /*
51 /*	size_t	slmdb_curr_limit(slmdb)
52 /*	SLMDB	*slmdb;
53 /*
54 /*	int	slmdb_control(slmdb, request, ...)
55 /*	SLMDB	*slmdb;
56 /*	int	request;
57 /* DESCRIPTION
58 /*	This module simplifies the LMDB API by hiding recoverable
59 /*	errors from the application.  Details are given in the
60 /*	section "ERROR RECOVERY".
61 /*
62 /*	slmdb_init() performs mandatory initialization before opening
63 /*	an LMDB database. The result value is an LMDB status code
64 /*	(zero in case of success).
65 /*
66 /*	slmdb_open() opens an LMDB database.  The result value is
67 /*	an LMDB status code (zero in case of success).
68 /*
69 /*	slmdb_close() finalizes an optional bulk-mode transaction
70 /*	and closes a successfully-opened LMDB database.  The result
71 /*	value is an LMDB status code (zero in case of success).
72 /*
73 /*	slmdb_get() is an mdb_get() wrapper with automatic error
74 /*	recovery.  The result value is an LMDB status code (zero
75 /*	in case of success).
76 /*
77 /*	slmdb_put() is an mdb_put() wrapper with automatic error
78 /*	recovery.  The result value is an LMDB status code (zero
79 /*	in case of success).
80 /*
81 /*	slmdb_del() is an mdb_del() wrapper with automatic error
82 /*	recovery.  The result value is an LMDB status code (zero
83 /*	in case of success).
84 /*
85 /*	slmdb_cursor_get() is an mdb_cursor_get() wrapper with
86 /*	automatic error recovery.  The result value is an LMDB
87 /*	status code (zero in case of success). This wrapper supports
88 /*	only one cursor per database.
89 /*
90 /*	slmdb_fd() returns the file descriptor for the specified
91 /*	database.  This may be used for file status queries or
92 /*	application-controlled locking.
93 /*
94 /*	slmdb_curr_limit() returns the current database size limit
95 /*	for the specified database.
96 /*
97 /*	slmdb_control() specifies optional features. The result is
98 /*	an LMDB status code (zero in case of success).
99 /*
100 /*	Arguments:
101 /* .IP slmdb
102 /*	Pointer to caller-provided storage.
103 /* .IP curr_limit
104 /*	The initial memory mapping size limit. This limit is
105 /*	automatically increased when the database becomes full.
106 /* .IP size_incr
107 /*	An integer factor by which the memory mapping size limit
108 /*	is increased when the database becomes full.
109 /* .IP hard_limit
110 /*	The upper bound for the memory mapping size limit.
111 /* .IP path
112 /*	LMDB database pathname.
113 /* .IP open_flags
114 /*	Flags that control file open operations. Do not specify
115 /*	locking flags here.
116 /* .IP lmdb_flags
117 /*	Flags that control the LMDB environment. If MDB_NOLOCK is
118 /*	specified, then each slmdb_get() or slmdb_cursor_get() call
119 /*	must be protected with a shared (or exclusive) external lock,
120 /*	and each slmdb_put() or slmdb_del() call must be protected
121 /*	with an exclusive external lock. A lock may be released
122 /*	after the call returns. A writer may atomically downgrade
123 /*	an exclusive lock to shared, but it must obtain an exclusive
124 /*	lock before making another slmdb(3) write request.
125 /* .sp
126 /*	Note: when a database is opened with MDB_NOLOCK, external
127 /*	locks such as fcntl() do not protect slmdb(3) requests
128 /*	within the same process against each other.  If a program
129 /*	cannot avoid making simultaneous slmdb(3) requests, then
130 /*	it must synchronize these requests with in-process locks,
131 /*	in addition to the per-process fcntl(2) locks.
132 /* .IP slmdb_flags
133 /*	Bit-wise OR of zero or more of the following:
134 /* .RS
135 /* .IP SLMDB_FLAG_BULK
136 /*	Open the database and create a "bulk" transaction that is
137 /*	committed when the database is closed. If MDB_NOLOCK is
138 /*	specified, then the entire transaction must be protected
139 /*	with a persistent external lock.  All slmdb_get(), slmdb_put()
140 /*	and slmdb_del() requests will be directed to the "bulk"
141 /*	transaction.
142 /* .RE
143 /* .IP mdb_key
144 /*	Pointer to caller-provided lookup key storage.
145 /* .IP mdb_value
146 /*	Pointer to caller-provided value storage.
147 /* .IP op
148 /*	LMDB cursor operation.
149 /* .IP request
150 /*	The start of a list of (name, value) pairs, terminated with
151 /*	CA_SLMDB_CTL_END.  The following text enumerates the symbolic
152 /*	request names and the corresponding argument types.
153 /* .RS
154 /* .IP "CA_SLMDB_CTL_LONGJMP_FN(void (*)(void *, int))"
155 /*	Call-back function pointer. The function is called to repeat
156 /*	a failed bulk-mode transaction from the start. The arguments
157 /*	are the application context and the setjmp() or sigsetjmp()
158 /*	result value.
159 /* .IP "CA_SLMDB_CTL_NOTIFY_FN(void (*)(void *, int, ...))"
160 /*	Call-back function pointer. The function is called to report
161 /*	successful error recovery. The arguments are the application
162 /*	context, the MDB error code, and additional arguments that
163 /*	depend on the error code.  Details are given in the section
164 /*	"ERROR RECOVERY".
165 /* .IP "CA_SLMDB_CTL_ASSERT_FN(void (*)(void *, const char *))"
166 /*	Call-back function pointer.  The function is called to
167 /*	report an LMDB internal assertion failure. The arguments
168 /*	are the application context, and text that describes the
169 /*	problem.
170 /* .IP "CA_SLMDB_CTL_CB_CONTEXT(void *)"
171 /*	Application context that is passed in call-back function
172 /*	calls.
173 /* .IP "CA_SLMDB_CTL_API_RETRY_LIMIT(int)"
174 /*	How many times to recover from LMDB errors within the
175 /*	execution of a single slmdb(3) API call before giving up.
176 /* .IP "CA_SLMDB_CTL_BULK_RETRY_LIMIT(int)"
177 /*	How many times to recover from a bulk-mode transaction
178 /*	before giving up.
179 /* .RE
180 /* ERROR RECOVERY
181 /* .ad
182 /* .fi
183 /*	This module automatically repeats failed requests after
184 /*	recoverable errors, up to the limits specified with
185 /*	slmdb_control().
186 /*
187 /*	Recoverable errors are reported through an optional
188 /*	notification function specified with slmdb_control().  With
189 /*	recoverable MDB_MAP_FULL and MDB_MAP_RESIZED errors, the
190 /*	additional argument is a size_t value with the updated
191 /*	current database size limit; with recoverable MDB_READERS_FULL
192 /*	errors there is no additional argument.
193 /* BUGS
194 /*	Recovery from MDB_MAP_FULL involves resizing the database
195 /*	memory mapping.  According to LMDB documentation this
196 /*	requires that there is no concurrent activity in the same
197 /*	database by other threads in the same memory address space.
198 /* SEE ALSO
/*	lmdb(3) API manpage (currently nonexistent).
200 /* AUTHOR(S)
201 /*	Howard Chu
202 /*	Symas Corporation
203 /*
204 /*	Wietse Venema
205 /*	IBM T.J. Watson Research
206 /*	P.O. Box 704
207 /*	Yorktown Heights, NY 10598, USA
208 /*--*/
209 
210  /*
211   * DO NOT include other Postfix-specific header files. This LMDB wrapper
212   * must be usable outside Postfix.
213   */
214 
215 #ifdef HAS_LMDB
216 
217 /* System library. */
218 
219 #include <sys/stat.h>
220 #include <errno.h>
221 #include <fcntl.h>
222 #include <string.h>
223 #include <unistd.h>
224 #include <limits.h>
225 #include <stdarg.h>
226 #include <string.h>
227 #include <stdlib.h>
228 
229 /* Application-specific. */
230 
231 #include <slmdb.h>
232 
233  /*
234   * Minimum LMDB patchlevel.
235   *
236   * LMDB 0.9.11 allows Postfix daemons to log an LMDB error message instead of
237   * falling out of the sky without any explanation. Without such logging,
238   * Postfix with LMDB would be too hard to support.
239   *
240   * LMDB 0.9.10 fixes an information leak where LMDB wrote chunks of up to 4096
241   * bytes of uninitialized heap memory to a database. This was a security
242   * violation because it made information persistent that was not meant to be
243   * persisted, or it was sharing information that was not meant to be shared.
244   *
245   * LMDB 0.9.9 allows Postfix to use external (fcntl()-based) locks, instead of
246   * having to use world-writable LMDB lock files.
247   *
248   * LMDB 0.9.8 allows Postfix to update the database size limit on-the-fly, so
249   * that it can recover from an MDB_MAP_FULL error without having to close
250   * the database. It also allows an application to "pick up" a new database
251   * size limit on-the-fly, so that it can recover from an MDB_MAP_RESIZED
252   * error without having to close the database.
253   *
254   * The database size limit that remains is imposed by the hardware memory
255   * address space (31 or 47 bits, typically) or file system. The LMDB
256   * implementation is supposed to handle databases larger than physical
257   * memory. However, this is not necessarily guaranteed for (bulk)
258   * transactions larger than physical memory.
259   */
260 #if MDB_VERSION_FULL < MDB_VERINT(0, 9, 11)
261 #error "This Postfix version requires LMDB version 0.9.11 or later"
262 #endif
263 
264  /*
265   * Error recovery.
266   *
  * The purpose of the slmdb(3) API is to hide LMDB quirks (recoverable
  * MDB_MAP_FULL, MDB_MAP_RESIZED, or MDB_READERS_FULL errors). With these
  * out of the way, applications can pretend that those quirks don't exist,
  * and focus on their own job.
271   *
272   * - To recover from a single-transaction LMDB error, each wrapper function
273   * uses tail recursion instead of goto. Since LMDB errors are rare, code
274   * clarity is more important than speed.
275   *
276   * - To recover from a bulk-transaction LMDB error, the error-recovery code
277   * triggers a long jump back into the caller to some pre-arranged point (the
278   * closest thing that C has to exception handling). The application is then
279   * expected to repeat the bulk transaction from scratch.
280   */
281 
282  /*
283   * Our default retry attempt limits. We allow a few retries per slmdb(3) API
284   * call for non-bulk transactions. We allow a number of bulk-transaction
285   * retries that is proportional to the memory address space.
286   */
287 #define SLMDB_DEF_API_RETRY_LIMIT 30	/* Retries per slmdb(3) API call */
288 #define SLMDB_DEF_BULK_RETRY_LIMIT \
289         (2 * sizeof(size_t) * CHAR_BIT)	/* Retries per bulk-mode transaction */
290 
291  /*
292   * We increment the recursion counter each time we try to recover from
293   * error, and reset the recursion counter when returning to the application
294   * from the slmdb(3) API.
295   */
296 #define SLMDB_API_RETURN(slmdb, status) do { \
297 	(slmdb)->api_retry_count = 0; \
298 	return (status); \
299     } while (0)
300 
301  /*
302   * With MDB_NOLOCK, the application uses an external lock for inter-process
303   * synchronization. Because the caller may release the external lock after
304   * an SLMDB API call, each SLMDB API function must use a short-lived
305   * transaction unless the transaction is a bulk-mode transaction.
306   */
307 
308 /* slmdb_cursor_close - close cursor and its read transaction */
309 
310 static void slmdb_cursor_close(SLMDB *slmdb)
311 {
312     MDB_txn *txn;
313 
314     /*
315      * Close the cursor and its read transaction. We can restore it later
316      * from the saved key information.
317      */
318     txn = mdb_cursor_txn(slmdb->cursor);
319     mdb_cursor_close(slmdb->cursor);
320     slmdb->cursor = 0;
321     mdb_txn_abort(txn);
322 }
323 
324 /* slmdb_saved_key_init - initialize saved key info */
325 
326 static void slmdb_saved_key_init(SLMDB *slmdb)
327 {
328     slmdb->saved_key.mv_data = 0;
329     slmdb->saved_key.mv_size = 0;
330     slmdb->saved_key_size = 0;
331 }
332 
333 /* slmdb_saved_key_free - destroy saved key info */
334 
335 static void slmdb_saved_key_free(SLMDB *slmdb)
336 {
337     free(slmdb->saved_key.mv_data);
338     slmdb_saved_key_init(slmdb);
339 }
340 
/* True when the handle currently owns a saved-key buffer. */
#define HAVE_SLMDB_SAVED_KEY(db) (0 != (db)->saved_key.mv_data)
342 
343 /* slmdb_saved_key_assign - copy the saved key */
344 
345 static int slmdb_saved_key_assign(SLMDB *slmdb, MDB_val *key_val)
346 {
347 
348     /*
349      * Extend the buffer to fit the key, so that we can avoid malloc()
350      * overhead most of the time.
351      */
352     if (slmdb->saved_key_size < key_val->mv_size) {
353 	if (slmdb->saved_key.mv_data == 0)
354 	    slmdb->saved_key.mv_data = malloc(key_val->mv_size);
355 	else
356 	    slmdb->saved_key.mv_data =
357 		realloc(slmdb->saved_key.mv_data, key_val->mv_size);
358 	if (slmdb->saved_key.mv_data == 0) {
359 	    slmdb_saved_key_init(slmdb);
360 	    return (ENOMEM);
361 	} else {
362 	    slmdb->saved_key_size = key_val->mv_size;
363 	}
364     }
365 
366     /*
367      * Copy the key under the cursor.
368      */
369     memcpy(slmdb->saved_key.mv_data, key_val->mv_data, key_val->mv_size);
370     slmdb->saved_key.mv_size = key_val->mv_size;
371     return (0);
372 }
373 
374 /* slmdb_prepare - LMDB-specific (re)initialization before actual access */
375 
376 static int slmdb_prepare(SLMDB *slmdb)
377 {
378     int     status = 0;
379 
380     /*
381      * This is called before accessing the database, or after recovery from
382      * an LMDB error. Note: this code cannot recover from errors itself.
383      * slmdb->txn is either the database open() transaction or a
384      * freshly-created bulk-mode transaction.
385      *
386      * - With O_TRUNC we make a "drop" request before updating the database.
387      *
388      * - With a bulk-mode transaction we commit when the database is closed.
389      */
390     if (slmdb->open_flags & O_TRUNC) {
391 	if ((status = mdb_drop(slmdb->txn, slmdb->dbi, 0)) != 0)
392 	    return (status);
393 	if ((slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
394 	    if ((status = mdb_txn_commit(slmdb->txn)) != 0)
395 		return (status);
396 	    slmdb->txn = 0;
397 	}
398     } else if ((slmdb->lmdb_flags & MDB_RDONLY) != 0
399 	       || (slmdb->slmdb_flags & SLMDB_FLAG_BULK) == 0) {
400 	mdb_txn_abort(slmdb->txn);
401 	slmdb->txn = 0;
402     }
403     slmdb->api_retry_count = 0;
404     return (status);
405 }
406 
407 /* slmdb_recover - recover from LMDB errors */
408 
409 static int slmdb_recover(SLMDB *slmdb, int status)
410 {
411     MDB_envinfo info;
412 
413     /*
414      * This may be needed in non-MDB_NOLOCK mode. Recovery is rare enough
415      * that we don't care about a few wasted cycles.
416      */
417     if (slmdb->cursor != 0)
418 	slmdb_cursor_close(slmdb);
419 
420     /*
421      * Recover bulk transactions only if they can be restarted. Limit the
422      * number of recovery attempts per slmdb(3) API request.
423      */
424     if ((slmdb->txn != 0 && slmdb->longjmp_fn == 0)
425 	|| ((slmdb->api_retry_count += 1) >= slmdb->api_retry_limit))
426 	return (status);
427 
428     /*
429      * If we can recover from the error, we clear the error condition and the
430      * caller should retry the failed operation immediately. Otherwise, the
431      * caller should terminate with a fatal run-time error and the program
432      * should be re-run later.
433      *
434      * slmdb->txn must be either null (non-bulk transaction error), or an
435      * aborted bulk-mode transaction.
436      */
437     switch (status) {
438 
439 	/*
440 	 * As of LMDB 0.9.8 when a non-bulk update runs into a "map full"
441 	 * error, we can resize the environment's memory map and clear the
442 	 * error condition. The caller should retry immediately.
443 	 */
444     case MDB_MAP_FULL:
445 	/* Can we increase the memory map? Give up if we can't. */
446 	if (slmdb->curr_limit < slmdb->hard_limit / slmdb->size_incr) {
447 	    slmdb->curr_limit = slmdb->curr_limit * slmdb->size_incr;
448 	} else if (slmdb->curr_limit < slmdb->hard_limit) {
449 	    slmdb->curr_limit = slmdb->hard_limit;
450 	} else {
451 	    /* Sorry, we are already maxed out. */
452 	    break;
453 	}
454 	if (slmdb->notify_fn)
455 	    slmdb->notify_fn(slmdb->cb_context, MDB_MAP_FULL,
456 			     slmdb->curr_limit);
457 	status = mdb_env_set_mapsize(slmdb->env, slmdb->curr_limit);
458 	break;
459 
460 	/*
461 	 * When a writer resizes the database, read-only applications must
462 	 * increase their LMDB memory map size limit, too. Otherwise, they
463 	 * won't be able to read a table after it grows.
464 	 *
465 	 * As of LMDB 0.9.8 we can import the new memory map size limit into the
466 	 * database environment by calling mdb_env_set_mapsize() with a zero
467 	 * size argument. Then we extract the map size limit for later use.
468 	 * The caller should retry immediately.
469 	 */
470     case MDB_MAP_RESIZED:
471 	if ((status = mdb_env_set_mapsize(slmdb->env, 0)) == 0) {
472 	    /* Do not panic. Maps may shrink after bulk update. */
473 	    mdb_env_info(slmdb->env, &info);
474 	    slmdb->curr_limit = info.me_mapsize;
475 	    if (slmdb->notify_fn)
476 		slmdb->notify_fn(slmdb->cb_context, MDB_MAP_RESIZED,
477 				 slmdb->curr_limit);
478 	}
479 	break;
480 
481 	/*
482 	 * What is it with these built-in hard limits that cause systems to
483 	 * stop when demand is at its highest? When the system is under
484 	 * stress it should slow down and keep making progress.
485 	 */
486     case MDB_READERS_FULL:
487 	if (slmdb->notify_fn)
488 	    slmdb->notify_fn(slmdb->cb_context, MDB_READERS_FULL);
489 	sleep(1);
490 	status = 0;
491 	break;
492 
493 	/*
494 	 * We can't solve this problem. The application should terminate with
495 	 * a fatal run-time error and the program should be re-run later.
496 	 */
497     default:
498 	break;
499     }
500 
501     /*
502      * If a bulk-transaction error is recoverable, build a new bulk
503      * transaction from scratch, by making a long jump back into the caller
504      * at some pre-arranged point. In MDB_NOLOCK mode, there is no need to
505      * upgrade the lock to "exclusive", because the failed write transaction
506      * has no side effects.
507      */
508     if (slmdb->txn != 0 && status == 0 && slmdb->longjmp_fn != 0
509 	&& (slmdb->bulk_retry_count += 1) <= slmdb->bulk_retry_limit) {
510 	if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0,
511 				    slmdb->lmdb_flags & MDB_RDONLY,
512 				    &slmdb->txn)) == 0
513 	    && (status = slmdb_prepare(slmdb)) == 0)
514 	    slmdb->longjmp_fn(slmdb->cb_context, 1);
515     }
516     return (status);
517 }
518 
519 /* slmdb_txn_begin - mdb_txn_begin() wrapper with LMDB error recovery */
520 
521 static int slmdb_txn_begin(SLMDB *slmdb, int rdonly, MDB_txn **txn)
522 {
523     int     status;
524 
525     if ((status = mdb_txn_begin(slmdb->env, (MDB_txn *) 0, rdonly, txn)) != 0
526 	&& (status = slmdb_recover(slmdb, status)) == 0)
527 	status = slmdb_txn_begin(slmdb, rdonly, txn);
528 
529     return (status);
530 }
531 
532 /* slmdb_get - mdb_get() wrapper with LMDB error recovery */
533 
534 int     slmdb_get(SLMDB *slmdb, MDB_val *mdb_key, MDB_val *mdb_value)
535 {
536     MDB_txn *txn;
537     int     status;
538 
539     /*
540      * Start a read transaction if there's no bulk-mode txn.
541      */
542     if (slmdb->txn)
543 	txn = slmdb->txn;
544     else if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
545 	SLMDB_API_RETURN(slmdb, status);
546 
547     /*
548      * Do the lookup.
549      */
550     if ((status = mdb_get(txn, slmdb->dbi, mdb_key, mdb_value)) != 0
551 	&& status != MDB_NOTFOUND) {
552 	mdb_txn_abort(txn);
553 	if ((status = slmdb_recover(slmdb, status)) == 0)
554 	    status = slmdb_get(slmdb, mdb_key, mdb_value);
555 	SLMDB_API_RETURN(slmdb, status);
556     }
557 
558     /*
559      * Close the read txn if it's not the bulk-mode txn.
560      */
561     if (slmdb->txn == 0)
562 	mdb_txn_abort(txn);
563 
564     SLMDB_API_RETURN(slmdb, status);
565 }
566 
567 /* slmdb_put - mdb_put() wrapper with LMDB error recovery */
568 
569 int     slmdb_put(SLMDB *slmdb, MDB_val *mdb_key,
570 		          MDB_val *mdb_value, int flags)
571 {
572     MDB_txn *txn;
573     int     status;
574 
575     /*
576      * Start a write transaction if there's no bulk-mode txn.
577      */
578     if (slmdb->txn)
579 	txn = slmdb->txn;
580     else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
581 	SLMDB_API_RETURN(slmdb, status);
582 
583     /*
584      * Do the update.
585      */
586     if ((status = mdb_put(txn, slmdb->dbi, mdb_key, mdb_value, flags)) != 0) {
587 	mdb_txn_abort(txn);
588 	if (status != MDB_KEYEXIST) {
589 	    if ((status = slmdb_recover(slmdb, status)) == 0)
590 		status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
591 	    SLMDB_API_RETURN(slmdb, status);
592 	}
593     }
594 
595     /*
596      * Commit the transaction if it's not the bulk-mode txn.
597      */
598     if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
599 	&& (status = slmdb_recover(slmdb, status)) == 0)
600 	status = slmdb_put(slmdb, mdb_key, mdb_value, flags);
601 
602     SLMDB_API_RETURN(slmdb, status);
603 }
604 
605 /* slmdb_del - mdb_del() wrapper with LMDB error recovery */
606 
607 int     slmdb_del(SLMDB *slmdb, MDB_val *mdb_key)
608 {
609     MDB_txn *txn;
610     int     status;
611 
612     /*
613      * Start a write transaction if there's no bulk-mode txn.
614      */
615     if (slmdb->txn)
616 	txn = slmdb->txn;
617     else if ((status = slmdb_txn_begin(slmdb, 0, &txn)) != 0)
618 	SLMDB_API_RETURN(slmdb, status);
619 
620     /*
621      * Do the update.
622      */
623     if ((status = mdb_del(txn, slmdb->dbi, mdb_key, (MDB_val *) 0)) != 0) {
624 	mdb_txn_abort(txn);
625 	if (status != MDB_NOTFOUND) {
626 	    if ((status = slmdb_recover(slmdb, status)) == 0)
627 		status = slmdb_del(slmdb, mdb_key);
628 	    SLMDB_API_RETURN(slmdb, status);
629 	}
630     }
631 
632     /*
633      * Commit the transaction if it's not the bulk-mode txn.
634      */
635     if (status == 0 && slmdb->txn == 0 && (status = mdb_txn_commit(txn)) != 0
636 	&& (status = slmdb_recover(slmdb, status)) == 0)
637 	status = slmdb_del(slmdb, mdb_key);
638 
639     SLMDB_API_RETURN(slmdb, status);
640 }
641 
642 /* slmdb_cursor_get - mdb_cursor_get() wrapper with LMDB error recovery */
643 
644 int     slmdb_cursor_get(SLMDB *slmdb, MDB_val *mdb_key,
645 			         MDB_val *mdb_value, MDB_cursor_op op)
646 {
647     MDB_txn *txn;
648     int     status = 0;
649 
650     /*
651      * Open a read transaction and cursor if needed.
652      */
653     if (slmdb->cursor == 0) {
654 	if ((status = slmdb_txn_begin(slmdb, MDB_RDONLY, &txn)) != 0)
655 	    SLMDB_API_RETURN(slmdb, status);
656 	if ((status = mdb_cursor_open(txn, slmdb->dbi, &slmdb->cursor)) != 0) {
657 	    mdb_txn_abort(txn);
658 	    if ((status = slmdb_recover(slmdb, status)) == 0)
659 		status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
660 	    SLMDB_API_RETURN(slmdb, status);
661 	}
662 
663 	/*
664 	 * Restore the cursor position from the saved key information.
665 	 */
666 	if (HAVE_SLMDB_SAVED_KEY(slmdb) && op != MDB_FIRST)
667 	    status = mdb_cursor_get(slmdb->cursor, &slmdb->saved_key,
668 				    (MDB_val *) 0, MDB_SET);
669     }
670 
671     /*
672      * Database lookup.
673      */
674     if (status == 0)
675 	status = mdb_cursor_get(slmdb->cursor, mdb_key, mdb_value, op);
676 
677     /*
678      * Save the cursor position if successful. This can fail only with
679      * ENOMEM.
680      *
681      * Close the cursor read transaction if in MDB_NOLOCK mode, because the
682      * caller may release the external lock after we return.
683      */
684     if (status == 0) {
685 	status = slmdb_saved_key_assign(slmdb, mdb_key);
686 	if (slmdb->lmdb_flags & MDB_NOLOCK)
687 	    slmdb_cursor_close(slmdb);
688     }
689 
690     /*
691      * Handle end-of-database or other error.
692      */
693     else {
694 	/* Do not hand-optimize out the slmdb_cursor_close() calls below. */
695 	if (status == MDB_NOTFOUND) {
696 	    slmdb_cursor_close(slmdb);
697 	    if (HAVE_SLMDB_SAVED_KEY(slmdb))
698 		slmdb_saved_key_free(slmdb);
699 	} else {
700 	    slmdb_cursor_close(slmdb);
701 	    if ((status = slmdb_recover(slmdb, status)) == 0)
702 		status = slmdb_cursor_get(slmdb, mdb_key, mdb_value, op);
703 	    SLMDB_API_RETURN(slmdb, status);
704 	    /* Do not hand-optimize out the above return statement. */
705 	}
706     }
707     SLMDB_API_RETURN(slmdb, status);
708 }
709 
710 /* slmdb_assert_cb - report LMDB assertion failure */
711 
712 static void slmdb_assert_cb(MDB_env *env, const char *text)
713 {
714     SLMDB  *slmdb = (SLMDB *) mdb_env_get_userctx(env);
715 
716     if (slmdb->assert_fn)
717 	slmdb->assert_fn(slmdb->cb_context, text);
718 }
719 
720 /* slmdb_control - control optional settings */
721 
722 int     slmdb_control(SLMDB *slmdb, int first,...)
723 {
724     va_list ap;
725     int     status = 0;
726     int     reqno;
727     int     rc;
728 
729     va_start(ap, first);
730     for (reqno = first; status == 0 && reqno != SLMDB_CTL_END; reqno = va_arg(ap, int)) {
731 	switch (reqno) {
732 	case SLMDB_CTL_LONGJMP_FN:
733 	    slmdb->longjmp_fn = va_arg(ap, SLMDB_LONGJMP_FN);
734 	    break;
735 	case SLMDB_CTL_NOTIFY_FN:
736 	    slmdb->notify_fn = va_arg(ap, SLMDB_NOTIFY_FN);
737 	    break;
738 	case SLMDB_CTL_ASSERT_FN:
739 	    slmdb->assert_fn = va_arg(ap, SLMDB_ASSERT_FN);
740 	    if ((rc = mdb_env_set_userctx(slmdb->env, (void *) slmdb)) != 0
741 	     || (rc = mdb_env_set_assert(slmdb->env, slmdb_assert_cb)) != 0)
742 		status = rc;
743 	    break;
744 	case SLMDB_CTL_CB_CONTEXT:
745 	    slmdb->cb_context = va_arg(ap, void *);
746 	    break;
747 	case SLMDB_CTL_API_RETRY_LIMIT:
748 	    slmdb->api_retry_limit = va_arg(ap, int);
749 	    break;
750 	case SLMDB_CTL_BULK_RETRY_LIMIT:
751 	    slmdb->bulk_retry_limit = va_arg(ap, int);
752 	    break;
753 	default:
754 	    status = errno = EINVAL;
755 	    break;
756 	}
757     }
758     va_end(ap);
759     return (status);
760 }
761 
762 /* slmdb_close - wrapper with LMDB error recovery */
763 
764 int     slmdb_close(SLMDB *slmdb)
765 {
766     int     status = 0;
767 
768     /*
769      * Finish an open bulk transaction. If slmdb_recover() returns after a
770      * bulk-transaction error, then it was unable to recover.
771      */
772     if (slmdb->txn != 0
773 	&& (status = mdb_txn_commit(slmdb->txn)) != 0)
774 	status = slmdb_recover(slmdb, status);
775 
776     /*
777      * Clean up after an unfinished sequence() operation.
778      */
779     if (slmdb->cursor != 0)
780 	slmdb_cursor_close(slmdb);
781 
782     mdb_env_close(slmdb->env);
783 
784     /*
785      * Clean up the saved key information.
786      */
787     if (HAVE_SLMDB_SAVED_KEY(slmdb))
788 	slmdb_saved_key_free(slmdb);
789 
790     SLMDB_API_RETURN(slmdb, status);
791 }
792 
793 /* slmdb_init - mandatory initialization */
794 
795 int     slmdb_init(SLMDB *slmdb, size_t curr_limit, int size_incr,
796 		           size_t hard_limit)
797 {
798 
799     /*
800      * This is a separate operation to keep the slmdb_open() API simple.
801      * Don't allocate resources here. Just store control information,
802      */
803     slmdb->curr_limit = curr_limit;
804     slmdb->size_incr = size_incr;
805     slmdb->hard_limit = hard_limit;
806 
807     return (MDB_SUCCESS);
808 }
809 
810 /* slmdb_open - open wrapped LMDB database */
811 
812 int     slmdb_open(SLMDB *slmdb, const char *path, int open_flags,
813 		           int lmdb_flags, int slmdb_flags)
814 {
815     struct stat st;
816     MDB_env *env;
817     MDB_txn *txn;
818     MDB_dbi dbi;
819     int     db_fd;
820     int     status;
821 
822     /*
823      * Create LMDB environment.
824      */
825     if ((status = mdb_env_create(&env)) != 0)
826 	return (status);
827 
828     /*
829      * Make sure that the memory map has room to store and commit an initial
830      * "drop" transaction as well as fixed database metadata. We have no way
831      * to recover from errors before the first application-level I/O request.
832      */
833 #define SLMDB_FUDGE      10240
834 
835     if (slmdb->curr_limit < SLMDB_FUDGE)
836 	slmdb->curr_limit = SLMDB_FUDGE;
837     if (stat(path, &st) == 0
838 	&& st.st_size > slmdb->curr_limit - SLMDB_FUDGE) {
839 	if (st.st_size > slmdb->hard_limit)
840 	    slmdb->hard_limit = st.st_size;
841 	if (st.st_size < slmdb->hard_limit - SLMDB_FUDGE)
842 	    slmdb->curr_limit = st.st_size + SLMDB_FUDGE;
843 	else
844 	    slmdb->curr_limit = slmdb->hard_limit;
845     }
846 
847     /*
848      * mdb_open() requires a txn, but since the default DB always exists in
849      * an LMDB environment, we usually don't need to do anything else with
850      * the txn. It is currently used for truncate and for bulk transactions.
851      */
852     if ((status = mdb_env_set_mapsize(env, slmdb->curr_limit)) != 0
853 	|| (status = mdb_env_open(env, path, lmdb_flags, 0644)) != 0
854 	|| (status = mdb_txn_begin(env, (MDB_txn *) 0,
855 				   lmdb_flags & MDB_RDONLY, &txn)) != 0
856 	|| (status = mdb_open(txn, (const char *) 0, 0, &dbi)) != 0
857 	|| (status = mdb_env_get_fd(env, &db_fd)) != 0) {
858 	mdb_env_close(env);
859 	return (status);
860     }
861 
862     /*
863      * Bundle up.
864      */
865     slmdb->open_flags = open_flags;
866     slmdb->lmdb_flags = lmdb_flags;
867     slmdb->slmdb_flags = slmdb_flags;
868     slmdb->env = env;
869     slmdb->dbi = dbi;
870     slmdb->db_fd = db_fd;
871     slmdb->cursor = 0;
872     slmdb_saved_key_init(slmdb);
873     slmdb->api_retry_count = 0;
874     slmdb->bulk_retry_count = 0;
875     slmdb->api_retry_limit = SLMDB_DEF_API_RETRY_LIMIT;
876     slmdb->bulk_retry_limit = SLMDB_DEF_BULK_RETRY_LIMIT;
877     slmdb->longjmp_fn = 0;
878     slmdb->notify_fn = 0;
879     slmdb->assert_fn = 0;
880     slmdb->cb_context = 0;
881     slmdb->txn = txn;
882 
883     if ((status = slmdb_prepare(slmdb)) != 0)
884 	mdb_env_close(env);
885 
886     return (status);
887 }
888 
889 #endif
890