xref: /openbsd-src/usr.sbin/ldapd/btree.3 (revision 82afc04172cf834b8455ec36d2b8aecd853c8cee)
1.\" $OpenBSD: btree.3,v 1.5 2018/11/04 08:04:08 jmc Exp $
2.\"
3.\" Copyright (c) 2009, 2010 Martin Hedenfalk <martinh@openbsd.org>
4.\"
5.\" Permission to use, copy, modify, and distribute this software for any
6.\" purpose with or without fee is hereby granted, provided that the above
7.\" copyright notice and this permission notice appear in all copies.
8.\"
9.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16.\"
17.Dd $Mdocdate: November 4 2018 $
18.Dt BTREE 3
19.Os
20.Sh NAME
21.Nm btree_open ,
22.Nm btree_open_fd ,
23.Nm btree_close ,
24.Nm btree_txn_begin ,
25.Nm btree_txn_get ,
26.Nm btree_txn_put ,
27.Nm btree_txn_del ,
28.Nm btree_txn_commit ,
29.Nm btree_txn_abort ,
30.Nm btree_get ,
31.Nm btree_put ,
32.Nm btree_del ,
33.Nm btree_txn_cursor_open ,
34.Nm btree_cursor_open ,
35.Nm btree_cursor_close ,
36.Nm btree_cursor_get ,
37.Nm btree_stat ,
38.Nm btree_compact ,
39.Nm btree_revert ,
40.Nm btree_sync ,
41.Nm btree_set_cache_size ,
42.Nm btree_get_flags ,
43.Nm btree_get_path ,
44.Nm btree_cmp ,
45.Nm btval_reset
46.Nd append-only prefix B+tree database library
47.Sh SYNOPSIS
48.In <btree.h>
49.Ft "struct btree *"
50.Fn "btree_open_fd" "int fd" "unsigned int flags"
51.Ft "struct btree *"
52.Fn "btree_open" "const char *path" "unsigned int flags" "mode_t mode"
53.Ft "void"
54.Fn "btree_close" "struct btree *bt"
55.Ft "struct btree_txn *"
56.Fn "btree_txn_begin" "struct btree *bt" "int rdonly"
57.Ft "int"
58.Fn "btree_txn_get" "struct btree *bt" "struct btree_txn *" "struct btval *key" "struct btval *data"
59.Ft "int"
60.Fn "btree_txn_put" "struct btree *bt" "struct btree_txn *" "struct btval *key" "struct btval *data" "unsigned int flags"
61.Ft "int"
62.Fn "btree_txn_del" "struct btree *bt" "struct btree_txn *" "struct btval *key" "struct btval *data"
63.Ft "int"
64.Fn "btree_txn_commit" "struct btree_txn *txn"
65.Ft "void"
66.Fn "btree_txn_abort" "struct btree_txn *txn"
67.Ft "int"
68.Fn "btree_get" "struct btree *bt" "struct btval *key" "struct btval *data"
69.Ft "int"
70.Fn "btree_put" "struct btree *bt" "struct btval *key" "struct btval *data" "unsigned flags"
71.Ft "int"
72.Fn "btree_del" "struct btree *bt" "struct btval *key" "struct btval *data"
73.Ft "struct cursor *"
74.Fn "btree_txn_cursor_open" "struct btree *bt" "struct btree_txn *txn"
75.Ft "struct cursor *"
76.Fn "btree_cursor_open" "struct btree *bt"
77.Ft "void"
78.Fn "btree_cursor_close" "struct cursor *cursor"
79.Ft "int"
80.Fn "btree_cursor_get" "struct cursor *cursor" "struct btval *key" "struct btval *data" "enum cursor_op op"
81.Ft "struct btree_stat *"
82.Fn "btree_stat" "struct btree *bt"
83.Ft "int"
84.Fn "btree_compact" "struct btree *bt"
85.Ft "int"
86.Fn "btree_revert" "struct btree *bt"
87.Ft "int"
88.Fn "btree_sync" "struct btree *bt"
89.Ft "void"
90.Fn "btree_set_cache_size" "struct btree *bt" "unsigned int cache_size"
91.Ft "unsigned int"
92.Fn "btree_get_flags" "struct btree *bt"
93.Ft "const char *"
94.Fn "btree_get_path" "struct btree *bt"
95.Ft "int"
96.Fn "btree_cmp" "struct btree *bt" "const struct btval *a" "const struct btval *b"
97.Ft "void"
98.Fn "btval_reset" "struct btval *btv"
99.Sh DESCRIPTION
100The database is implemented as a modified prefix B+tree.
101Instead of modifying the database file inplace,
102each update appends any modified pages at the end of the file.
103The last block of the file contains metadata and a pointer to the root page.
104The first block of the file contains a header that specifies the page size.
105.Pp
106Append-only writing gives the following properties:
107.Bl -enum
108.It
109No locks.
110Since all writes are appended to the end of the file, multiple
111readers can continue reading from the tree as it was when they
112started.
113This snapshot view might contain outdated versions of entries.
114.It
115Resistance to corruption.
116The file content is never modified.
117When opening a database file, the last good meta-data page is searched
118by scanning backwards.
119If there is trailing garbage in the file, it will be skipped.
120.It
121Hot backups.
122Backups can be made on a running server simply by copying the files.
123There is no risk for inconsistencies.
124.El
125.Pp
126The drawback is that it wastes space.
127A 4-level B+tree database will write at least 5 new pages on each update,
128including the meta-data page.
129With 4 KiB pagesize, the file would grow by 20 KiB on each update.
130.Pp
131To reclaim the wasted space, the database should be compacted.
132The compaction process opens a write transaction and traverses the tree.
133Each active page is then written to a new file.
134When complete, a special
135.Dq tombstone
136page is written to the old file to
137signal that it is stale and all processes using the file should re-open it.
138Modifications are denied on a stale file and fail with errno set to ESTALE.
139.Sh CURSORS
140A new cursor may be opened with a call to
141.Fn btree_txn_cursor_open
142or
143.Fn btree_cursor_open .
144The latter is implemented as a macro to the former with a NULL
145.Ar txn
146argument.
147Multiple cursors may be open simultaneously.
148The cursor must be closed with
149.Fn btree_cursor_close
150after use.
151.Pp
152The cursor can be placed at a specific key by setting
153.Ar op
154to BT_CURSOR and filling in the
155.Ar key
156argument.
157The cursor will be placed at the smallest key greater or equal to
158the specified key.
159If
160.Ar op
161is instead set to BT_CURSOR_EXACT, the cursor will be placed at the
162specified key, or fail if it doesn't exist.
163.Pp
164The database may be traversed from the first key to the last by calling
165.Fn btree_cursor_get
166with
167.Ar op
168initially set to BT_FIRST and then set to BT_NEXT.
169If the cursor is not yet initialized, ie
170.Fn btree_cursor_get
171has not yet been called with
172.Ar op
173set to BT_FIRST or BT_CURSOR, then BT_NEXT behaves as BT_FIRST.
174.Sh TRANSACTIONS
175There are two types of transactions: write and read-only transactions.
176Only one write transaction is allowed at a time.
177A read-only transaction allows the grouping of several read operations
178to see a consistent state of the database.
179.Pp
180A transaction is started with
181.Fn btree_txn_begin .
182If the
183.Ar rdonly
184parameter is 0, a write transaction is started and an exclusive lock
185is taken on the file using
186.Xr flock 2 .
187No lock is taken for read-only transactions.
188.Pp
189The transaction is ended either with
190.Fn btree_txn_commit
191or
192.Fn btree_txn_abort .
193The
194.Ft btree_txn
195pointer must not be accessed afterwards.
196Any cursor opened inside the transaction must be closed before the
197transaction is ended.
198.Sh RETURN VALUES
199The
200.Fn btree_txn_get ,
201.Fn btree_txn_put ,
202.Fn btree_txn_del ,
203.Fn btree_txn_commit ,
204.Fn btree_get ,
205.Fn btree_put ,
206.Fn btree_del ,
207.Fn btree_cursor_get ,
208.Fn btree_compact
209and
210.Fn btree_revert
211functions all return 0 on success.
212On failure -1 is returned and errno is set.
213.Pp
214All functions returning pointers return NULL on error.
215.Pp
216.Fn btree_txn_put
217and
218.Fn btree_put
219sets errno to EEXIST if the key already exists and BT_NOOVERWRITE was not
220passed in the
221.Ar flags
222argument.
223.Pp
224.Fn btree_txn_get ,
225.Fn btree_txn_del ,
226.Fn btree_get ,
227.Fn btree_del
228and
229.Fn btree_cursor_get
230sets errno to ENOENT if the specified key was not found.
231.Pp
232The
233.Fn btree_txn_begin ,
234.Fn btree_cursor ,
235.Fn btree_cursor_get ,
236.Fn btree_get ,
237.Fn btree_put ,
238.Fn btree_del
239functions can fail and set errno to ESTALE if the database file has been
240compacted by another process.
241The file should be re-opened and the operation retried.
242.Sh AUTHORS
243The
244.Nm btree
245library was written by
246.An Martin Hedenfalk Aq Mt martin@bzero.se .
247.Sh BUGS
248Byte order is assumed never to change.
249