xref: /onnv-gate/usr/src/cmd/sendmail/db/include/btree.h (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*-
2*0Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*0Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*0Sstevel@tonic-gate  */
7*0Sstevel@tonic-gate /*
8*0Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994, 1995, 1996
9*0Sstevel@tonic-gate  *	Keith Bostic.  All rights reserved.
10*0Sstevel@tonic-gate  */
11*0Sstevel@tonic-gate /*
12*0Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994, 1995
13*0Sstevel@tonic-gate  *	The Regents of the University of California.  All rights reserved.
14*0Sstevel@tonic-gate  *
15*0Sstevel@tonic-gate  * This code is derived from software contributed to Berkeley by
16*0Sstevel@tonic-gate  * Mike Olson.
17*0Sstevel@tonic-gate  *
18*0Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
19*0Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
20*0Sstevel@tonic-gate  * are met:
21*0Sstevel@tonic-gate  * 1. Redistributions of source code must retain the above copyright
22*0Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
23*0Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
24*0Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
25*0Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
26*0Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
27*0Sstevel@tonic-gate  *    must display the following acknowledgement:
28*0Sstevel@tonic-gate  *	This product includes software developed by the University of
29*0Sstevel@tonic-gate  *	California, Berkeley and its contributors.
30*0Sstevel@tonic-gate  * 4. Neither the name of the University nor the names of its contributors
31*0Sstevel@tonic-gate  *    may be used to endorse or promote products derived from this software
32*0Sstevel@tonic-gate  *    without specific prior written permission.
33*0Sstevel@tonic-gate  *
34*0Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35*0Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36*0Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37*0Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38*0Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39*0Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40*0Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41*0Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42*0Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43*0Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44*0Sstevel@tonic-gate  * SUCH DAMAGE.
45*0Sstevel@tonic-gate  *
46*0Sstevel@tonic-gate  *	@(#)btree.h	10.26 (Sleepycat) 12/16/98
47*0Sstevel@tonic-gate  */
48*0Sstevel@tonic-gate 
/*
 * Forward declarations: the structure tags first, then the typedef names
 * used to refer to them.
 */
struct __btree;
struct __cursor;
struct __epg;
struct __recno;

typedef struct __btree	BTREE;
typedef struct __cursor	CURSOR;
typedef struct __epg	EPG;
typedef struct __recno	RECNO;
54*0Sstevel@tonic-gate 
/* NOTE(review): presumably the default minimum keys per page -- confirm. */
#define	DEFMINKEYPAGE	 (2)

/*
 * Page classification by page type.  Each macro evaluates to non-zero when
 * TYPE(p) is one of the two page types it names, and to 0 otherwise.  The
 * argument may be evaluated twice.
 */
#define	ISINTERNAL(p)							\
	(TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
#define	ISLEAF(p)							\
	(TYPE(p) == P_LBTREE || TYPE(p) == P_LRECNO)
59*0Sstevel@tonic-gate 
/*
 * Lock release helpers.
 *
 * Under transactions, a lock protecting a data item taken from a page has
 * to be held until the transaction ends, but a lock taken only while
 * walking the tree does not.  Two macros keep the cases apart so internal
 * pages are not tied up longer than needed:
 *
 *	__BT_LPUT	release the lock whenever DB_AM_LOCKING is set on
 *			the handle.
 *	__BT_TLPUT	release the lock only when DB_AM_LOCKING is set and
 *			the cursor has no transaction ((dbc)->txn == NULL).
 *
 * Both evaluate to 0 when nothing is released, otherwise to the value
 * returned by lock_put().
 */
#define	__BT_LPUT(dbc, lock)						\
	(!F_ISSET((dbc)->dbp, DB_AM_LOCKING) ?				\
	    0 : lock_put((dbc)->dbp->dbenv->lk_info, lock))
#define	__BT_TLPUT(dbc, lock)						\
	(!F_ISSET((dbc)->dbp, DB_AM_LOCKING) || (dbc)->txn != NULL ?	\
	    0 : lock_put((dbc)->dbp->dbenv->lk_info, lock))
72*0Sstevel@tonic-gate 
/*
 * Search flags accepted by __bam_search() and __bam_rsearch().
 *
 * Internal-page searches have to land on the largest record less than the
 * key, otherwise descending the tree does not work.  Leaf-page searches
 * have to land on the smallest record greater than the key, so that the
 * index handed back is the record's correct position for insertion.
 *
 * A flags value describes three things: the kind of locking required
 * (including the case of locking a pair of pages), which item to return
 * when duplicates are present, and whether deleted entries may be
 * returned.  The individual bits come first, the named combinations after.
 */

/* Lock type. */
#define	S_READ		0x0001		/* Read locks. */
#define	S_WRITE		0x0002		/* Write locks. */

/* Search modifiers. */
#define	S_APPEND	0x0040		/* Append to the tree. */
#define	S_DELNO		0x0080		/* Don't return deleted items. */
#define	S_DUPFIRST	0x0100		/* Return first duplicate. */
#define	S_DUPLAST	0x0200		/* Return last duplicate. */
#define	S_EXACT		0x0400		/* Exact items only. */
#define	S_PARENT	0x0800		/* Lock page pair. */
#define	S_STACK		0x1000		/* Need a complete stack. */
/*
 * S_PAST_EOF: for an insert search (or a keyfirst/keylast operation), or a
 * split done on behalf of an insert, it is acceptable to return an entry
 * one past end-of-page.
 */
#define	S_PAST_EOF	0x2000

/* Named combinations. */
#define	S_DELETE							\
	(S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT | S_STACK)
#define	S_FIND								\
	(S_READ | S_DUPFIRST | S_DELNO)
#define	S_FIND_WR							\
	(S_WRITE | S_DUPFIRST | S_DELNO)
#define	S_INSERT							\
	(S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK)
#define	S_KEYFIRST							\
	(S_WRITE | S_DUPFIRST | S_PAST_EOF | S_STACK)
#define	S_KEYLAST							\
	(S_WRITE | S_DUPLAST | S_PAST_EOF | S_STACK)
#define	S_WRPAIR							\
	(S_WRITE | S_DUPLAST | S_PAST_EOF | S_PARENT)
110*0Sstevel@tonic-gate 
/* Flag bits passed to __bam_iitem(). */
#define	BI_DELETED	0x1		/* Key/data pair only placeholder. */
#define	BI_DOINCR	0x2		/* Increment the record count. */
#define	BI_NEWKEY	0x4		/* New key. */
117*0Sstevel@tonic-gate 
118*0Sstevel@tonic-gate /*
119*0Sstevel@tonic-gate  * Various routines pass around page references.  A page reference can be a
120*0Sstevel@tonic-gate  * pointer to the page or a page number; for either, an indx can designate
121*0Sstevel@tonic-gate  * an item on the page.
122*0Sstevel@tonic-gate  */
123*0Sstevel@tonic-gate struct __epg {
124*0Sstevel@tonic-gate 	PAGE	 *page;			/* The page. */
125*0Sstevel@tonic-gate 	db_indx_t indx;			/* The index on the page. */
126*0Sstevel@tonic-gate 	DB_LOCK	  lock;			/* The page's lock. */
127*0Sstevel@tonic-gate };
128*0Sstevel@tonic-gate 
/*
 * A stack of the pages being locked in the tree is kept per cursor.  Btrees
 * (currently) save only two levels of the tree at a time, so the default
 * stack is always big enough for them.  Recno trees, however, must lock the
 * entire tree for inserts and deletes, so the stack is grown as necessary.
 *
 * BT_STK_CLR --
 *	Empty the stack by resetting the current entry, (c)->csp, to the
 *	stack base, (c)->sp.  Evaluates to the new value of (c)->csp.
 */
#define	BT_STK_CLR(c)	((c)->csp = (c)->sp)
137*0Sstevel@tonic-gate 
/*
 * BT_STK_ENTER --
 *	Store a page reference in the current stack slot, (c)->csp, without
 *	advancing it.  If the current slot is the end-of-stack position
 *	((c)->csp == (c)->esp), __bam_stkgrow(c) is called first and the slot
 *	is written only if that call returns 0.
 *
 *	c		cursor owning the stack (evaluated several times)
 *	pagep		value stored in the slot's page field
 *	page_indx	value stored in the slot's indx field
 *	lk		value stored in the slot's lock field
 *	ret		lvalue set to 0, or to the __bam_stkgrow() result
 *
 * The lock argument is deliberately not named "lock": a macro parameter
 * with that name is also substituted into the ->lock member access, so the
 * macro would only expand correctly when the caller's argument happened to
 * be spelled exactly "lock".
 */
#define	BT_STK_ENTER(c, pagep, page_indx, lk, ret) do {			\
	if (((ret) =							\
	    ((c)->csp == (c)->esp ? __bam_stkgrow(c) : 0)) == 0) {	\
		(c)->csp->page = (pagep);				\
		(c)->csp->indx = (page_indx);				\
		(c)->csp->lock = (lk);					\
	}								\
} while (0)
146*0Sstevel@tonic-gate 
/*
 * BT_STK_PUSH --
 *	Store a page reference in the current stack slot via BT_STK_ENTER,
 *	then advance (c)->csp to the next slot.
 *
 *	The arguments are handed to BT_STK_ENTER unchanged; ret is an lvalue
 *	that BT_STK_ENTER sets.
 *
 * (c)->csp is advanced only when ret is 0.  When the enter step fails the
 * slot was not written, so stepping past it would leave csp beyond an
 * unfilled entry (and possibly beyond the end of the stack).
 */
#define	BT_STK_PUSH(c, pagep, page_indx, lk, ret) do {			\
	BT_STK_ENTER(c, pagep, page_indx, lk, ret);			\
	if ((ret) == 0)							\
		++(c)->csp;						\
} while (0)
151*0Sstevel@tonic-gate 
/*
 * BT_STK_POP --
 *	Step the current entry back by one.  Evaluates to NULL, leaving
 *	(c)->csp untouched, when the stack is already at its base;
 *	otherwise decrements (c)->csp and evaluates to the new value.
 *
 * The base is (c)->sp, the same pointer BT_STK_CLR resets to.  Comparing
 * against the embedded stack[] array instead is only right while sp still
 * points at that array; once it does not (presumably after the stack has
 * been grown -- confirm against __bam_stkgrow), the empty check would never
 * match and csp would be decremented below the base.
 */
#define	BT_STK_POP(c)							\
	((c)->csp == (c)->sp ? NULL : --(c)->csp)
154*0Sstevel@tonic-gate 
/* Argument values accepted by __bam_ca_replace(). */
typedef enum {
	REPLACE_SETUP = 0,
	REPLACE_SUCCESS = 1,
	REPLACE_FAILED = 2
} ca_replace_arg;
163*0Sstevel@tonic-gate 
/*
 * Argument values accepted by __ram_ca().
 */
typedef enum {
	CA_DELETE = 0,
	CA_IAFTER = 1,
	CA_IBEFORE = 2
} ca_recno_arg;

/* Record number that is never legal. */
#define	RECNO_OOB	0
172*0Sstevel@tonic-gate 
173*0Sstevel@tonic-gate /* Btree/Recno cursor. */
174*0Sstevel@tonic-gate struct __cursor {
175*0Sstevel@tonic-gate 	DBC		*dbc;		/* Enclosing DBC. */
176*0Sstevel@tonic-gate 
177*0Sstevel@tonic-gate 	/* Per-thread information: shared by btree/recno. */
178*0Sstevel@tonic-gate 	EPG		*sp;		/* Stack pointer. */
179*0Sstevel@tonic-gate 	EPG	 	*csp;		/* Current stack entry. */
180*0Sstevel@tonic-gate 	EPG		*esp;		/* End stack pointer. */
181*0Sstevel@tonic-gate 	EPG		 stack[5];
182*0Sstevel@tonic-gate 
183*0Sstevel@tonic-gate 	/* Per-thread information: btree private. */
184*0Sstevel@tonic-gate 	PAGE		*page;		/* Cursor page. */
185*0Sstevel@tonic-gate 
186*0Sstevel@tonic-gate 	db_pgno_t	 pgno;		/* Page. */
187*0Sstevel@tonic-gate 	db_indx_t	 indx;		/* Page item ref'd by the cursor. */
188*0Sstevel@tonic-gate 
189*0Sstevel@tonic-gate 	db_pgno_t	 dpgno;		/* Duplicate page. */
190*0Sstevel@tonic-gate 	db_indx_t	 dindx;		/* Page item ref'd by the cursor. */
191*0Sstevel@tonic-gate 
192*0Sstevel@tonic-gate 	DB_LOCK		 lock;		/* Cursor read lock. */
193*0Sstevel@tonic-gate 	db_lockmode_t	 mode;		/* Lock mode. */
194*0Sstevel@tonic-gate 
195*0Sstevel@tonic-gate 	/* Per-thread information: recno private. */
196*0Sstevel@tonic-gate 	db_recno_t	 recno;		/* Current record number. */
197*0Sstevel@tonic-gate 
198*0Sstevel@tonic-gate 	/*
199*0Sstevel@tonic-gate 	 * Btree:
200*0Sstevel@tonic-gate 	 * We set a flag in the cursor structure if the underlying object has
201*0Sstevel@tonic-gate 	 * been deleted.  It's not strictly necessary, we could get the same
202*0Sstevel@tonic-gate 	 * information by looking at the page itself.
203*0Sstevel@tonic-gate 	 *
204*0Sstevel@tonic-gate 	 * Recno:
205*0Sstevel@tonic-gate 	 * When renumbering recno databases during deletes, cursors referencing
206*0Sstevel@tonic-gate 	 * "deleted" records end up positioned between two records, and so must
207*0Sstevel@tonic-gate 	 * be specially adjusted on the next operation.
208*0Sstevel@tonic-gate 	 */
209*0Sstevel@tonic-gate #define	C_DELETED	0x0001		/* Record was deleted. */
210*0Sstevel@tonic-gate 	u_int32_t	 flags;
211*0Sstevel@tonic-gate };
212*0Sstevel@tonic-gate 
213*0Sstevel@tonic-gate /*
214*0Sstevel@tonic-gate  * The in-memory recno data structure.
215*0Sstevel@tonic-gate  *
216*0Sstevel@tonic-gate  * !!!
217*0Sstevel@tonic-gate  * These fields are ignored as far as multi-threading is concerned.  There
218*0Sstevel@tonic-gate  * are no transaction semantics associated with backing files, nor is there
219*0Sstevel@tonic-gate  * any thread protection.
220*0Sstevel@tonic-gate  */
221*0Sstevel@tonic-gate struct __recno {
222*0Sstevel@tonic-gate 	int		 re_delim;	/* Variable-length delimiting byte. */
223*0Sstevel@tonic-gate 	int		 re_pad;	/* Fixed-length padding byte. */
224*0Sstevel@tonic-gate 	u_int32_t	 re_len;	/* Length for fixed-length records. */
225*0Sstevel@tonic-gate 
226*0Sstevel@tonic-gate 	char		*re_source;	/* Source file name. */
227*0Sstevel@tonic-gate 	int		 re_fd;		/* Source file descriptor */
228*0Sstevel@tonic-gate 	db_recno_t	 re_last;	/* Last record number read. */
229*0Sstevel@tonic-gate 	void		*re_cmap;	/* Current point in mapped space. */
230*0Sstevel@tonic-gate 	void		*re_smap;	/* Start of mapped space. */
231*0Sstevel@tonic-gate 	void		*re_emap;	/* End of mapped space. */
232*0Sstevel@tonic-gate 	size_t		 re_msize;	/* Size of mapped region. */
233*0Sstevel@tonic-gate 					/* Recno input function. */
234*0Sstevel@tonic-gate 	int (*re_irec) __P((DBC *, db_recno_t));
235*0Sstevel@tonic-gate 
236*0Sstevel@tonic-gate #define	RECNO_EOF	0x0001		/* EOF on backing source file. */
237*0Sstevel@tonic-gate #define	RECNO_MODIFIED	0x0002		/* Tree was modified. */
238*0Sstevel@tonic-gate 	u_int32_t	 flags;
239*0Sstevel@tonic-gate };
240*0Sstevel@tonic-gate 
241*0Sstevel@tonic-gate /*
242*0Sstevel@tonic-gate  * The in-memory, per-tree btree data structure.
243*0Sstevel@tonic-gate  */
244*0Sstevel@tonic-gate struct __btree {
245*0Sstevel@tonic-gate 	db_pgno_t	 bt_lpgno;	/* Last insert location. */
246*0Sstevel@tonic-gate 
247*0Sstevel@tonic-gate 	db_indx_t 	 bt_maxkey;	/* Maximum keys per page. */
248*0Sstevel@tonic-gate 	db_indx_t 	 bt_minkey;	/* Minimum keys per page. */
249*0Sstevel@tonic-gate 
250*0Sstevel@tonic-gate 	int (*bt_compare)		/* Comparison function. */
251*0Sstevel@tonic-gate 	    __P((const DBT *, const DBT *));
252*0Sstevel@tonic-gate 	size_t(*bt_prefix)		/* Prefix function. */
253*0Sstevel@tonic-gate 	    __P((const DBT *, const DBT *));
254*0Sstevel@tonic-gate 
255*0Sstevel@tonic-gate 	db_indx_t	 bt_ovflsize;	/* Maximum key/data on-page size. */
256*0Sstevel@tonic-gate 
257*0Sstevel@tonic-gate 	RECNO		*recno;		/* Private recno structure. */
258*0Sstevel@tonic-gate };
259*0Sstevel@tonic-gate 
260*0Sstevel@tonic-gate #include "btree_auto.h"
261*0Sstevel@tonic-gate #include "btree_ext.h"
262*0Sstevel@tonic-gate #include "db_am.h"
263*0Sstevel@tonic-gate #include "common_ext.h"
264