xref: /csrg-svn/lib/libc/db/btree/btree.h (revision 51969)
146139Smao /*-
250989Sbostic  * Copyright (c) 1991 The Regents of the University of California.
346139Smao  * All rights reserved.
446139Smao  *
546139Smao  * This code is derived from software contributed to Berkeley by
646139Smao  * Mike Olson.
746139Smao  *
846139Smao  * %sccs.include.redist.c%
950989Sbostic  *
10*51969Sbostic  *	@(#)btree.h	5.6 (Berkeley) 12/16/91
1146139Smao  */
1246139Smao 
1350989Sbostic #include <mpool.h>
1450989Sbostic 
1550989Sbostic #define	DEFMINKEYPAGE	(2)		/* Minimum keys per page */
1650989Sbostic #define	MINCACHE	(5)		/* Minimum cached pages */
1750989Sbostic #define	MINPSIZE	(512)		/* Minimum page size */
1850989Sbostic 
1946139Smao /*
2051104Sbostic  * Page 0 of a btree file contains a copy of the meta-data.  This page is also
2151104Sbostic  * used as an out-of-band page, i.e. page pointers that point to nowhere point
2251104Sbostic  * to page 0.  Page 1 is the root of the btree.
2346139Smao  */
2450989Sbostic #define	P_INVALID	 0		/* Invalid tree page number. */
2551104Sbostic #define	P_META		 0		/* Tree metadata page number. */
2650989Sbostic #define	P_ROOT		 1		/* Tree root page number. */
2746139Smao 
2850989Sbostic /*
2951104Sbostic  * There are five page layouts in the btree: btree internal pages (BINTERNAL),
3051104Sbostic  * btree leaf pages (BLEAF), recno internal pages (RINTERNAL), recno leaf pages
3151104Sbostic  * (RLEAF) and overflow pages.  All five page types have a page header (PAGE).
3251104Sbostic  * This implementation requires that longs within structures are NOT padded.
3351104Sbostic  * (ANSI C permits random padding.)  If your compiler pads randomly you'll have
3451104Sbostic  * to do some work to get this package to run.
3550989Sbostic  */
3650989Sbostic typedef struct PAGE {
3750989Sbostic 	pgno_t	pgno;			/* this page's page number */
3850989Sbostic 	pgno_t	prevpg;			/* left sibling */
3950989Sbostic 	pgno_t	nextpg;			/* right sibling */
4046139Smao 
4150989Sbostic #define	P_BINTERNAL	0x01		/* btree internal page */
4250989Sbostic #define	P_BLEAF		0x02		/* leaf page */
4350989Sbostic #define	P_OVERFLOW	0x04		/* overflow page */
4450989Sbostic #define	P_RINTERNAL	0x08		/* recno internal page */
4550989Sbostic #define	P_RLEAF		0x10		/* leaf page */
4650989Sbostic #define P_TYPE		0x1f		/* type mask */
4746139Smao 
4850989Sbostic #define	P_PRESERVE	0x20		/* never delete this chain of pages */
4950989Sbostic 	u_long	flags;
5046139Smao 
5150989Sbostic 	index_t	lower;			/* lower bound of free space on page */
5250989Sbostic 	index_t	upper;			/* upper bound of free space on page */
5350989Sbostic 	index_t	linp[1];		/* long-aligned VARIABLE LENGTH DATA */
5450989Sbostic } PAGE;
5546139Smao 
5650989Sbostic /* First and next index. */
57*51969Sbostic #define	BTDATAOFF	(sizeof(pgno_t) + sizeof(pgno_t) + sizeof(pgno_t) + \
58*51969Sbostic 			    sizeof(u_long) + sizeof(index_t) + sizeof(index_t))
5950989Sbostic #define	NEXTINDEX(p)	(((p)->lower - BTDATAOFF) / sizeof(index_t))
6046139Smao 
6146139Smao /*
6250989Sbostic  * For pages other than overflow pages, there is an array of offsets into the
6350989Sbostic  * rest of the page immediately following the page header.  Each offset is to
6450989Sbostic  * an item which is unique to the type of page.  The h_lower offset is just
6550989Sbostic  * past the last filled-in index.  The h_upper offset is the first item on the
6650989Sbostic  * page.  Offsets are from the beginning of the page.
6750989Sbostic  *
6850989Sbostic  * If an item is too big to store on a single page, a flag is set and the item
6950989Sbostic  * is a { page, size } pair such that the page is the first page of an overflow
7050989Sbostic  * chain with size bytes of item.  Overflow pages are simply bytes without any
7150989Sbostic  * external structure.
7250989Sbostic  *
7350989Sbostic  * The size and page number fields in the items are long aligned so they can be
7450989Sbostic  * manipulated without copying.
7546139Smao  */
7651104Sbostic #define	LALIGN(n)	(((n) + sizeof(u_long) - 1) & ~(sizeof(u_long) - 1))
7750989Sbostic #define	NOVFLSIZE	(sizeof(pgno_t) + sizeof(size_t))
7846139Smao 
7946139Smao /*
8050989Sbostic  * For the btree internal pages, the item is a key.  BINTERNALs are {key, pgno}
8150989Sbostic  * pairs, such that the key compares less than or equal to all of the records
8250989Sbostic  * on that page.  For a tree without duplicate keys, an internal page with two
8350989Sbostic  * consecutive keys, a and b, will have all records greater than or equal to a
8450989Sbostic  * and less than b stored on the page associated with a.  Duplicate keys are
8550989Sbostic  * somewhat special and can cause duplicate internal and leaf page records and
8650989Sbostic  * some minor modifications of the above rule.
8746139Smao  */
8850989Sbostic typedef struct BINTERNAL {
8950989Sbostic 	size_t	ksize;			/* key size */
9050989Sbostic 	pgno_t	pgno;			/* page number stored on */
9150989Sbostic #define	P_BIGDATA	0x01		/* overflow data */
9250989Sbostic #define	P_BIGKEY	0x02		/* overflow key */
9350989Sbostic 	u_char	flags;
9450989Sbostic 	char	bytes[1];		/* data */
9550989Sbostic } BINTERNAL;
9646139Smao 
9750989Sbostic /* Get the page's BINTERNAL structure at index indx. */
9850989Sbostic #define	GETBINTERNAL(pg, indx) \
9950989Sbostic 	((BINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
10046139Smao 
10150989Sbostic /* Get the number of bytes in the entry. */
10250989Sbostic #define NBINTERNAL(len) \
10350989Sbostic 	LALIGN(sizeof(size_t) + sizeof(pgno_t) + sizeof(u_char) + (len))
10446139Smao 
10550989Sbostic /* Copy a BINTERNAL entry to the page. */
10650989Sbostic #define	WR_BINTERNAL(p, size, pgno, flags) { \
10751764Sbostic 	*(size_t *)p = size; \
10851764Sbostic 	p += sizeof(size_t); \
10951764Sbostic 	*(pgno_t *)p = pgno; \
11051764Sbostic 	p += sizeof(pgno_t); \
11151764Sbostic 	*(u_char *)p = flags; \
11251764Sbostic 	p += sizeof(u_char); \
11350989Sbostic }
11446139Smao 
11546139Smao /*
11650989Sbostic  * For the recno internal pages, the item is a page number with the number of
11750989Sbostic  * keys found on that page and below.
11846139Smao  */
11950989Sbostic typedef struct RINTERNAL {
12050989Sbostic 	recno_t	nrecs;			/* number of records */
12150989Sbostic 	pgno_t	pgno;			/* page number stored below */
12250989Sbostic } RINTERNAL;
12346139Smao 
12450989Sbostic /* Get the page's RINTERNAL structure at index indx. */
12550989Sbostic #define	GETRINTERNAL(pg, indx) \
12650989Sbostic 	((RINTERNAL *)((char *)(pg) + (pg)->linp[indx]))
12746139Smao 
12850989Sbostic /* Get the number of bytes in the entry. */
12950989Sbostic #define NRINTERNAL \
13050989Sbostic 	LALIGN(sizeof(recno_t) + sizeof(pgno_t))
13146139Smao 
13250989Sbostic /* Copy a RINTERAL entry to the page. */
13350989Sbostic #define	WR_RINTERNAL(p, nrecs, pgno) { \
13451764Sbostic 	*(recno_t *)p = nrecs; \
13551764Sbostic 	p += sizeof(recno_t); \
13650989Sbostic 	*(pgno_t *)p = pgno; \
13750989Sbostic }
13846139Smao 
13950989Sbostic /* For the btree leaf pages, the item is a key and data pair. */
14050989Sbostic typedef struct BLEAF {
14150989Sbostic 	size_t	ksize;			/* size of key */
14250989Sbostic 	size_t	dsize;			/* size of data */
14350989Sbostic 	u_char	flags;			/* P_BIGDATA, P_BIGKEY */
14450989Sbostic 	char	bytes[1];		/* data */
14550989Sbostic } BLEAF;
14646139Smao 
14750989Sbostic /* Get the page's BLEAF structure at index indx. */
14850989Sbostic #define	GETBLEAF(pg, indx) \
14950989Sbostic 	((BLEAF *)((char *)(pg) + (pg)->linp[indx]))
15046139Smao 
15150989Sbostic /* Get the number of bytes in the entry. */
15251104Sbostic #define NBLEAF(p)	NBLEAFDBT((p)->ksize, (p)->dsize)
15346139Smao 
15450989Sbostic /* Get the number of bytes in the user's key/data pair. */
15550989Sbostic #define NBLEAFDBT(ksize, dsize) \
15650989Sbostic 	LALIGN(sizeof(size_t) + sizeof(size_t) + sizeof(u_char) + \
15750989Sbostic 	    (ksize) + (dsize))
15846139Smao 
15950989Sbostic /* Copy a BLEAF entry to the page. */
16050989Sbostic #define	WR_BLEAF(p, key, data, flags) { \
16151764Sbostic 	*(size_t *)p = key->size; \
16251764Sbostic 	p += sizeof(size_t); \
16351764Sbostic 	*(size_t *)p = data->size; \
16451764Sbostic 	p += sizeof(size_t); \
16551764Sbostic 	*(u_char *)p = flags; \
16651764Sbostic 	p += sizeof(u_char); \
16750989Sbostic 	bcopy(key->data, p, key->size); \
16850989Sbostic 	p += key->size; \
16950989Sbostic 	bcopy(data->data, p, data->size); \
17050989Sbostic }
17146139Smao 
17250989Sbostic /* For the recno leaf pages, the item is a data entry. */
17350989Sbostic typedef struct RLEAF {
17450989Sbostic 	size_t	dsize;			/* size of data */
17550989Sbostic 	u_char	flags;			/* P_BIGDATA */
17650989Sbostic 	char	bytes[1];
17750989Sbostic } RLEAF;
17846139Smao 
17950989Sbostic /* Get the page's RLEAF structure at index indx. */
18050989Sbostic #define	GETRLEAF(pg, indx) \
18150989Sbostic 	((RLEAF *)((char *)(pg) + (pg)->linp[indx]))
18246139Smao 
18350989Sbostic /* Get the number of bytes in the entry. */
18451104Sbostic #define NRLEAF(p)	NRLEAFDBT((p)->dsize)
18546139Smao 
18650989Sbostic /* Get the number of bytes from the user's data. */
18750989Sbostic #define	NRLEAFDBT(dsize) \
18850989Sbostic 	LALIGN(sizeof(size_t) + sizeof(u_char) + (dsize))
18946139Smao 
19050989Sbostic /* Copy a RLEAF entry to the page. */
19150989Sbostic #define	WR_RLEAF(p, data, flags) { \
19251764Sbostic 	*(size_t *)p = data->size; \
19351764Sbostic 	p += sizeof(size_t); \
19451764Sbostic 	*(u_char *)p = flags; \
19551764Sbostic 	p += sizeof(u_char); \
19650989Sbostic 	bcopy(data->data, p, data->size); \
19750989Sbostic }
19846139Smao 
19946139Smao /*
20050989Sbostic  * A record in the tree is either a pointer to a page and an index in the page
20150989Sbostic  * or a page number and an index.  These structures are used as a cursor, stack
20250989Sbostic  * entry and search returns as well as to pass records to other routines.
20350989Sbostic  *
20450989Sbostic  * One comment about searches.  Internal page searches must find the largest
20550989Sbostic  * record less than key in the tree so that descents work.  Leaf page searches
20650989Sbostic  * must find the smallest record greater than key so that the returned index
20750989Sbostic  * is the record's correct position for insertion.
20851104Sbostic  *
20951104Sbostic  * One comment about cursors.  The cursor key is never removed from the tree,
21051104Sbostic  * even if deleted.  This is because it is quite difficult to decide where the
21151104Sbostic  * cursor should be when other keys have been inserted/deleted in the tree;
21251104Sbostic  * duplicate keys make it impossible.  This scheme does require extra work
21351104Sbostic  * though, to make sure that we don't perform an operation on a deleted key.
21446139Smao  */
21550989Sbostic typedef struct EPGNO {
21650989Sbostic 	pgno_t	pgno;			/* the page number */
21750989Sbostic 	index_t	index;			/* the index on the page */
21850989Sbostic } EPGNO;
21946139Smao 
22050989Sbostic typedef struct EPG {
22150989Sbostic 	PAGE	*page;			/* the (pinned) page */
22250989Sbostic 	index_t	 index;			/* the index on the page */
22350989Sbostic } EPG;
22446139Smao 
22551104Sbostic /*
22651104Sbostic  * The metadata of the tree.  The m_nrecs field is used only by the RECNO code.
22751104Sbostic  * This is because the btree doesn't really need it and it requires that every
22851104Sbostic  * put or delete call modify the metadata.
22951104Sbostic  */
23051104Sbostic typedef struct BTMETA {
23151104Sbostic 	u_long	m_magic;		/* magic number */
23251104Sbostic 	u_long	m_version;		/* version */
23351104Sbostic 	u_long	m_psize;		/* page size */
23451104Sbostic 	u_long	m_free;			/* page number of first free page */
23551104Sbostic 	u_long	m_nrecs;		/* R: number of records */
23651104Sbostic #define	SAVEMETA	(BTF_NODUPS | BTF_RECNO)
23751104Sbostic 	u_long	m_flags;		/* bt_flags & SAVEMETA */
23851104Sbostic 	u_long	m_lorder;		/* byte order */
23951104Sbostic } BTMETA;
24051104Sbostic 
24150989Sbostic /* The in-memory btree/recno data structure. */
24250989Sbostic typedef struct BTREE {
24350989Sbostic 	MPOOL	*bt_mp;			/* memory pool cookie */
24446139Smao 
24550989Sbostic 	DB	*bt_dbp;		/* pointer to enclosing DB */
24646139Smao 
24751104Sbostic 	EPGNO	bt_bcursor;		/* B: btree cursor */
24851104Sbostic 	recno_t	bt_rcursor;		/* R: recno cursor (1-based) */
24946139Smao 
25050989Sbostic #define	BT_POP(t)	(t->bt_sp ? t->bt_stack + --t->bt_sp : NULL)
25150989Sbostic #define	BT_CLR(t)	(t->bt_sp = 0)
25250989Sbostic 	EPGNO	*bt_stack;		/* stack of parent pages */
25350989Sbostic 	u_int	bt_sp;			/* current stack pointer */
25450989Sbostic 	u_int	bt_maxstack;		/* largest stack */
25546139Smao 
25650989Sbostic 	char	*bt_kbuf;		/* key buffer */
25750989Sbostic 	size_t	bt_kbufsz;		/* key buffer size */
25850989Sbostic 	char	*bt_dbuf;		/* data buffer */
25950989Sbostic 	size_t	bt_dbufsz;		/* data buffer size */
26046139Smao 
26150989Sbostic 	int	bt_fd;			/* tree file descriptor */
26250989Sbostic 	FILE	*bt_rfp;		/* R: record FILE pointer */
26350989Sbostic 	int	bt_rfd;			/* R: record file descriptor */
26446139Smao 
26551104Sbostic 	pgno_t	bt_free;		/* XXX next free page */
26651104Sbostic 	index_t	bt_psize;		/* page size */
26751104Sbostic 	index_t	bt_ovflsize;		/* cut-off for key/data overflow */
26850989Sbostic 	int	bt_lorder;		/* byte order */
26950989Sbostic 					/* sorted order */
27050989Sbostic 	enum { NOT, BACK, FORWARD, } bt_order;
27150989Sbostic 	EPGNO	bt_last;		/* last insert */
27246139Smao 
27350989Sbostic 					/* B: key comparison function */
27450989Sbostic 	int	(*bt_cmp) __P((const DBT *, const DBT *));
27550989Sbostic 					/* B: prefix comparison function */
27650989Sbostic 	int	(*bt_pfx) __P((const DBT *, const DBT *));
27750989Sbostic 					/* R: recno input function */
27850989Sbostic 	int	(*bt_irec) __P((struct BTREE *, recno_t));
27951104Sbostic 	recno_t	bt_nrecs;		/* R: number of records */
28050989Sbostic 	caddr_t	bt_smap;		/* R: start of mapped space */
28150989Sbostic 	caddr_t bt_emap;		/* R: end of mapped space */
28250989Sbostic 	size_t	bt_reclen;		/* R: fixed record length */
28350989Sbostic 	u_char	bt_bval;		/* R: delimiting byte/pad character */
28446139Smao 
28551104Sbostic #define	BTF_DELCRSR	0x001		/* cursor has been deleted */
28650989Sbostic #define	BTF_FIXEDLEN	0x002		/* fixed length records */
28750989Sbostic #define	BTF_INMEM	0x004		/* in-memory tree */
28851104Sbostic #define	BTF_METADIRTY	0x008		/* B: need to write metadata */
28950989Sbostic #define	BTF_MODIFIED	0x010		/* tree modified */
29051104Sbostic #define	BTF_NODUPS	0x020		/* B: no duplicate keys permitted */
29150989Sbostic #define	BTF_RDONLY	0x040		/* read-only tree */
29250989Sbostic #define	BTF_RECNO	0x080		/* record oriented tree */
29350989Sbostic #define	BTF_SEQINIT	0x100		/* sequential scan initialized */
29450989Sbostic 	u_long		bt_flags;	/* btree state */
29550989Sbostic } BTREE;
29646139Smao 
29750989Sbostic #define	ISSET(t, f)	((t)->bt_flags & (f))
29850989Sbostic #define	NOTSET(t, f)	(!((t)->bt_flags & (f)))
29950989Sbostic #define	SET(t, f)	((t)->bt_flags |= (f))
30050989Sbostic #define	UNSET(t, f)	((t)->bt_flags &= ~(f))
30146139Smao 
30250989Sbostic #include "extern.h"
303