xref: /dflybsd-src/sys/vfs/hammer/hammer_btree.h (revision c4bf625e67439f34b29bfd33c4e2555ffea63ce9)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.12 2008/02/10 09:51:01 dillon Exp $
35  */
36 
37 /*
38  * HAMMER B-Tree index
39  *
40  * HAMMER implements a modified B+Tree.   B+Trees store records only
41  * at their leaves and HAMMER's modification is to adjust the internal
42  * elements so there is a boundary element on each side instead of sub-tree
43  * pointers.
44  *
45  * We just call our modified B+Tree a 'B-Tree' in HAMMER documentation to
46  * reduce confusion.
47  *
48  * A B-Tree internal node looks like this:
49  *
50  *	B N N N N N N B   <-- boundary and internal elements
51  *       S S S S S S S    <-- subtree pointers
52  *
53  * A B-Tree leaf node looks like this:
54  *
55  *	L L L L L L L L   <-- leaf elemenets
56  *			      (there is also a previous and next-leaf pointer)
57  *
58  * The recursion radix of an internal node is reduced by 1 relative to
59  * a normal B-Tree in order to accomodate the right-hand boundary.
60  *
61  * The big benefit to using a B-Tree with built-in bounds information is
62  * that it makes it possible to cache pointers into the middle of the tree
63  * and not have to start searches, insertions, OR deletions at the root node.
64  * The boundary elements allow searches to progress in a definitive direction
65  * from any point in the tree without revisting nodes.  It is also possible
66  * to terminate searches early and make minor adjustments to the boundaries
67  * (within the confines of the parent's boundaries) on the fly.  This greatly
68  * improves the efficiency of many operations, most especially record appends.
69  *
70  * HAMMER B-Trees are per-cluster.  The global multi-cluster B-Tree is
71  * constructed by allowing internal nodes to link to the roots of other
72  * clusters.  Fields in the cluster header then reference back to its
73  * parent and use the cluster generation number to detect stale linkages.
74  *
75  * The B-Tree balancing code can operate within a cluster or across the
76  * filesystem's ENTIRE B-Tree super-structure.  A cluster's B-Tree root
77  * can be a leaf node in the worse case.  A cluster is guarenteed to have
78  * sufficient free space to hold a single completely full leaf in the
79  * degenerate case.
80  *
81  * All of the structures below are on-disk structures.
82  */
83 
84 /*
85  * Common base for all B-Tree element types (40 bytes)
86  *
87  * obj_type is set to the object type the record represents if an inode,
88  * directory entry, or an inter-cluster reference.  A cluster range is
89  * special in that the B-Tree nodes represent a range within the B-Tree
90  * inclusive of rec_type field, so obj_type must be used to detect the
91  * cluster range entries.
92  *
93  * btype is only used by the elements making up an internal or leaf B-Tree
94  * node and applies to the node rather then to the key.  This means that
95  * btype must be assigned/reassigned after any update to the base_elm making
96  * up a B-Tree element.
97  */
98 struct hammer_base_elm {
99 	int64_t	obj_id;		/* 00 object record is associated with */
100 	int64_t key;		/* 08 indexing key (offset or namekey) */
101 
102 	hammer_tid_t create_tid; /* 10 transaction id for record creation */
103 	hammer_tid_t delete_tid; /* 18 transaction id for record update/del */
104 
105 	u_int16_t rec_type;	/* 20 _RECTYPE_ */
106 	u_int8_t obj_type;	/* 22 _OBJTYPE_ (restricted) */
107 	u_int8_t btype;		/* 23 B-Tree element type */
108 	int32_t reserved07;	/* 24 (future) */
109 				/* 28 */
110 };
111 
112 typedef struct hammer_base_elm *hammer_base_elm_t;
113 
114 /*
115  * Internal element (40 + 24 = 64 bytes).
116  *
117  * An internal element contains the left-hand boundary, right-hand boundary,
118  * and a recursion to another B-Tree node.
119  */
120 struct hammer_btree_internal_elm {
121 	struct hammer_base_elm base;
122 	hammer_off_t unused00;
123 	hammer_off_t subtree_offset;
124 	int32_t unused02;
125 	int32_t unused03;
126 };
127 
128 /*
129  * Leaf B-Tree element (40 + 24 = 64 bytes).
130  *
131  * A leaf element.
132  */
133 struct hammer_btree_leaf_elm {
134 	struct hammer_base_elm base;
135 	hammer_off_t rec_offset;
136 	hammer_off_t data_offset;
137 	int32_t data_len;
138 	u_int32_t data_crc;
139 };
140 
141 /*
142  * Rollup btree leaf element types - 64 byte structure
143  */
144 union hammer_btree_elm {
145 	struct hammer_base_elm base;
146 	struct hammer_btree_leaf_elm leaf;
147 	struct hammer_btree_internal_elm internal;
148 };
149 
150 typedef union hammer_btree_elm *hammer_btree_elm_t;
151 
152 /*
153  * B-Tree node (normal or meta)	(16x64 = 1K structure)
154  *
155  * Each node contains 15 elements.  The last element for an internal node
156  * is the right-boundary so internal nodes have one fewer logical elements
157  * then leaf nodes.
158  *
159  * 'count' always refers to the number of elements and is non-inclusive of
160  * the right-hand boundary for an internal node.
161  *
162  * NOTE: The node head for an internal does not contain the subtype
163  * (The B-Tree node type for the nodes referenced by its elements).
164  * Instead, each element specifies the subtype (elm->base.subtype).
165  * This allows us to maintain an unbalanced B-Tree and to easily identify
166  * special inter-cluster link elements.
167  *
168  * NOTE: FUTURE EXPANSION: The reserved fields in hammer_node_ondisk are
169  * reserved for left/right leaf linkage fields, flags, and other future
170  * features.
171  */
172 #define HAMMER_BTREE_LEAF_ELMS	15
173 #define HAMMER_BTREE_INT_ELMS	(HAMMER_BTREE_LEAF_ELMS - 1)
174 
175 /*
176  * It is safe to combine two adjacent nodes if the total number of elements
177  * is less then or equal to the *_FILL constant.
178  */
179 #define HAMMER_BTREE_LEAF_FILL	(HAMMER_BTREE_LEAF_ELMS - 3)
180 #define HAMMER_BTREE_INT_FILL	(HAMMER_BTREE_INT_ELMS - 3)
181 
182 #define HAMMER_BTREE_TYPE_INTERNAL	((u_int8_t)'I')
183 #define HAMMER_BTREE_TYPE_LEAF		((u_int8_t)'L')
184 #define HAMMER_BTREE_TYPE_RECORD	((u_int8_t)'R')
185 
186 struct hammer_node_ondisk {
187 	/*
188 	 * B-Tree node header (64 bytes)
189 	 */
190 	u_int32_t	signature;
191 	u_int32_t	crc;
192 	hammer_off_t	parent;		/* 0 if at root of cluster */
193 	int32_t		count;
194 	u_int8_t	type;
195 	u_int8_t	reserved01;
196 	u_int16_t	reserved02;
197 	hammer_off_t	reserved03;	/* future link_left */
198 	hammer_off_t	reserved04;	/* future link_right */
199 	hammer_off_t	reserved05;
200 	hammer_off_t	reserved06;
201 	hammer_off_t	reserved07;
202 
203 	/*
204 	 * Element array.  Internal nodes have one less logical element
205 	 * (meaning: the same number of physical elements) in order to
206 	 * accomodate the right-hand boundary.  The left-hand boundary
207 	 * is integrated into the first element.  Leaf nodes have no
208 	 * boundary elements.
209 	 */
210 	union hammer_btree_elm elms[HAMMER_BTREE_LEAF_ELMS];
211 };
212 
213 typedef struct hammer_node_ondisk *hammer_node_ondisk_t;
214 
215