xref: /freebsd-src/sys/contrib/openzfs/module/avl/avl.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1*eda14cbcSMatt Macy /*
2*eda14cbcSMatt Macy  * CDDL HEADER START
3*eda14cbcSMatt Macy  *
4*eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5*eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6*eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7*eda14cbcSMatt Macy  *
8*eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*eda14cbcSMatt Macy  * or http://www.opensolaris.org/os/licensing.
10*eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11*eda14cbcSMatt Macy  * and limitations under the License.
12*eda14cbcSMatt Macy  *
13*eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14*eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16*eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17*eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18*eda14cbcSMatt Macy  *
19*eda14cbcSMatt Macy  * CDDL HEADER END
20*eda14cbcSMatt Macy  */
21*eda14cbcSMatt Macy /*
22*eda14cbcSMatt Macy  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23*eda14cbcSMatt Macy  * Use is subject to license terms.
24*eda14cbcSMatt Macy  */
25*eda14cbcSMatt Macy 
26*eda14cbcSMatt Macy /*
27*eda14cbcSMatt Macy  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
28*eda14cbcSMatt Macy  * Copyright (c) 2015 by Delphix. All rights reserved.
29*eda14cbcSMatt Macy  */
30*eda14cbcSMatt Macy 
31*eda14cbcSMatt Macy /*
32*eda14cbcSMatt Macy  * AVL - generic AVL tree implementation for kernel use
33*eda14cbcSMatt Macy  *
34*eda14cbcSMatt Macy  * A complete description of AVL trees can be found in many CS textbooks.
35*eda14cbcSMatt Macy  *
36*eda14cbcSMatt Macy  * Here is a very brief overview. An AVL tree is a binary search tree that is
37*eda14cbcSMatt Macy  * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
38*eda14cbcSMatt Macy  * any given node, the left and right subtrees are allowed to differ in height
39*eda14cbcSMatt Macy  * by at most 1 level.
40*eda14cbcSMatt Macy  *
41*eda14cbcSMatt Macy  * This relaxation from a perfectly balanced binary tree allows doing
42*eda14cbcSMatt Macy  * insertion and deletion relatively efficiently. Searching the tree is
43*eda14cbcSMatt Macy  * still a fast operation, roughly O(log(N)).
44*eda14cbcSMatt Macy  *
45*eda14cbcSMatt Macy  * The key to insertion and deletion is a set of tree manipulations called
46*eda14cbcSMatt Macy  * rotations, which bring unbalanced subtrees back into the semi-balanced state.
47*eda14cbcSMatt Macy  *
48*eda14cbcSMatt Macy  * This implementation of AVL trees has the following peculiarities:
49*eda14cbcSMatt Macy  *
50*eda14cbcSMatt Macy  *	- The AVL specific data structures are physically embedded as fields
51*eda14cbcSMatt Macy  *	  in the "using" data structures.  To maintain generality the code
52*eda14cbcSMatt Macy  *	  must constantly translate between "avl_node_t *" and containing
53*eda14cbcSMatt Macy  *	  data structure "void *"s by adding/subtracting the avl_offset.
54*eda14cbcSMatt Macy  *
55*eda14cbcSMatt Macy  *	- Since the AVL data is always embedded in other structures, there is
56*eda14cbcSMatt Macy  *	  no locking or memory allocation in the AVL routines. This must be
57*eda14cbcSMatt Macy  *	  provided for by the enclosing data structure's semantics. Typically,
58*eda14cbcSMatt Macy  *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
59*eda14cbcSMatt Macy  *	  exclusive write lock. Other operations require a read lock.
60*eda14cbcSMatt Macy  *
61*eda14cbcSMatt Macy  *      - The implementation uses iteration instead of explicit recursion,
62*eda14cbcSMatt Macy  *	  since it is intended to run on limited size kernel stacks. Since
63*eda14cbcSMatt Macy  *	  there is no recursion stack present to move "up" in the tree,
64*eda14cbcSMatt Macy  *	  there is an explicit "parent" link in the avl_node_t.
65*eda14cbcSMatt Macy  *
66*eda14cbcSMatt Macy  *      - The left/right children pointers of a node are in an array.
67*eda14cbcSMatt Macy  *	  In the code, variables (instead of constants) are used to represent
68*eda14cbcSMatt Macy  *	  left and right indices.  The implementation is written as if it only
69*eda14cbcSMatt Macy  *	  dealt with left handed manipulations.  By changing the value assigned
70*eda14cbcSMatt Macy  *	  to "left", the code also works for right handed trees.  The
71*eda14cbcSMatt Macy  *	  following variables/terms are frequently used:
72*eda14cbcSMatt Macy  *
73*eda14cbcSMatt Macy  *		int left;	// 0 when dealing with left children,
74*eda14cbcSMatt Macy  *				// 1 for dealing with right children
75*eda14cbcSMatt Macy  *
76*eda14cbcSMatt Macy  *		int left_heavy;	// -1 when left subtree is taller at some node,
77*eda14cbcSMatt Macy  *				// +1 when right subtree is taller
78*eda14cbcSMatt Macy  *
79*eda14cbcSMatt Macy  *		int right;	// will be the opposite of left (0 or 1)
80*eda14cbcSMatt Macy  *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
81*eda14cbcSMatt Macy  *
82*eda14cbcSMatt Macy  *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
83*eda14cbcSMatt Macy  *
84*eda14cbcSMatt Macy  *	  Though it is a little more confusing to read the code, the approach
85*eda14cbcSMatt Macy  *	  allows using half as much code (and hence cache footprint) for tree
86*eda14cbcSMatt Macy  *	  manipulations and eliminates many conditional branches.
87*eda14cbcSMatt Macy  *
88*eda14cbcSMatt Macy  *	- The avl_index_t is an opaque "cookie" used to find nodes at or
89*eda14cbcSMatt Macy  *	  adjacent to where a new value would be inserted in the tree. The value
90*eda14cbcSMatt Macy  *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
91*eda14cbcSMatt Macy  *	  pointer) is set to indicate if that the new node has a value greater
92*eda14cbcSMatt Macy  *	  than the value of the indicated "avl_node_t *".
93*eda14cbcSMatt Macy  *
94*eda14cbcSMatt Macy  * Note - in addition to userland (e.g. libavl and libutil) and the kernel
95*eda14cbcSMatt Macy  * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module,
96*eda14cbcSMatt Macy  * which each have their own compilation environments and subsequent
97*eda14cbcSMatt Macy  * requirements. Each of these environments must be considered when adding
98*eda14cbcSMatt Macy  * dependencies from avl.c.
99*eda14cbcSMatt Macy  */
100*eda14cbcSMatt Macy 
101*eda14cbcSMatt Macy #include <sys/types.h>
102*eda14cbcSMatt Macy #include <sys/param.h>
103*eda14cbcSMatt Macy #include <sys/debug.h>
104*eda14cbcSMatt Macy #include <sys/avl.h>
105*eda14cbcSMatt Macy #include <sys/cmn_err.h>
106*eda14cbcSMatt Macy #include <sys/mod.h>
107*eda14cbcSMatt Macy 
108*eda14cbcSMatt Macy /*
109*eda14cbcSMatt Macy  * Small arrays to translate between balance (or diff) values and child indices.
110*eda14cbcSMatt Macy  *
111*eda14cbcSMatt Macy  * Code that deals with binary tree data structures will randomly use
112*eda14cbcSMatt Macy  * left and right children when examining a tree.  C "if()" statements
113*eda14cbcSMatt Macy  * which evaluate randomly suffer from very poor hardware branch prediction.
114*eda14cbcSMatt Macy  * In this code we avoid some of the branch mispredictions by using the
115*eda14cbcSMatt Macy  * following translation arrays. They replace random branches with an
116*eda14cbcSMatt Macy  * additional memory reference. Since the translation arrays are both very
117*eda14cbcSMatt Macy  * small the data should remain efficiently in cache.
118*eda14cbcSMatt Macy  */
119*eda14cbcSMatt Macy static const int  avl_child2balance[2]	= {-1, 1};
120*eda14cbcSMatt Macy static const int  avl_balance2child[]	= {0, 0, 1};
121*eda14cbcSMatt Macy 
122*eda14cbcSMatt Macy 
123*eda14cbcSMatt Macy /*
124*eda14cbcSMatt Macy  * Walk from one node to the previous valued node (ie. an infix walk
125*eda14cbcSMatt Macy  * towards the left). At any given node we do one of 2 things:
126*eda14cbcSMatt Macy  *
127*eda14cbcSMatt Macy  * - If there is a left child, go to it, then to it's rightmost descendant.
128*eda14cbcSMatt Macy  *
129*eda14cbcSMatt Macy  * - otherwise we return through parent nodes until we've come from a right
130*eda14cbcSMatt Macy  *   child.
131*eda14cbcSMatt Macy  *
132*eda14cbcSMatt Macy  * Return Value:
133*eda14cbcSMatt Macy  * NULL - if at the end of the nodes
134*eda14cbcSMatt Macy  * otherwise next node
135*eda14cbcSMatt Macy  */
136*eda14cbcSMatt Macy void *
137*eda14cbcSMatt Macy avl_walk(avl_tree_t *tree, void	*oldnode, int left)
138*eda14cbcSMatt Macy {
139*eda14cbcSMatt Macy 	size_t off = tree->avl_offset;
140*eda14cbcSMatt Macy 	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
141*eda14cbcSMatt Macy 	int right = 1 - left;
142*eda14cbcSMatt Macy 	int was_child;
143*eda14cbcSMatt Macy 
144*eda14cbcSMatt Macy 
145*eda14cbcSMatt Macy 	/*
146*eda14cbcSMatt Macy 	 * nowhere to walk to if tree is empty
147*eda14cbcSMatt Macy 	 */
148*eda14cbcSMatt Macy 	if (node == NULL)
149*eda14cbcSMatt Macy 		return (NULL);
150*eda14cbcSMatt Macy 
151*eda14cbcSMatt Macy 	/*
152*eda14cbcSMatt Macy 	 * Visit the previous valued node. There are two possibilities:
153*eda14cbcSMatt Macy 	 *
154*eda14cbcSMatt Macy 	 * If this node has a left child, go down one left, then all
155*eda14cbcSMatt Macy 	 * the way right.
156*eda14cbcSMatt Macy 	 */
157*eda14cbcSMatt Macy 	if (node->avl_child[left] != NULL) {
158*eda14cbcSMatt Macy 		for (node = node->avl_child[left];
159*eda14cbcSMatt Macy 		    node->avl_child[right] != NULL;
160*eda14cbcSMatt Macy 		    node = node->avl_child[right])
161*eda14cbcSMatt Macy 			;
162*eda14cbcSMatt Macy 	/*
163*eda14cbcSMatt Macy 	 * Otherwise, return through left children as far as we can.
164*eda14cbcSMatt Macy 	 */
165*eda14cbcSMatt Macy 	} else {
166*eda14cbcSMatt Macy 		for (;;) {
167*eda14cbcSMatt Macy 			was_child = AVL_XCHILD(node);
168*eda14cbcSMatt Macy 			node = AVL_XPARENT(node);
169*eda14cbcSMatt Macy 			if (node == NULL)
170*eda14cbcSMatt Macy 				return (NULL);
171*eda14cbcSMatt Macy 			if (was_child == right)
172*eda14cbcSMatt Macy 				break;
173*eda14cbcSMatt Macy 		}
174*eda14cbcSMatt Macy 	}
175*eda14cbcSMatt Macy 
176*eda14cbcSMatt Macy 	return (AVL_NODE2DATA(node, off));
177*eda14cbcSMatt Macy }
178*eda14cbcSMatt Macy 
179*eda14cbcSMatt Macy /*
180*eda14cbcSMatt Macy  * Return the lowest valued node in a tree or NULL.
181*eda14cbcSMatt Macy  * (leftmost child from root of tree)
182*eda14cbcSMatt Macy  */
183*eda14cbcSMatt Macy void *
184*eda14cbcSMatt Macy avl_first(avl_tree_t *tree)
185*eda14cbcSMatt Macy {
186*eda14cbcSMatt Macy 	avl_node_t *node;
187*eda14cbcSMatt Macy 	avl_node_t *prev = NULL;
188*eda14cbcSMatt Macy 	size_t off = tree->avl_offset;
189*eda14cbcSMatt Macy 
190*eda14cbcSMatt Macy 	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
191*eda14cbcSMatt Macy 		prev = node;
192*eda14cbcSMatt Macy 
193*eda14cbcSMatt Macy 	if (prev != NULL)
194*eda14cbcSMatt Macy 		return (AVL_NODE2DATA(prev, off));
195*eda14cbcSMatt Macy 	return (NULL);
196*eda14cbcSMatt Macy }
197*eda14cbcSMatt Macy 
198*eda14cbcSMatt Macy /*
199*eda14cbcSMatt Macy  * Return the highest valued node in a tree or NULL.
200*eda14cbcSMatt Macy  * (rightmost child from root of tree)
201*eda14cbcSMatt Macy  */
202*eda14cbcSMatt Macy void *
203*eda14cbcSMatt Macy avl_last(avl_tree_t *tree)
204*eda14cbcSMatt Macy {
205*eda14cbcSMatt Macy 	avl_node_t *node;
206*eda14cbcSMatt Macy 	avl_node_t *prev = NULL;
207*eda14cbcSMatt Macy 	size_t off = tree->avl_offset;
208*eda14cbcSMatt Macy 
209*eda14cbcSMatt Macy 	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
210*eda14cbcSMatt Macy 		prev = node;
211*eda14cbcSMatt Macy 
212*eda14cbcSMatt Macy 	if (prev != NULL)
213*eda14cbcSMatt Macy 		return (AVL_NODE2DATA(prev, off));
214*eda14cbcSMatt Macy 	return (NULL);
215*eda14cbcSMatt Macy }
216*eda14cbcSMatt Macy 
217*eda14cbcSMatt Macy /*
218*eda14cbcSMatt Macy  * Access the node immediately before or after an insertion point.
219*eda14cbcSMatt Macy  *
220*eda14cbcSMatt Macy  * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
221*eda14cbcSMatt Macy  *
222*eda14cbcSMatt Macy  * Return value:
223*eda14cbcSMatt Macy  *	NULL: no node in the given direction
224*eda14cbcSMatt Macy  *	"void *"  of the found tree node
225*eda14cbcSMatt Macy  */
226*eda14cbcSMatt Macy void *
227*eda14cbcSMatt Macy avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
228*eda14cbcSMatt Macy {
229*eda14cbcSMatt Macy 	int child = AVL_INDEX2CHILD(where);
230*eda14cbcSMatt Macy 	avl_node_t *node = AVL_INDEX2NODE(where);
231*eda14cbcSMatt Macy 	void *data;
232*eda14cbcSMatt Macy 	size_t off = tree->avl_offset;
233*eda14cbcSMatt Macy 
234*eda14cbcSMatt Macy 	if (node == NULL) {
235*eda14cbcSMatt Macy 		ASSERT(tree->avl_root == NULL);
236*eda14cbcSMatt Macy 		return (NULL);
237*eda14cbcSMatt Macy 	}
238*eda14cbcSMatt Macy 	data = AVL_NODE2DATA(node, off);
239*eda14cbcSMatt Macy 	if (child != direction)
240*eda14cbcSMatt Macy 		return (data);
241*eda14cbcSMatt Macy 
242*eda14cbcSMatt Macy 	return (avl_walk(tree, data, direction));
243*eda14cbcSMatt Macy }
244*eda14cbcSMatt Macy 
245*eda14cbcSMatt Macy 
246*eda14cbcSMatt Macy /*
247*eda14cbcSMatt Macy  * Search for the node which contains "value".  The algorithm is a
248*eda14cbcSMatt Macy  * simple binary tree search.
249*eda14cbcSMatt Macy  *
250*eda14cbcSMatt Macy  * return value:
251*eda14cbcSMatt Macy  *	NULL: the value is not in the AVL tree
252*eda14cbcSMatt Macy  *		*where (if not NULL)  is set to indicate the insertion point
253*eda14cbcSMatt Macy  *	"void *"  of the found tree node
254*eda14cbcSMatt Macy  */
255*eda14cbcSMatt Macy void *
256*eda14cbcSMatt Macy avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
257*eda14cbcSMatt Macy {
258*eda14cbcSMatt Macy 	avl_node_t *node;
259*eda14cbcSMatt Macy 	avl_node_t *prev = NULL;
260*eda14cbcSMatt Macy 	int child = 0;
261*eda14cbcSMatt Macy 	int diff;
262*eda14cbcSMatt Macy 	size_t off = tree->avl_offset;
263*eda14cbcSMatt Macy 
264*eda14cbcSMatt Macy 	for (node = tree->avl_root; node != NULL;
265*eda14cbcSMatt Macy 	    node = node->avl_child[child]) {
266*eda14cbcSMatt Macy 
267*eda14cbcSMatt Macy 		prev = node;
268*eda14cbcSMatt Macy 
269*eda14cbcSMatt Macy 		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
270*eda14cbcSMatt Macy 		ASSERT(-1 <= diff && diff <= 1);
271*eda14cbcSMatt Macy 		if (diff == 0) {
272*eda14cbcSMatt Macy #ifdef ZFS_DEBUG
273*eda14cbcSMatt Macy 			if (where != NULL)
274*eda14cbcSMatt Macy 				*where = 0;
275*eda14cbcSMatt Macy #endif
276*eda14cbcSMatt Macy 			return (AVL_NODE2DATA(node, off));
277*eda14cbcSMatt Macy 		}
278*eda14cbcSMatt Macy 		child = avl_balance2child[1 + diff];
279*eda14cbcSMatt Macy 
280*eda14cbcSMatt Macy 	}
281*eda14cbcSMatt Macy 
282*eda14cbcSMatt Macy 	if (where != NULL)
283*eda14cbcSMatt Macy 		*where = AVL_MKINDEX(prev, child);
284*eda14cbcSMatt Macy 
285*eda14cbcSMatt Macy 	return (NULL);
286*eda14cbcSMatt Macy }
287*eda14cbcSMatt Macy 
288*eda14cbcSMatt Macy 
289*eda14cbcSMatt Macy /*
290*eda14cbcSMatt Macy  * Perform a rotation to restore balance at the subtree given by depth.
291*eda14cbcSMatt Macy  *
292*eda14cbcSMatt Macy  * This routine is used by both insertion and deletion. The return value
293*eda14cbcSMatt Macy  * indicates:
294*eda14cbcSMatt Macy  *	 0 : subtree did not change height
295*eda14cbcSMatt Macy  *	!0 : subtree was reduced in height
296*eda14cbcSMatt Macy  *
297*eda14cbcSMatt Macy  * The code is written as if handling left rotations, right rotations are
298*eda14cbcSMatt Macy  * symmetric and handled by swapping values of variables right/left[_heavy]
299*eda14cbcSMatt Macy  *
300*eda14cbcSMatt Macy  * On input balance is the "new" balance at "node". This value is either
301*eda14cbcSMatt Macy  * -2 or +2.
302*eda14cbcSMatt Macy  */
303*eda14cbcSMatt Macy static int
304*eda14cbcSMatt Macy avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
305*eda14cbcSMatt Macy {
306*eda14cbcSMatt Macy 	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
307*eda14cbcSMatt Macy 	int right = 1 - left;
308*eda14cbcSMatt Macy 	int left_heavy = balance >> 1;
309*eda14cbcSMatt Macy 	int right_heavy = -left_heavy;
310*eda14cbcSMatt Macy 	avl_node_t *parent = AVL_XPARENT(node);
311*eda14cbcSMatt Macy 	avl_node_t *child = node->avl_child[left];
312*eda14cbcSMatt Macy 	avl_node_t *cright;
313*eda14cbcSMatt Macy 	avl_node_t *gchild;
314*eda14cbcSMatt Macy 	avl_node_t *gright;
315*eda14cbcSMatt Macy 	avl_node_t *gleft;
316*eda14cbcSMatt Macy 	int which_child = AVL_XCHILD(node);
317*eda14cbcSMatt Macy 	int child_bal = AVL_XBALANCE(child);
318*eda14cbcSMatt Macy 
319*eda14cbcSMatt Macy 	/* BEGIN CSTYLED */
320*eda14cbcSMatt Macy 	/*
321*eda14cbcSMatt Macy 	 * case 1 : node is overly left heavy, the left child is balanced or
322*eda14cbcSMatt Macy 	 * also left heavy. This requires the following rotation.
323*eda14cbcSMatt Macy 	 *
324*eda14cbcSMatt Macy 	 *                   (node bal:-2)
325*eda14cbcSMatt Macy 	 *                    /           \
326*eda14cbcSMatt Macy 	 *                   /             \
327*eda14cbcSMatt Macy 	 *              (child bal:0 or -1)
328*eda14cbcSMatt Macy 	 *              /    \
329*eda14cbcSMatt Macy 	 *             /      \
330*eda14cbcSMatt Macy 	 *                     cright
331*eda14cbcSMatt Macy 	 *
332*eda14cbcSMatt Macy 	 * becomes:
333*eda14cbcSMatt Macy 	 *
334*eda14cbcSMatt Macy 	 *              (child bal:1 or 0)
335*eda14cbcSMatt Macy 	 *              /        \
336*eda14cbcSMatt Macy 	 *             /          \
337*eda14cbcSMatt Macy 	 *                        (node bal:-1 or 0)
338*eda14cbcSMatt Macy 	 *                         /     \
339*eda14cbcSMatt Macy 	 *                        /       \
340*eda14cbcSMatt Macy 	 *                     cright
341*eda14cbcSMatt Macy 	 *
342*eda14cbcSMatt Macy 	 * we detect this situation by noting that child's balance is not
343*eda14cbcSMatt Macy 	 * right_heavy.
344*eda14cbcSMatt Macy 	 */
345*eda14cbcSMatt Macy 	/* END CSTYLED */
346*eda14cbcSMatt Macy 	if (child_bal != right_heavy) {
347*eda14cbcSMatt Macy 
348*eda14cbcSMatt Macy 		/*
349*eda14cbcSMatt Macy 		 * compute new balance of nodes
350*eda14cbcSMatt Macy 		 *
351*eda14cbcSMatt Macy 		 * If child used to be left heavy (now balanced) we reduced
352*eda14cbcSMatt Macy 		 * the height of this sub-tree -- used in "return...;" below
353*eda14cbcSMatt Macy 		 */
354*eda14cbcSMatt Macy 		child_bal += right_heavy; /* adjust towards right */
355*eda14cbcSMatt Macy 
356*eda14cbcSMatt Macy 		/*
357*eda14cbcSMatt Macy 		 * move "cright" to be node's left child
358*eda14cbcSMatt Macy 		 */
359*eda14cbcSMatt Macy 		cright = child->avl_child[right];
360*eda14cbcSMatt Macy 		node->avl_child[left] = cright;
361*eda14cbcSMatt Macy 		if (cright != NULL) {
362*eda14cbcSMatt Macy 			AVL_SETPARENT(cright, node);
363*eda14cbcSMatt Macy 			AVL_SETCHILD(cright, left);
364*eda14cbcSMatt Macy 		}
365*eda14cbcSMatt Macy 
366*eda14cbcSMatt Macy 		/*
367*eda14cbcSMatt Macy 		 * move node to be child's right child
368*eda14cbcSMatt Macy 		 */
369*eda14cbcSMatt Macy 		child->avl_child[right] = node;
370*eda14cbcSMatt Macy 		AVL_SETBALANCE(node, -child_bal);
371*eda14cbcSMatt Macy 		AVL_SETCHILD(node, right);
372*eda14cbcSMatt Macy 		AVL_SETPARENT(node, child);
373*eda14cbcSMatt Macy 
374*eda14cbcSMatt Macy 		/*
375*eda14cbcSMatt Macy 		 * update the pointer into this subtree
376*eda14cbcSMatt Macy 		 */
377*eda14cbcSMatt Macy 		AVL_SETBALANCE(child, child_bal);
378*eda14cbcSMatt Macy 		AVL_SETCHILD(child, which_child);
379*eda14cbcSMatt Macy 		AVL_SETPARENT(child, parent);
380*eda14cbcSMatt Macy 		if (parent != NULL)
381*eda14cbcSMatt Macy 			parent->avl_child[which_child] = child;
382*eda14cbcSMatt Macy 		else
383*eda14cbcSMatt Macy 			tree->avl_root = child;
384*eda14cbcSMatt Macy 
385*eda14cbcSMatt Macy 		return (child_bal == 0);
386*eda14cbcSMatt Macy 	}
387*eda14cbcSMatt Macy 
388*eda14cbcSMatt Macy 	/* BEGIN CSTYLED */
389*eda14cbcSMatt Macy 	/*
390*eda14cbcSMatt Macy 	 * case 2 : When node is left heavy, but child is right heavy we use
391*eda14cbcSMatt Macy 	 * a different rotation.
392*eda14cbcSMatt Macy 	 *
393*eda14cbcSMatt Macy 	 *                   (node b:-2)
394*eda14cbcSMatt Macy 	 *                    /   \
395*eda14cbcSMatt Macy 	 *                   /     \
396*eda14cbcSMatt Macy 	 *                  /       \
397*eda14cbcSMatt Macy 	 *             (child b:+1)
398*eda14cbcSMatt Macy 	 *              /     \
399*eda14cbcSMatt Macy 	 *             /       \
400*eda14cbcSMatt Macy 	 *                   (gchild b: != 0)
401*eda14cbcSMatt Macy 	 *                     /  \
402*eda14cbcSMatt Macy 	 *                    /    \
403*eda14cbcSMatt Macy 	 *                 gleft   gright
404*eda14cbcSMatt Macy 	 *
405*eda14cbcSMatt Macy 	 * becomes:
406*eda14cbcSMatt Macy 	 *
407*eda14cbcSMatt Macy 	 *              (gchild b:0)
408*eda14cbcSMatt Macy 	 *              /       \
409*eda14cbcSMatt Macy 	 *             /         \
410*eda14cbcSMatt Macy 	 *            /           \
411*eda14cbcSMatt Macy 	 *        (child b:?)   (node b:?)
412*eda14cbcSMatt Macy 	 *         /  \          /   \
413*eda14cbcSMatt Macy 	 *        /    \        /     \
414*eda14cbcSMatt Macy 	 *            gleft   gright
415*eda14cbcSMatt Macy 	 *
416*eda14cbcSMatt Macy 	 * computing the new balances is more complicated. As an example:
417*eda14cbcSMatt Macy 	 *	 if gchild was right_heavy, then child is now left heavy
418*eda14cbcSMatt Macy 	 *		else it is balanced
419*eda14cbcSMatt Macy 	 */
420*eda14cbcSMatt Macy 	/* END CSTYLED */
421*eda14cbcSMatt Macy 	gchild = child->avl_child[right];
422*eda14cbcSMatt Macy 	gleft = gchild->avl_child[left];
423*eda14cbcSMatt Macy 	gright = gchild->avl_child[right];
424*eda14cbcSMatt Macy 
425*eda14cbcSMatt Macy 	/*
426*eda14cbcSMatt Macy 	 * move gright to left child of node and
427*eda14cbcSMatt Macy 	 *
428*eda14cbcSMatt Macy 	 * move gleft to right child of node
429*eda14cbcSMatt Macy 	 */
430*eda14cbcSMatt Macy 	node->avl_child[left] = gright;
431*eda14cbcSMatt Macy 	if (gright != NULL) {
432*eda14cbcSMatt Macy 		AVL_SETPARENT(gright, node);
433*eda14cbcSMatt Macy 		AVL_SETCHILD(gright, left);
434*eda14cbcSMatt Macy 	}
435*eda14cbcSMatt Macy 
436*eda14cbcSMatt Macy 	child->avl_child[right] = gleft;
437*eda14cbcSMatt Macy 	if (gleft != NULL) {
438*eda14cbcSMatt Macy 		AVL_SETPARENT(gleft, child);
439*eda14cbcSMatt Macy 		AVL_SETCHILD(gleft, right);
440*eda14cbcSMatt Macy 	}
441*eda14cbcSMatt Macy 
442*eda14cbcSMatt Macy 	/*
443*eda14cbcSMatt Macy 	 * move child to left child of gchild and
444*eda14cbcSMatt Macy 	 *
445*eda14cbcSMatt Macy 	 * move node to right child of gchild and
446*eda14cbcSMatt Macy 	 *
447*eda14cbcSMatt Macy 	 * fixup parent of all this to point to gchild
448*eda14cbcSMatt Macy 	 */
449*eda14cbcSMatt Macy 	balance = AVL_XBALANCE(gchild);
450*eda14cbcSMatt Macy 	gchild->avl_child[left] = child;
451*eda14cbcSMatt Macy 	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
452*eda14cbcSMatt Macy 	AVL_SETPARENT(child, gchild);
453*eda14cbcSMatt Macy 	AVL_SETCHILD(child, left);
454*eda14cbcSMatt Macy 
455*eda14cbcSMatt Macy 	gchild->avl_child[right] = node;
456*eda14cbcSMatt Macy 	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
457*eda14cbcSMatt Macy 	AVL_SETPARENT(node, gchild);
458*eda14cbcSMatt Macy 	AVL_SETCHILD(node, right);
459*eda14cbcSMatt Macy 
460*eda14cbcSMatt Macy 	AVL_SETBALANCE(gchild, 0);
461*eda14cbcSMatt Macy 	AVL_SETPARENT(gchild, parent);
462*eda14cbcSMatt Macy 	AVL_SETCHILD(gchild, which_child);
463*eda14cbcSMatt Macy 	if (parent != NULL)
464*eda14cbcSMatt Macy 		parent->avl_child[which_child] = gchild;
465*eda14cbcSMatt Macy 	else
466*eda14cbcSMatt Macy 		tree->avl_root = gchild;
467*eda14cbcSMatt Macy 
468*eda14cbcSMatt Macy 	return (1);	/* the new tree is always shorter */
469*eda14cbcSMatt Macy }
470*eda14cbcSMatt Macy 
471*eda14cbcSMatt Macy 
472*eda14cbcSMatt Macy /*
473*eda14cbcSMatt Macy  * Insert a new node into an AVL tree at the specified (from avl_find()) place.
474*eda14cbcSMatt Macy  *
475*eda14cbcSMatt Macy  * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
476*eda14cbcSMatt Macy  * searches out to the leaf positions.  The avl_index_t indicates the node
477*eda14cbcSMatt Macy  * which will be the parent of the new node.
478*eda14cbcSMatt Macy  *
479*eda14cbcSMatt Macy  * After the node is inserted, a single rotation further up the tree may
480*eda14cbcSMatt Macy  * be necessary to maintain an acceptable AVL balance.
481*eda14cbcSMatt Macy  */
482*eda14cbcSMatt Macy void
483*eda14cbcSMatt Macy avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
484*eda14cbcSMatt Macy {
485*eda14cbcSMatt Macy 	avl_node_t *node;
486*eda14cbcSMatt Macy 	avl_node_t *parent = AVL_INDEX2NODE(where);
487*eda14cbcSMatt Macy 	int old_balance;
488*eda14cbcSMatt Macy 	int new_balance;
489*eda14cbcSMatt Macy 	int which_child = AVL_INDEX2CHILD(where);
490*eda14cbcSMatt Macy 	size_t off = tree->avl_offset;
491*eda14cbcSMatt Macy 
492*eda14cbcSMatt Macy 	ASSERT(tree);
493*eda14cbcSMatt Macy #ifdef _LP64
494*eda14cbcSMatt Macy 	ASSERT(((uintptr_t)new_data & 0x7) == 0);
495*eda14cbcSMatt Macy #endif
496*eda14cbcSMatt Macy 
497*eda14cbcSMatt Macy 	node = AVL_DATA2NODE(new_data, off);
498*eda14cbcSMatt Macy 
499*eda14cbcSMatt Macy 	/*
500*eda14cbcSMatt Macy 	 * First, add the node to the tree at the indicated position.
501*eda14cbcSMatt Macy 	 */
502*eda14cbcSMatt Macy 	++tree->avl_numnodes;
503*eda14cbcSMatt Macy 
504*eda14cbcSMatt Macy 	node->avl_child[0] = NULL;
505*eda14cbcSMatt Macy 	node->avl_child[1] = NULL;
506*eda14cbcSMatt Macy 
507*eda14cbcSMatt Macy 	AVL_SETCHILD(node, which_child);
508*eda14cbcSMatt Macy 	AVL_SETBALANCE(node, 0);
509*eda14cbcSMatt Macy 	AVL_SETPARENT(node, parent);
510*eda14cbcSMatt Macy 	if (parent != NULL) {
511*eda14cbcSMatt Macy 		ASSERT(parent->avl_child[which_child] == NULL);
512*eda14cbcSMatt Macy 		parent->avl_child[which_child] = node;
513*eda14cbcSMatt Macy 	} else {
514*eda14cbcSMatt Macy 		ASSERT(tree->avl_root == NULL);
515*eda14cbcSMatt Macy 		tree->avl_root = node;
516*eda14cbcSMatt Macy 	}
517*eda14cbcSMatt Macy 	/*
518*eda14cbcSMatt Macy 	 * Now, back up the tree modifying the balance of all nodes above the
519*eda14cbcSMatt Macy 	 * insertion point. If we get to a highly unbalanced ancestor, we
520*eda14cbcSMatt Macy 	 * need to do a rotation.  If we back out of the tree we are done.
521*eda14cbcSMatt Macy 	 * If we brought any subtree into perfect balance (0), we are also done.
522*eda14cbcSMatt Macy 	 */
523*eda14cbcSMatt Macy 	for (;;) {
524*eda14cbcSMatt Macy 		node = parent;
525*eda14cbcSMatt Macy 		if (node == NULL)
526*eda14cbcSMatt Macy 			return;
527*eda14cbcSMatt Macy 
528*eda14cbcSMatt Macy 		/*
529*eda14cbcSMatt Macy 		 * Compute the new balance
530*eda14cbcSMatt Macy 		 */
531*eda14cbcSMatt Macy 		old_balance = AVL_XBALANCE(node);
532*eda14cbcSMatt Macy 		new_balance = old_balance + avl_child2balance[which_child];
533*eda14cbcSMatt Macy 
534*eda14cbcSMatt Macy 		/*
535*eda14cbcSMatt Macy 		 * If we introduced equal balance, then we are done immediately
536*eda14cbcSMatt Macy 		 */
537*eda14cbcSMatt Macy 		if (new_balance == 0) {
538*eda14cbcSMatt Macy 			AVL_SETBALANCE(node, 0);
539*eda14cbcSMatt Macy 			return;
540*eda14cbcSMatt Macy 		}
541*eda14cbcSMatt Macy 
542*eda14cbcSMatt Macy 		/*
543*eda14cbcSMatt Macy 		 * If both old and new are not zero we went
544*eda14cbcSMatt Macy 		 * from -1 to -2 balance, do a rotation.
545*eda14cbcSMatt Macy 		 */
546*eda14cbcSMatt Macy 		if (old_balance != 0)
547*eda14cbcSMatt Macy 			break;
548*eda14cbcSMatt Macy 
549*eda14cbcSMatt Macy 		AVL_SETBALANCE(node, new_balance);
550*eda14cbcSMatt Macy 		parent = AVL_XPARENT(node);
551*eda14cbcSMatt Macy 		which_child = AVL_XCHILD(node);
552*eda14cbcSMatt Macy 	}
553*eda14cbcSMatt Macy 
554*eda14cbcSMatt Macy 	/*
555*eda14cbcSMatt Macy 	 * perform a rotation to fix the tree and return
556*eda14cbcSMatt Macy 	 */
557*eda14cbcSMatt Macy 	(void) avl_rotation(tree, node, new_balance);
558*eda14cbcSMatt Macy }
559*eda14cbcSMatt Macy 
560*eda14cbcSMatt Macy /*
561*eda14cbcSMatt Macy  * Insert "new_data" in "tree" in the given "direction" either after or
562*eda14cbcSMatt Macy  * before (AVL_AFTER, AVL_BEFORE) the data "here".
563*eda14cbcSMatt Macy  *
564*eda14cbcSMatt Macy  * Insertions can only be done at empty leaf points in the tree, therefore
565*eda14cbcSMatt Macy  * if the given child of the node is already present we move to either
566*eda14cbcSMatt Macy  * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
567*eda14cbcSMatt Macy  * every other node in the tree is a leaf, this always works.
568*eda14cbcSMatt Macy  *
569*eda14cbcSMatt Macy  * To help developers using this interface, we assert that the new node
570*eda14cbcSMatt Macy  * is correctly ordered at every step of the way in DEBUG kernels.
571*eda14cbcSMatt Macy  */
572*eda14cbcSMatt Macy void
573*eda14cbcSMatt Macy avl_insert_here(
574*eda14cbcSMatt Macy 	avl_tree_t *tree,
575*eda14cbcSMatt Macy 	void *new_data,
576*eda14cbcSMatt Macy 	void *here,
577*eda14cbcSMatt Macy 	int direction)
578*eda14cbcSMatt Macy {
579*eda14cbcSMatt Macy 	avl_node_t *node;
580*eda14cbcSMatt Macy 	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
581*eda14cbcSMatt Macy #ifdef ZFS_DEBUG
582*eda14cbcSMatt Macy 	int diff;
583*eda14cbcSMatt Macy #endif
584*eda14cbcSMatt Macy 
585*eda14cbcSMatt Macy 	ASSERT(tree != NULL);
586*eda14cbcSMatt Macy 	ASSERT(new_data != NULL);
587*eda14cbcSMatt Macy 	ASSERT(here != NULL);
588*eda14cbcSMatt Macy 	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
589*eda14cbcSMatt Macy 
590*eda14cbcSMatt Macy 	/*
591*eda14cbcSMatt Macy 	 * If corresponding child of node is not NULL, go to the neighboring
592*eda14cbcSMatt Macy 	 * node and reverse the insertion direction.
593*eda14cbcSMatt Macy 	 */
594*eda14cbcSMatt Macy 	node = AVL_DATA2NODE(here, tree->avl_offset);
595*eda14cbcSMatt Macy 
596*eda14cbcSMatt Macy #ifdef ZFS_DEBUG
597*eda14cbcSMatt Macy 	diff = tree->avl_compar(new_data, here);
598*eda14cbcSMatt Macy 	ASSERT(-1 <= diff && diff <= 1);
599*eda14cbcSMatt Macy 	ASSERT(diff != 0);
600*eda14cbcSMatt Macy 	ASSERT(diff > 0 ? child == 1 : child == 0);
601*eda14cbcSMatt Macy #endif
602*eda14cbcSMatt Macy 
603*eda14cbcSMatt Macy 	if (node->avl_child[child] != NULL) {
604*eda14cbcSMatt Macy 		node = node->avl_child[child];
605*eda14cbcSMatt Macy 		child = 1 - child;
606*eda14cbcSMatt Macy 		while (node->avl_child[child] != NULL) {
607*eda14cbcSMatt Macy #ifdef ZFS_DEBUG
608*eda14cbcSMatt Macy 			diff = tree->avl_compar(new_data,
609*eda14cbcSMatt Macy 			    AVL_NODE2DATA(node, tree->avl_offset));
610*eda14cbcSMatt Macy 			ASSERT(-1 <= diff && diff <= 1);
611*eda14cbcSMatt Macy 			ASSERT(diff != 0);
612*eda14cbcSMatt Macy 			ASSERT(diff > 0 ? child == 1 : child == 0);
613*eda14cbcSMatt Macy #endif
614*eda14cbcSMatt Macy 			node = node->avl_child[child];
615*eda14cbcSMatt Macy 		}
616*eda14cbcSMatt Macy #ifdef ZFS_DEBUG
617*eda14cbcSMatt Macy 		diff = tree->avl_compar(new_data,
618*eda14cbcSMatt Macy 		    AVL_NODE2DATA(node, tree->avl_offset));
619*eda14cbcSMatt Macy 		ASSERT(-1 <= diff && diff <= 1);
620*eda14cbcSMatt Macy 		ASSERT(diff != 0);
621*eda14cbcSMatt Macy 		ASSERT(diff > 0 ? child == 1 : child == 0);
622*eda14cbcSMatt Macy #endif
623*eda14cbcSMatt Macy 	}
624*eda14cbcSMatt Macy 	ASSERT(node->avl_child[child] == NULL);
625*eda14cbcSMatt Macy 
626*eda14cbcSMatt Macy 	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
627*eda14cbcSMatt Macy }
628*eda14cbcSMatt Macy 
629*eda14cbcSMatt Macy /*
630*eda14cbcSMatt Macy  * Add a new node to an AVL tree.  Strictly enforce that no duplicates can
631*eda14cbcSMatt Macy  * be added to the tree with a VERIFY which is enabled for non-DEBUG builds.
632*eda14cbcSMatt Macy  */
633*eda14cbcSMatt Macy void
634*eda14cbcSMatt Macy avl_add(avl_tree_t *tree, void *new_node)
635*eda14cbcSMatt Macy {
636*eda14cbcSMatt Macy 	avl_index_t where = 0;
637*eda14cbcSMatt Macy 
638*eda14cbcSMatt Macy 	VERIFY(avl_find(tree, new_node, &where) == NULL);
639*eda14cbcSMatt Macy 
640*eda14cbcSMatt Macy 	avl_insert(tree, new_node, where);
641*eda14cbcSMatt Macy }
642*eda14cbcSMatt Macy 
643*eda14cbcSMatt Macy /*
644*eda14cbcSMatt Macy  * Delete a node from the AVL tree.  Deletion is similar to insertion, but
645*eda14cbcSMatt Macy  * with 2 complications.
646*eda14cbcSMatt Macy  *
647*eda14cbcSMatt Macy  * First, we may be deleting an interior node. Consider the following subtree:
648*eda14cbcSMatt Macy  *
649*eda14cbcSMatt Macy  *     d           c            c
650*eda14cbcSMatt Macy  *    / \         / \          / \
651*eda14cbcSMatt Macy  *   b   e       b   e        b   e
652*eda14cbcSMatt Macy  *  / \	        / \          /
653*eda14cbcSMatt Macy  * a   c       a            a
654*eda14cbcSMatt Macy  *
655*eda14cbcSMatt Macy  * When we are deleting node (d), we find and bring up an adjacent valued leaf
656*eda14cbcSMatt Macy  * node, say (c), to take the interior node's place. In the code this is
657*eda14cbcSMatt Macy  * handled by temporarily swapping (d) and (c) in the tree and then using
658*eda14cbcSMatt Macy  * common code to delete (d) from the leaf position.
659*eda14cbcSMatt Macy  *
660*eda14cbcSMatt Macy  * Secondly, an interior deletion from a deep tree may require more than one
661*eda14cbcSMatt Macy  * rotation to fix the balance. This is handled by moving up the tree through
662*eda14cbcSMatt Macy  * parents and applying rotations as needed. The return value from
663*eda14cbcSMatt Macy  * avl_rotation() is used to detect when a subtree did not change overall
664*eda14cbcSMatt Macy  * height due to a rotation.
665*eda14cbcSMatt Macy  */
666*eda14cbcSMatt Macy void
667*eda14cbcSMatt Macy avl_remove(avl_tree_t *tree, void *data)
668*eda14cbcSMatt Macy {
669*eda14cbcSMatt Macy 	avl_node_t *delete;
670*eda14cbcSMatt Macy 	avl_node_t *parent;
671*eda14cbcSMatt Macy 	avl_node_t *node;
672*eda14cbcSMatt Macy 	avl_node_t tmp;
673*eda14cbcSMatt Macy 	int old_balance;
674*eda14cbcSMatt Macy 	int new_balance;
675*eda14cbcSMatt Macy 	int left;
676*eda14cbcSMatt Macy 	int right;
677*eda14cbcSMatt Macy 	int which_child;
678*eda14cbcSMatt Macy 	size_t off = tree->avl_offset;
679*eda14cbcSMatt Macy 
680*eda14cbcSMatt Macy 	ASSERT(tree);
681*eda14cbcSMatt Macy 
682*eda14cbcSMatt Macy 	delete = AVL_DATA2NODE(data, off);
683*eda14cbcSMatt Macy 
684*eda14cbcSMatt Macy 	/*
685*eda14cbcSMatt Macy 	 * Deletion is easiest with a node that has at most 1 child.
686*eda14cbcSMatt Macy 	 * We swap a node with 2 children with a sequentially valued
687*eda14cbcSMatt Macy 	 * neighbor node. That node will have at most 1 child. Note this
688*eda14cbcSMatt Macy 	 * has no effect on the ordering of the remaining nodes.
689*eda14cbcSMatt Macy 	 *
690*eda14cbcSMatt Macy 	 * As an optimization, we choose the greater neighbor if the tree
691*eda14cbcSMatt Macy 	 * is right heavy, otherwise the left neighbor. This reduces the
692*eda14cbcSMatt Macy 	 * number of rotations needed.
693*eda14cbcSMatt Macy 	 */
694*eda14cbcSMatt Macy 	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
695*eda14cbcSMatt Macy 
696*eda14cbcSMatt Macy 		/*
697*eda14cbcSMatt Macy 		 * choose node to swap from whichever side is taller
698*eda14cbcSMatt Macy 		 */
699*eda14cbcSMatt Macy 		old_balance = AVL_XBALANCE(delete);
700*eda14cbcSMatt Macy 		left = avl_balance2child[old_balance + 1];
701*eda14cbcSMatt Macy 		right = 1 - left;
702*eda14cbcSMatt Macy 
703*eda14cbcSMatt Macy 		/*
704*eda14cbcSMatt Macy 		 * get to the previous value'd node
705*eda14cbcSMatt Macy 		 * (down 1 left, as far as possible right)
706*eda14cbcSMatt Macy 		 */
707*eda14cbcSMatt Macy 		for (node = delete->avl_child[left];
708*eda14cbcSMatt Macy 		    node->avl_child[right] != NULL;
709*eda14cbcSMatt Macy 		    node = node->avl_child[right])
710*eda14cbcSMatt Macy 			;
711*eda14cbcSMatt Macy 
712*eda14cbcSMatt Macy 		/*
713*eda14cbcSMatt Macy 		 * create a temp placeholder for 'node'
714*eda14cbcSMatt Macy 		 * move 'node' to delete's spot in the tree
715*eda14cbcSMatt Macy 		 */
716*eda14cbcSMatt Macy 		tmp = *node;
717*eda14cbcSMatt Macy 
718*eda14cbcSMatt Macy 		*node = *delete;
719*eda14cbcSMatt Macy 		if (node->avl_child[left] == node)
720*eda14cbcSMatt Macy 			node->avl_child[left] = &tmp;
721*eda14cbcSMatt Macy 
722*eda14cbcSMatt Macy 		parent = AVL_XPARENT(node);
723*eda14cbcSMatt Macy 		if (parent != NULL)
724*eda14cbcSMatt Macy 			parent->avl_child[AVL_XCHILD(node)] = node;
725*eda14cbcSMatt Macy 		else
726*eda14cbcSMatt Macy 			tree->avl_root = node;
727*eda14cbcSMatt Macy 		AVL_SETPARENT(node->avl_child[left], node);
728*eda14cbcSMatt Macy 		AVL_SETPARENT(node->avl_child[right], node);
729*eda14cbcSMatt Macy 
730*eda14cbcSMatt Macy 		/*
731*eda14cbcSMatt Macy 		 * Put tmp where node used to be (just temporary).
732*eda14cbcSMatt Macy 		 * It always has a parent and at most 1 child.
733*eda14cbcSMatt Macy 		 */
734*eda14cbcSMatt Macy 		delete = &tmp;
735*eda14cbcSMatt Macy 		parent = AVL_XPARENT(delete);
736*eda14cbcSMatt Macy 		parent->avl_child[AVL_XCHILD(delete)] = delete;
737*eda14cbcSMatt Macy 		which_child = (delete->avl_child[1] != 0);
738*eda14cbcSMatt Macy 		if (delete->avl_child[which_child] != NULL)
739*eda14cbcSMatt Macy 			AVL_SETPARENT(delete->avl_child[which_child], delete);
740*eda14cbcSMatt Macy 	}
741*eda14cbcSMatt Macy 
742*eda14cbcSMatt Macy 
743*eda14cbcSMatt Macy 	/*
744*eda14cbcSMatt Macy 	 * Here we know "delete" is at least partially a leaf node. It can
745*eda14cbcSMatt Macy 	 * be easily removed from the tree.
746*eda14cbcSMatt Macy 	 */
747*eda14cbcSMatt Macy 	ASSERT(tree->avl_numnodes > 0);
748*eda14cbcSMatt Macy 	--tree->avl_numnodes;
749*eda14cbcSMatt Macy 	parent = AVL_XPARENT(delete);
750*eda14cbcSMatt Macy 	which_child = AVL_XCHILD(delete);
751*eda14cbcSMatt Macy 	if (delete->avl_child[0] != NULL)
752*eda14cbcSMatt Macy 		node = delete->avl_child[0];
753*eda14cbcSMatt Macy 	else
754*eda14cbcSMatt Macy 		node = delete->avl_child[1];
755*eda14cbcSMatt Macy 
756*eda14cbcSMatt Macy 	/*
757*eda14cbcSMatt Macy 	 * Connect parent directly to node (leaving out delete).
758*eda14cbcSMatt Macy 	 */
759*eda14cbcSMatt Macy 	if (node != NULL) {
760*eda14cbcSMatt Macy 		AVL_SETPARENT(node, parent);
761*eda14cbcSMatt Macy 		AVL_SETCHILD(node, which_child);
762*eda14cbcSMatt Macy 	}
763*eda14cbcSMatt Macy 	if (parent == NULL) {
764*eda14cbcSMatt Macy 		tree->avl_root = node;
765*eda14cbcSMatt Macy 		return;
766*eda14cbcSMatt Macy 	}
767*eda14cbcSMatt Macy 	parent->avl_child[which_child] = node;
768*eda14cbcSMatt Macy 
769*eda14cbcSMatt Macy 
770*eda14cbcSMatt Macy 	/*
771*eda14cbcSMatt Macy 	 * Since the subtree is now shorter, begin adjusting parent balances
772*eda14cbcSMatt Macy 	 * and performing any needed rotations.
773*eda14cbcSMatt Macy 	 */
774*eda14cbcSMatt Macy 	do {
775*eda14cbcSMatt Macy 
776*eda14cbcSMatt Macy 		/*
777*eda14cbcSMatt Macy 		 * Move up the tree and adjust the balance
778*eda14cbcSMatt Macy 		 *
779*eda14cbcSMatt Macy 		 * Capture the parent and which_child values for the next
780*eda14cbcSMatt Macy 		 * iteration before any rotations occur.
781*eda14cbcSMatt Macy 		 */
782*eda14cbcSMatt Macy 		node = parent;
783*eda14cbcSMatt Macy 		old_balance = AVL_XBALANCE(node);
784*eda14cbcSMatt Macy 		new_balance = old_balance - avl_child2balance[which_child];
785*eda14cbcSMatt Macy 		parent = AVL_XPARENT(node);
786*eda14cbcSMatt Macy 		which_child = AVL_XCHILD(node);
787*eda14cbcSMatt Macy 
788*eda14cbcSMatt Macy 		/*
789*eda14cbcSMatt Macy 		 * If a node was in perfect balance but isn't anymore then
790*eda14cbcSMatt Macy 		 * we can stop, since the height didn't change above this point
791*eda14cbcSMatt Macy 		 * due to a deletion.
792*eda14cbcSMatt Macy 		 */
793*eda14cbcSMatt Macy 		if (old_balance == 0) {
794*eda14cbcSMatt Macy 			AVL_SETBALANCE(node, new_balance);
795*eda14cbcSMatt Macy 			break;
796*eda14cbcSMatt Macy 		}
797*eda14cbcSMatt Macy 
798*eda14cbcSMatt Macy 		/*
799*eda14cbcSMatt Macy 		 * If the new balance is zero, we don't need to rotate
800*eda14cbcSMatt Macy 		 * else
801*eda14cbcSMatt Macy 		 * need a rotation to fix the balance.
802*eda14cbcSMatt Macy 		 * If the rotation doesn't change the height
803*eda14cbcSMatt Macy 		 * of the sub-tree we have finished adjusting.
804*eda14cbcSMatt Macy 		 */
805*eda14cbcSMatt Macy 		if (new_balance == 0)
806*eda14cbcSMatt Macy 			AVL_SETBALANCE(node, new_balance);
807*eda14cbcSMatt Macy 		else if (!avl_rotation(tree, node, new_balance))
808*eda14cbcSMatt Macy 			break;
809*eda14cbcSMatt Macy 	} while (parent != NULL);
810*eda14cbcSMatt Macy }
811*eda14cbcSMatt Macy 
812*eda14cbcSMatt Macy #define	AVL_REINSERT(tree, obj)		\
813*eda14cbcSMatt Macy 	avl_remove((tree), (obj));	\
814*eda14cbcSMatt Macy 	avl_add((tree), (obj))
815*eda14cbcSMatt Macy 
816*eda14cbcSMatt Macy boolean_t
817*eda14cbcSMatt Macy avl_update_lt(avl_tree_t *t, void *obj)
818*eda14cbcSMatt Macy {
819*eda14cbcSMatt Macy 	void *neighbor;
820*eda14cbcSMatt Macy 
821*eda14cbcSMatt Macy 	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
822*eda14cbcSMatt Macy 	    (t->avl_compar(obj, neighbor) <= 0));
823*eda14cbcSMatt Macy 
824*eda14cbcSMatt Macy 	neighbor = AVL_PREV(t, obj);
825*eda14cbcSMatt Macy 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
826*eda14cbcSMatt Macy 		AVL_REINSERT(t, obj);
827*eda14cbcSMatt Macy 		return (B_TRUE);
828*eda14cbcSMatt Macy 	}
829*eda14cbcSMatt Macy 
830*eda14cbcSMatt Macy 	return (B_FALSE);
831*eda14cbcSMatt Macy }
832*eda14cbcSMatt Macy 
833*eda14cbcSMatt Macy boolean_t
834*eda14cbcSMatt Macy avl_update_gt(avl_tree_t *t, void *obj)
835*eda14cbcSMatt Macy {
836*eda14cbcSMatt Macy 	void *neighbor;
837*eda14cbcSMatt Macy 
838*eda14cbcSMatt Macy 	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
839*eda14cbcSMatt Macy 	    (t->avl_compar(obj, neighbor) >= 0));
840*eda14cbcSMatt Macy 
841*eda14cbcSMatt Macy 	neighbor = AVL_NEXT(t, obj);
842*eda14cbcSMatt Macy 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
843*eda14cbcSMatt Macy 		AVL_REINSERT(t, obj);
844*eda14cbcSMatt Macy 		return (B_TRUE);
845*eda14cbcSMatt Macy 	}
846*eda14cbcSMatt Macy 
847*eda14cbcSMatt Macy 	return (B_FALSE);
848*eda14cbcSMatt Macy }
849*eda14cbcSMatt Macy 
850*eda14cbcSMatt Macy boolean_t
851*eda14cbcSMatt Macy avl_update(avl_tree_t *t, void *obj)
852*eda14cbcSMatt Macy {
853*eda14cbcSMatt Macy 	void *neighbor;
854*eda14cbcSMatt Macy 
855*eda14cbcSMatt Macy 	neighbor = AVL_PREV(t, obj);
856*eda14cbcSMatt Macy 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
857*eda14cbcSMatt Macy 		AVL_REINSERT(t, obj);
858*eda14cbcSMatt Macy 		return (B_TRUE);
859*eda14cbcSMatt Macy 	}
860*eda14cbcSMatt Macy 
861*eda14cbcSMatt Macy 	neighbor = AVL_NEXT(t, obj);
862*eda14cbcSMatt Macy 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
863*eda14cbcSMatt Macy 		AVL_REINSERT(t, obj);
864*eda14cbcSMatt Macy 		return (B_TRUE);
865*eda14cbcSMatt Macy 	}
866*eda14cbcSMatt Macy 
867*eda14cbcSMatt Macy 	return (B_FALSE);
868*eda14cbcSMatt Macy }
869*eda14cbcSMatt Macy 
870*eda14cbcSMatt Macy void
871*eda14cbcSMatt Macy avl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
872*eda14cbcSMatt Macy {
873*eda14cbcSMatt Macy 	avl_node_t *temp_node;
874*eda14cbcSMatt Macy 	ulong_t temp_numnodes;
875*eda14cbcSMatt Macy 
876*eda14cbcSMatt Macy 	ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
877*eda14cbcSMatt Macy 	ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
878*eda14cbcSMatt Macy 	ASSERT3U(tree1->avl_size, ==, tree2->avl_size);
879*eda14cbcSMatt Macy 
880*eda14cbcSMatt Macy 	temp_node = tree1->avl_root;
881*eda14cbcSMatt Macy 	temp_numnodes = tree1->avl_numnodes;
882*eda14cbcSMatt Macy 	tree1->avl_root = tree2->avl_root;
883*eda14cbcSMatt Macy 	tree1->avl_numnodes = tree2->avl_numnodes;
884*eda14cbcSMatt Macy 	tree2->avl_root = temp_node;
885*eda14cbcSMatt Macy 	tree2->avl_numnodes = temp_numnodes;
886*eda14cbcSMatt Macy }
887*eda14cbcSMatt Macy 
888*eda14cbcSMatt Macy /*
889*eda14cbcSMatt Macy  * initialize a new AVL tree
890*eda14cbcSMatt Macy  */
891*eda14cbcSMatt Macy void
892*eda14cbcSMatt Macy avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
893*eda14cbcSMatt Macy     size_t size, size_t offset)
894*eda14cbcSMatt Macy {
895*eda14cbcSMatt Macy 	ASSERT(tree);
896*eda14cbcSMatt Macy 	ASSERT(compar);
897*eda14cbcSMatt Macy 	ASSERT(size > 0);
898*eda14cbcSMatt Macy 	ASSERT(size >= offset + sizeof (avl_node_t));
899*eda14cbcSMatt Macy #ifdef _LP64
900*eda14cbcSMatt Macy 	ASSERT((offset & 0x7) == 0);
901*eda14cbcSMatt Macy #endif
902*eda14cbcSMatt Macy 
903*eda14cbcSMatt Macy 	tree->avl_compar = compar;
904*eda14cbcSMatt Macy 	tree->avl_root = NULL;
905*eda14cbcSMatt Macy 	tree->avl_numnodes = 0;
906*eda14cbcSMatt Macy 	tree->avl_size = size;
907*eda14cbcSMatt Macy 	tree->avl_offset = offset;
908*eda14cbcSMatt Macy }
909*eda14cbcSMatt Macy 
910*eda14cbcSMatt Macy /*
911*eda14cbcSMatt Macy  * Delete a tree.
912*eda14cbcSMatt Macy  */
913*eda14cbcSMatt Macy /* ARGSUSED */
914*eda14cbcSMatt Macy void
915*eda14cbcSMatt Macy avl_destroy(avl_tree_t *tree)
916*eda14cbcSMatt Macy {
917*eda14cbcSMatt Macy 	ASSERT(tree);
918*eda14cbcSMatt Macy 	ASSERT(tree->avl_numnodes == 0);
919*eda14cbcSMatt Macy 	ASSERT(tree->avl_root == NULL);
920*eda14cbcSMatt Macy }
921*eda14cbcSMatt Macy 
922*eda14cbcSMatt Macy 
923*eda14cbcSMatt Macy /*
924*eda14cbcSMatt Macy  * Return the number of nodes in an AVL tree.
925*eda14cbcSMatt Macy  */
926*eda14cbcSMatt Macy ulong_t
927*eda14cbcSMatt Macy avl_numnodes(avl_tree_t *tree)
928*eda14cbcSMatt Macy {
929*eda14cbcSMatt Macy 	ASSERT(tree);
930*eda14cbcSMatt Macy 	return (tree->avl_numnodes);
931*eda14cbcSMatt Macy }
932*eda14cbcSMatt Macy 
933*eda14cbcSMatt Macy boolean_t
934*eda14cbcSMatt Macy avl_is_empty(avl_tree_t *tree)
935*eda14cbcSMatt Macy {
936*eda14cbcSMatt Macy 	ASSERT(tree);
937*eda14cbcSMatt Macy 	return (tree->avl_numnodes == 0);
938*eda14cbcSMatt Macy }
939*eda14cbcSMatt Macy 
940*eda14cbcSMatt Macy #define	CHILDBIT	(1L)
941*eda14cbcSMatt Macy 
942*eda14cbcSMatt Macy /*
943*eda14cbcSMatt Macy  * Post-order tree walk used to visit all tree nodes and destroy the tree
944*eda14cbcSMatt Macy  * in post order. This is used for removing all the nodes from a tree without
945*eda14cbcSMatt Macy  * paying any cost for rebalancing it.
946*eda14cbcSMatt Macy  *
947*eda14cbcSMatt Macy  * example:
948*eda14cbcSMatt Macy  *
949*eda14cbcSMatt Macy  *	void *cookie = NULL;
950*eda14cbcSMatt Macy  *	my_data_t *node;
951*eda14cbcSMatt Macy  *
952*eda14cbcSMatt Macy  *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
953*eda14cbcSMatt Macy  *		free(node);
954*eda14cbcSMatt Macy  *	avl_destroy(tree);
955*eda14cbcSMatt Macy  *
956*eda14cbcSMatt Macy  * The cookie is really an avl_node_t to the current node's parent and
957*eda14cbcSMatt Macy  * an indication of which child you looked at last.
958*eda14cbcSMatt Macy  *
959*eda14cbcSMatt Macy  * On input, a cookie value of CHILDBIT indicates the tree is done.
960*eda14cbcSMatt Macy  */
961*eda14cbcSMatt Macy void *
962*eda14cbcSMatt Macy avl_destroy_nodes(avl_tree_t *tree, void **cookie)
963*eda14cbcSMatt Macy {
964*eda14cbcSMatt Macy 	avl_node_t	*node;
965*eda14cbcSMatt Macy 	avl_node_t	*parent;
966*eda14cbcSMatt Macy 	int		child;
967*eda14cbcSMatt Macy 	void		*first;
968*eda14cbcSMatt Macy 	size_t		off = tree->avl_offset;
969*eda14cbcSMatt Macy 
970*eda14cbcSMatt Macy 	/*
971*eda14cbcSMatt Macy 	 * Initial calls go to the first node or it's right descendant.
972*eda14cbcSMatt Macy 	 */
973*eda14cbcSMatt Macy 	if (*cookie == NULL) {
974*eda14cbcSMatt Macy 		first = avl_first(tree);
975*eda14cbcSMatt Macy 
976*eda14cbcSMatt Macy 		/*
977*eda14cbcSMatt Macy 		 * deal with an empty tree
978*eda14cbcSMatt Macy 		 */
979*eda14cbcSMatt Macy 		if (first == NULL) {
980*eda14cbcSMatt Macy 			*cookie = (void *)CHILDBIT;
981*eda14cbcSMatt Macy 			return (NULL);
982*eda14cbcSMatt Macy 		}
983*eda14cbcSMatt Macy 
984*eda14cbcSMatt Macy 		node = AVL_DATA2NODE(first, off);
985*eda14cbcSMatt Macy 		parent = AVL_XPARENT(node);
986*eda14cbcSMatt Macy 		goto check_right_side;
987*eda14cbcSMatt Macy 	}
988*eda14cbcSMatt Macy 
989*eda14cbcSMatt Macy 	/*
990*eda14cbcSMatt Macy 	 * If there is no parent to return to we are done.
991*eda14cbcSMatt Macy 	 */
992*eda14cbcSMatt Macy 	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
993*eda14cbcSMatt Macy 	if (parent == NULL) {
994*eda14cbcSMatt Macy 		if (tree->avl_root != NULL) {
995*eda14cbcSMatt Macy 			ASSERT(tree->avl_numnodes == 1);
996*eda14cbcSMatt Macy 			tree->avl_root = NULL;
997*eda14cbcSMatt Macy 			tree->avl_numnodes = 0;
998*eda14cbcSMatt Macy 		}
999*eda14cbcSMatt Macy 		return (NULL);
1000*eda14cbcSMatt Macy 	}
1001*eda14cbcSMatt Macy 
1002*eda14cbcSMatt Macy 	/*
1003*eda14cbcSMatt Macy 	 * Remove the child pointer we just visited from the parent and tree.
1004*eda14cbcSMatt Macy 	 */
1005*eda14cbcSMatt Macy 	child = (uintptr_t)(*cookie) & CHILDBIT;
1006*eda14cbcSMatt Macy 	parent->avl_child[child] = NULL;
1007*eda14cbcSMatt Macy 	ASSERT(tree->avl_numnodes > 1);
1008*eda14cbcSMatt Macy 	--tree->avl_numnodes;
1009*eda14cbcSMatt Macy 
1010*eda14cbcSMatt Macy 	/*
1011*eda14cbcSMatt Macy 	 * If we just did a right child or there isn't one, go up to parent.
1012*eda14cbcSMatt Macy 	 */
1013*eda14cbcSMatt Macy 	if (child == 1 || parent->avl_child[1] == NULL) {
1014*eda14cbcSMatt Macy 		node = parent;
1015*eda14cbcSMatt Macy 		parent = AVL_XPARENT(parent);
1016*eda14cbcSMatt Macy 		goto done;
1017*eda14cbcSMatt Macy 	}
1018*eda14cbcSMatt Macy 
1019*eda14cbcSMatt Macy 	/*
1020*eda14cbcSMatt Macy 	 * Do parent's right child, then leftmost descendent.
1021*eda14cbcSMatt Macy 	 */
1022*eda14cbcSMatt Macy 	node = parent->avl_child[1];
1023*eda14cbcSMatt Macy 	while (node->avl_child[0] != NULL) {
1024*eda14cbcSMatt Macy 		parent = node;
1025*eda14cbcSMatt Macy 		node = node->avl_child[0];
1026*eda14cbcSMatt Macy 	}
1027*eda14cbcSMatt Macy 
1028*eda14cbcSMatt Macy 	/*
1029*eda14cbcSMatt Macy 	 * If here, we moved to a left child. It may have one
1030*eda14cbcSMatt Macy 	 * child on the right (when balance == +1).
1031*eda14cbcSMatt Macy 	 */
1032*eda14cbcSMatt Macy check_right_side:
1033*eda14cbcSMatt Macy 	if (node->avl_child[1] != NULL) {
1034*eda14cbcSMatt Macy 		ASSERT(AVL_XBALANCE(node) == 1);
1035*eda14cbcSMatt Macy 		parent = node;
1036*eda14cbcSMatt Macy 		node = node->avl_child[1];
1037*eda14cbcSMatt Macy 		ASSERT(node->avl_child[0] == NULL &&
1038*eda14cbcSMatt Macy 		    node->avl_child[1] == NULL);
1039*eda14cbcSMatt Macy 	} else {
1040*eda14cbcSMatt Macy 		ASSERT(AVL_XBALANCE(node) <= 0);
1041*eda14cbcSMatt Macy 	}
1042*eda14cbcSMatt Macy 
1043*eda14cbcSMatt Macy done:
1044*eda14cbcSMatt Macy 	if (parent == NULL) {
1045*eda14cbcSMatt Macy 		*cookie = (void *)CHILDBIT;
1046*eda14cbcSMatt Macy 		ASSERT(node == tree->avl_root);
1047*eda14cbcSMatt Macy 	} else {
1048*eda14cbcSMatt Macy 		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
1049*eda14cbcSMatt Macy 	}
1050*eda14cbcSMatt Macy 
1051*eda14cbcSMatt Macy 	return (AVL_NODE2DATA(node, off));
1052*eda14cbcSMatt Macy }
1053*eda14cbcSMatt Macy 
1054*eda14cbcSMatt Macy #if defined(_KERNEL)
1055*eda14cbcSMatt Macy 
1056*eda14cbcSMatt Macy static int __init
1057*eda14cbcSMatt Macy avl_init(void)
1058*eda14cbcSMatt Macy {
1059*eda14cbcSMatt Macy 	return (0);
1060*eda14cbcSMatt Macy }
1061*eda14cbcSMatt Macy 
1062*eda14cbcSMatt Macy static void __exit
1063*eda14cbcSMatt Macy avl_fini(void)
1064*eda14cbcSMatt Macy {
1065*eda14cbcSMatt Macy }
1066*eda14cbcSMatt Macy 
1067*eda14cbcSMatt Macy module_init(avl_init);
1068*eda14cbcSMatt Macy module_exit(avl_fini);
1069*eda14cbcSMatt Macy #endif
1070*eda14cbcSMatt Macy 
1071*eda14cbcSMatt Macy ZFS_MODULE_DESCRIPTION("Generic AVL tree implementation");
1072*eda14cbcSMatt Macy ZFS_MODULE_AUTHOR(ZFS_META_AUTHOR);
1073*eda14cbcSMatt Macy ZFS_MODULE_LICENSE(ZFS_META_LICENSE);
1074*eda14cbcSMatt Macy ZFS_MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
1075*eda14cbcSMatt Macy 
1076*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_create);
1077*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_find);
1078*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_insert);
1079*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_insert_here);
1080*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_walk);
1081*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_first);
1082*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_last);
1083*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_nearest);
1084*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_add);
1085*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_swap);
1086*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_is_empty);
1087*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_remove);
1088*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_numnodes);
1089*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_destroy_nodes);
1090*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_destroy);
1091*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_update_lt);
1092*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_update_gt);
1093*eda14cbcSMatt Macy EXPORT_SYMBOL(avl_update);
1094