xref: /netbsd-src/external/mpl/bind/dist/lib/dns/rbt.c (revision bcda20f65a8566e103791ec395f7f499ef322704)
1 /*	$NetBSD: rbt.c,v 1.17 2025/01/26 16:25:24 christos Exp $	*/
2 
3 /*
4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
5  *
6  * SPDX-License-Identifier: MPL-2.0
7  *
8  * This Source Code Form is subject to the terms of the Mozilla Public
9  * License, v. 2.0. If a copy of the MPL was not distributed with this
10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
11  *
12  * See the COPYRIGHT file distributed with this work for additional
13  * information regarding copyright ownership.
14  */
15 
16 /*! \file */
17 
18 #include <inttypes.h>
19 #include <stdbool.h>
20 #include <sys/stat.h>
21 
22 #include <isc/crc64.h>
23 #include <isc/file.h>
24 #include <isc/hash.h>
25 #include <isc/hex.h>
26 #include <isc/mem.h>
27 #include <isc/once.h>
28 #include <isc/refcount.h>
29 #include <isc/stdio.h>
30 #include <isc/string.h>
31 #include <isc/util.h>
32 
33 /*%
34  * This define is so dns/name.h (included by dns/fixedname.h) uses more
35  * efficient macro calls instead of functions for a few operations.
36  */
37 #include <unistd.h>
38 
39 #include <isc/result.h>
40 
41 #include <dns/db.h>
42 #include <dns/fixedname.h>
43 #include <dns/log.h>
44 #include <dns/rbt.h>
45 
46 #define CHECK(x)                             \
47 	do {                                 \
48 		result = (x);                \
49 		if (result != ISC_R_SUCCESS) \
50 			goto cleanup;        \
51 	} while (0)
52 
53 #define RBT_MAGIC      ISC_MAGIC('R', 'B', 'T', '+')
54 #define VALID_RBT(rbt) ISC_MAGIC_VALID(rbt, RBT_MAGIC)
55 
56 /*
57  * XXXDCL Since parent pointers were added in again, I could remove all of the
58  * chain junk, and replace with dns_rbt_firstnode, _previousnode, _nextnode,
59  * _lastnode.  This would involve pretty major change to the API.
60  */
61 #define CHAIN_MAGIC	   ISC_MAGIC('0', '-', '0', '-')
62 #define VALID_CHAIN(chain) ISC_MAGIC_VALID(chain, CHAIN_MAGIC)
63 
64 #define RBT_HASH_NEXTTABLE(hindex) ((hindex == 0) ? 1 : 0)
65 
66 struct dns_rbt {
67 	unsigned int magic;
68 	isc_mem_t *mctx;
69 	dns_rbtnode_t *root;
70 	void (*data_deleter)(void *, void *);
71 	void *deleter_arg;
72 	unsigned int nodecount;
73 	uint8_t hashbits[2];
74 	dns_rbtnode_t **hashtable[2];
75 	uint8_t hindex;
76 	uint32_t hiter;
77 };
78 
79 #define IS_EMPTY(node) ((node)->data == NULL)
80 
81 #define WANTEMPTYDATA_OR_DATA(options, node) \
82 	((options & DNS_RBTFIND_EMPTYDATA) != 0 || node->data != NULL)
83 
84 /*%
85  * The variable length stuff stored after the node has the following
86  * structure.
87  *
88  *	NAME_DATA{1..255} OLDOFFSETLEN{1} OFFSETS{1..128}
89  *
90  * NAME_DATA contains the name of the node when it was created.
91  * OLDOFFSETLEN contains the length of OFFSETS when the node was created.
92  * OFFSETS contains the offsets into name for each label when the node
93  * was created.
94  */
95 
96 #define NAME(node)	   ((unsigned char *)((node) + 1))
97 #define OFFSETS(node)	   (NAME(node) + node->oldnamelen + 1)
98 #define OLDOFFSETLEN(node) (OFFSETS(node)[-1])
99 
100 #define NODE_SIZE(node) \
101 	(sizeof(*node) + node->oldnamelen + OLDOFFSETLEN(node) + 1)
102 
103 /*%
104  * Color management.
105  */
106 #define RED	       0
107 #define BLACK	       1
108 #define IS_RED(node)   ((node) != NULL && (node)->color == RED)
109 #define IS_BLACK(node) ((node) == NULL || (node)->color == BLACK)
110 
111 /*%
112  * Chain management.
113  *
114  * The "ancestors" member of chains were removed, with their job now
115  * being wholly handled by parent pointers (which didn't exist, because
116  * of memory concerns, when chains were first implemented).
117  */
118 #define ADD_LEVEL(chain, node)                                     \
119 	do {                                                       \
120 		INSIST((chain)->level_count < DNS_RBT_LEVELBLOCK); \
121 		(chain)->levels[(chain)->level_count++] = (node);  \
122 	} while (0)
123 
124 /*
125  * Initialize a dns_name_t that refers to a node's name.
126  */
127 static void
128 node_name(dns_rbtnode_t *node, dns_name_t *name) {
129 	name->length = node->namelen;
130 	name->labels = node->offsetlen;
131 	name->ndata = NAME(node);
132 	name->offsets = OFFSETS(node);
133 	name->attributes = (struct dns_name_attrs){ .absolute = node->absolute,
134 						    .readonly = true };
135 }
136 
137 #ifdef DEBUG
138 /*
139  * A little something to help out in GDB.
140  */
141 dns_name_t
142 Name(dns_rbtnode_t *node);
143 dns_name_t
144 Name(dns_rbtnode_t *node) {
145 	dns_name_t name;
146 
147 	dns_name_init(&name, NULL);
148 	if (node != NULL) {
149 		node_name(node, &name);
150 	}
151 
152 	return name;
153 }
154 #endif /* DEBUG */
155 
156 /*
157  * Upper node is the parent of the root of the passed node's
158  * subtree. The passed node must not be NULL.
159  */
160 static dns_rbtnode_t *
161 get_upper_node(dns_rbtnode_t *node) {
162 	return node->uppernode;
163 }
164 
165 size_t
166 dns__rbtnode_getdistance(dns_rbtnode_t *node) {
167 	size_t nodes = 1;
168 
169 	while (node != NULL) {
170 		if (node->is_root) {
171 			break;
172 		}
173 		nodes++;
174 		node = node->parent;
175 	}
176 
177 	return nodes;
178 }
179 
180 /*
181  * Forward declarations.
182  */
183 static dns_rbtnode_t *
184 rbtnode_new(isc_mem_t *mctx, const dns_name_t *name);
185 
186 static void
187 hashtable_new(dns_rbt_t *rbt, uint8_t index, uint8_t bits);
188 static void
189 hashtable_free(dns_rbt_t *rbt, uint8_t index);
190 
191 static void
192 hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name);
193 
194 static void
195 unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node);
196 
197 static uint32_t
198 rehash_bits(dns_rbt_t *rbt, size_t newcount);
199 static void
200 hashtable_rehash(dns_rbt_t *rbt, uint32_t newbits);
201 static void
202 hashtable_rehash_one(dns_rbt_t *rbt);
203 static void
204 maybe_rehash(dns_rbt_t *rbt, size_t size);
205 static bool
206 rehashing_in_progress(dns_rbt_t *rbt);
207 
208 #define TRY_NEXTTABLE(hindex, rbt) \
209 	(hindex == rbt->hindex && rehashing_in_progress(rbt))
210 
211 static void
212 rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp);
213 static void
214 rotate_right(dns_rbtnode_t *node, dns_rbtnode_t **rootp);
215 
216 static void
217 addonlevel(dns_rbtnode_t *node, dns_rbtnode_t *current, int order,
218 	   dns_rbtnode_t **rootp);
219 
220 static void
221 deletefromlevel(dns_rbtnode_t *item, dns_rbtnode_t **rootp);
222 
223 static void
224 deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, bool unhash,
225 	       dns_rbtnode_t **nodep);
226 
227 static void
228 printnodename(dns_rbtnode_t *node, bool quoted, FILE *f);
229 
230 static void
231 freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep);
232 
233 unsigned int
234 dns__rbtnode_namelen(dns_rbtnode_t *node) {
235 	dns_name_t current;
236 	unsigned int len = 0;
237 
238 	REQUIRE(DNS_RBTNODE_VALID(node));
239 
240 	dns_name_init(&current, NULL);
241 
242 	do {
243 		if (node != NULL) {
244 			node_name(node, &current);
245 			len += current.length;
246 		} else {
247 			len += 1;
248 			break;
249 		}
250 
251 		node = get_upper_node(node);
252 	} while (!dns_name_isabsolute(&current));
253 
254 	return len;
255 }
256 
257 unsigned int
258 dns__rbtnode_getsize(dns_rbtnode_t *node) {
259 	REQUIRE(DNS_RBTNODE_VALID(node));
260 
261 	return NODE_SIZE(node);
262 }
263 
264 /*
265  * Initialize a red/black tree of trees.
266  */
267 isc_result_t
268 dns_rbt_create(isc_mem_t *mctx, dns_rbtdeleter_t deleter, void *deleter_arg,
269 	       dns_rbt_t **rbtp) {
270 	dns_rbt_t *rbt;
271 
272 	REQUIRE(mctx != NULL);
273 	REQUIRE(rbtp != NULL && *rbtp == NULL);
274 	REQUIRE(deleter == NULL ? deleter_arg == NULL : 1);
275 
276 	rbt = isc_mem_get(mctx, sizeof(*rbt));
277 	*rbt = (dns_rbt_t){
278 		.data_deleter = deleter,
279 		.deleter_arg = deleter_arg,
280 	};
281 
282 	isc_mem_attach(mctx, &rbt->mctx);
283 
284 	hashtable_new(rbt, 0, ISC_HASH_MIN_BITS);
285 
286 	rbt->magic = RBT_MAGIC;
287 
288 	*rbtp = rbt;
289 
290 	return ISC_R_SUCCESS;
291 }
292 
293 /*
294  * Deallocate a red/black tree of trees.
295  */
296 isc_result_t
297 dns_rbt_destroy(dns_rbt_t **rbtp, unsigned int quantum) {
298 	dns_rbt_t *rbt;
299 
300 	REQUIRE(rbtp != NULL && VALID_RBT(*rbtp));
301 
302 	rbt = *rbtp;
303 
304 	deletetreeflat(rbt, quantum, false, &rbt->root);
305 	if (rbt->root != NULL) {
306 		return ISC_R_QUOTA;
307 	}
308 
309 	*rbtp = NULL;
310 
311 	INSIST(rbt->nodecount == 0);
312 
313 	if (rbt->hashtable[0] != NULL) {
314 		hashtable_free(rbt, 0);
315 	}
316 	if (rbt->hashtable[1] != NULL) {
317 		hashtable_free(rbt, 1);
318 	}
319 
320 	rbt->magic = 0;
321 
322 	isc_mem_putanddetach(&rbt->mctx, rbt, sizeof(*rbt));
323 	return ISC_R_SUCCESS;
324 }
325 
326 unsigned int
327 dns_rbt_nodecount(dns_rbt_t *rbt) {
328 	REQUIRE(VALID_RBT(rbt));
329 
330 	return rbt->nodecount;
331 }
332 
333 size_t
334 dns_rbt_hashsize(dns_rbt_t *rbt) {
335 	REQUIRE(VALID_RBT(rbt));
336 
337 	uint8_t hashbits = (rbt->hashbits[0] > rbt->hashbits[1])
338 				   ? rbt->hashbits[0]
339 				   : rbt->hashbits[1];
340 
341 	return 1 << hashbits;
342 }
343 
344 static isc_result_t
345 chain_name(dns_rbtnodechain_t *chain, dns_name_t *name,
346 	   bool include_chain_end) {
347 	dns_name_t nodename;
348 	isc_result_t result = ISC_R_SUCCESS;
349 	int i;
350 
351 	dns_name_init(&nodename, NULL);
352 
353 	if (include_chain_end && chain->end != NULL) {
354 		node_name(chain->end, &nodename);
355 		dns_name_copy(&nodename, name);
356 	} else {
357 		dns_name_reset(name);
358 	}
359 
360 	for (i = (int)chain->level_count - 1; i >= 0; i--) {
361 		node_name(chain->levels[i], &nodename);
362 		result = dns_name_concatenate(name, &nodename, name, NULL);
363 
364 		if (result != ISC_R_SUCCESS) {
365 			return result;
366 		}
367 	}
368 	return result;
369 }
370 
371 static isc_result_t
372 move_chain_to_last(dns_rbtnodechain_t *chain, dns_rbtnode_t *node) {
373 	do {
374 		/*
375 		 * Go as far right and then down as much as possible,
376 		 * as long as the rightmost node has a down pointer.
377 		 */
378 		while (node->right != NULL) {
379 			node = node->right;
380 		}
381 
382 		if (node->down == NULL) {
383 			break;
384 		}
385 
386 		ADD_LEVEL(chain, node);
387 		node = node->down;
388 	} while (1);
389 
390 	chain->end = node;
391 
392 	return ISC_R_SUCCESS;
393 }
394 
395 /*
396  * Add 'name' to tree, initializing its data pointer with 'data'.
397  */
398 
399 isc_result_t
400 dns_rbt_addnode(dns_rbt_t *rbt, const dns_name_t *name, dns_rbtnode_t **nodep) {
401 	/*
402 	 * Does this thing have too many variables or what?
403 	 */
404 	dns_rbtnode_t **root, *parent, *child, *current, *new_current;
405 	dns_name_t *add_name, *new_name, current_name, *prefix, *suffix;
406 	dns_fixedname_t fixedcopy, fixedprefix, fixedsuffix, fnewname;
407 	dns_offsets_t current_offsets;
408 	dns_namereln_t compared;
409 	isc_result_t result = ISC_R_SUCCESS;
410 	unsigned int level_count;
411 	unsigned int common_labels;
412 	unsigned int nlabels, hlabels;
413 	int order;
414 
415 	REQUIRE(VALID_RBT(rbt));
416 	REQUIRE(dns_name_isabsolute(name));
417 	REQUIRE(nodep != NULL && *nodep == NULL);
418 
419 	/*
420 	 * Dear future BIND developer,
421 	 *
422 	 * After you have tried attempting to optimize this routine by
423 	 * using the hashtable and have realized your folly, please
424 	 * append another cross ("X") below as a warning to the next
425 	 * future BIND developer:
426 	 *
427 	 * Number of victim developers: X
428 	 *
429 	 * I wish the past developer had included such a notice.
430 	 *
431 	 * Long form: Unlike dns_rbt_findnode(), this function does not
432 	 * lend itself to be optimized using the hashtable:
433 	 *
434 	 * 1. In the subtree where the insertion occurs, this function
435 	 * needs to have the insertion point and the order where the
436 	 * lookup terminated (i.e., at the insertion point where left or
437 	 * right child is NULL). This cannot be determined from the
438 	 * hashtable, so at least in that subtree, a BST O(log N) lookup
439 	 * is necessary.
440 	 *
441 	 * 2. Our RBT nodes contain not only single labels but label
442 	 * sequences to optimize space usage. So at every level, we have
443 	 * to look for a match in the hashtable for all superdomains in
444 	 * the rest of the name we're searching. This is an O(N)
445 	 * operation at least, here N being the label size of name, each
446 	 * of which is a hashtable lookup involving dns_name_equal()
447 	 * comparisons.
448 	 */
449 
450 	/*
451 	 * Create a copy of the name so the original name structure is
452 	 * not modified.
453 	 */
454 	add_name = dns_fixedname_initname(&fixedcopy);
455 	INSIST(add_name != NULL);
456 	dns_name_clone(name, add_name);
457 
458 	if (rbt->root == NULL) {
459 		new_current = rbtnode_new(rbt->mctx, add_name);
460 		rbt->nodecount++;
461 		new_current->is_root = 1;
462 		new_current->uppernode = NULL;
463 		rbt->root = new_current;
464 		*nodep = new_current;
465 		hash_node(rbt, new_current, name);
466 		return ISC_R_SUCCESS;
467 	}
468 
469 	level_count = 0;
470 
471 	prefix = dns_fixedname_initname(&fixedprefix);
472 	suffix = dns_fixedname_initname(&fixedsuffix);
473 
474 	INSIST(prefix != NULL);
475 	INSIST(suffix != NULL);
476 
477 	root = &rbt->root;
478 	INSIST((*root)->is_root);
479 	parent = NULL;
480 	current = NULL;
481 	child = *root;
482 	dns_name_init(&current_name, current_offsets);
483 	new_name = dns_fixedname_initname(&fnewname);
484 	nlabels = dns_name_countlabels(name);
485 	hlabels = 0;
486 
487 	do {
488 		current = child;
489 
490 		node_name(current, &current_name);
491 		compared = dns_name_fullcompare(add_name, &current_name, &order,
492 						&common_labels);
493 
494 		if (compared == dns_namereln_equal) {
495 			*nodep = current;
496 			result = ISC_R_EXISTS;
497 			break;
498 		}
499 
500 		if (compared == dns_namereln_none) {
501 			if (order < 0) {
502 				parent = current;
503 				child = current->left;
504 			} else if (order > 0) {
505 				parent = current;
506 				child = current->right;
507 			}
508 		} else {
509 			/*
510 			 * This name has some suffix in common with the
511 			 * name at the current node.  If the name at
512 			 * the current node is shorter, that means the
513 			 * new name should be in a subtree.  If the
514 			 * name at the current node is longer, that means
515 			 * the down pointer to this tree should point
516 			 * to a new tree that has the common suffix, and
517 			 * the non-common parts of these two names should
518 			 * start a new tree.
519 			 */
520 			hlabels += common_labels;
521 			if (compared == dns_namereln_subdomain) {
522 				/*
523 				 * All of the existing labels are in common,
524 				 * so the new name is in a subtree.
525 				 * Whack off the common labels for the
526 				 * not-in-common part to be searched for
527 				 * in the next level.
528 				 */
529 				dns_name_split(add_name, common_labels,
530 					       add_name, NULL);
531 
532 				/*
533 				 * Follow the down pointer (possibly NULL).
534 				 */
535 				root = &current->down;
536 
537 				INSIST(*root == NULL ||
538 				       ((*root)->is_root &&
539 					(*root)->parent == current));
540 
541 				parent = NULL;
542 				child = current->down;
543 
544 				INSIST(level_count < DNS_RBT_LEVELBLOCK);
545 				level_count++;
546 			} else {
547 				/*
548 				 * The number of labels in common is fewer
549 				 * than the number of labels at the current
550 				 * node, so the current node must be adjusted
551 				 * to have just the common suffix, and a down
552 				 * pointer made to a new tree.
553 				 */
554 
555 				INSIST(compared ==
556 					       dns_namereln_commonancestor ||
557 				       compared == dns_namereln_contains);
558 
559 				/*
560 				 * Ensure the number of levels in the tree
561 				 * does not exceed the number of logical
562 				 * levels allowed by DNSSEC.
563 				 *
564 				 * XXXDCL need a better error result?
565 				 */
566 				if (level_count >= DNS_RBT_LEVELBLOCK) {
567 					result = ISC_R_NOSPACE;
568 					break;
569 				}
570 
571 				/*
572 				 * Split the name into two parts, a prefix
573 				 * which is the not-in-common parts of the
574 				 * two names and a suffix that is the common
575 				 * parts of them.
576 				 */
577 				dns_name_split(&current_name, common_labels,
578 					       prefix, suffix);
579 				new_current = rbtnode_new(rbt->mctx, suffix);
580 
581 				/*
582 				 * Reproduce the tree attributes of the
583 				 * current node.
584 				 */
585 				new_current->is_root = current->is_root;
586 				if (current->nsec == DNS_DB_NSEC_HAS_NSEC) {
587 					new_current->nsec = DNS_DB_NSEC_NORMAL;
588 				} else {
589 					new_current->nsec = current->nsec;
590 				}
591 				new_current->parent = current->parent;
592 				new_current->left = current->left;
593 				new_current->right = current->right;
594 				new_current->color = current->color;
595 
596 				/*
597 				 * Fix pointers that were to the current node.
598 				 */
599 				if (parent != NULL) {
600 					if (parent->left == current) {
601 						parent->left = new_current;
602 					} else {
603 						parent->right = new_current;
604 					}
605 				}
606 				if (new_current->left != NULL) {
607 					new_current->left->parent = new_current;
608 				}
609 				if (new_current->right != NULL) {
610 					new_current->right->parent =
611 						new_current;
612 				}
613 				if (*root == current) {
614 					*root = new_current;
615 				}
616 
617 				current->namelen = prefix->length;
618 				current->offsetlen = prefix->labels;
619 
620 				/*
621 				 * Set up the new root of the next level.
622 				 * By definition it will not be the top
623 				 * level tree, so clear the absolute flag.
624 				 */
625 				current->is_root = 1;
626 				current->parent = new_current;
627 				new_current->down = current;
628 				root = &new_current->down;
629 
630 				new_current->uppernode = current->uppernode;
631 				current->uppernode = new_current;
632 
633 				INSIST(level_count < DNS_RBT_LEVELBLOCK);
634 				level_count++;
635 
636 				current->left = NULL;
637 				current->right = NULL;
638 
639 				current->color = BLACK;
640 				current->absolute = false;
641 
642 				rbt->nodecount++;
643 				dns_name_getlabelsequence(name,
644 							  nlabels - hlabels,
645 							  hlabels, new_name);
646 				hash_node(rbt, new_current, new_name);
647 
648 				if (common_labels ==
649 				    dns_name_countlabels(add_name))
650 				{
651 					/*
652 					 * The name has been added by pushing
653 					 * the not-in-common parts down to
654 					 * a new level.
655 					 */
656 					*nodep = new_current;
657 					return ISC_R_SUCCESS;
658 				} else {
659 					/*
660 					 * The current node has no data,
661 					 * because it is just a placeholder.
662 					 * Its data pointer is already NULL
663 					 * from rbtnode_new()), so there's
664 					 * nothing more to do to it.
665 					 *
666 					 * The not-in-common parts of the new
667 					 * name will be inserted into the new
668 					 * level following this loop.
669 					 */
670 					dns_name_split(add_name, common_labels,
671 						       add_name, NULL);
672 					result = ISC_R_SUCCESS;
673 					break;
674 				}
675 			}
676 		}
677 	} while (child != NULL);
678 
679 	if (result == ISC_R_SUCCESS) {
680 		new_current = rbtnode_new(rbt->mctx, add_name);
681 	}
682 
683 	if (result == ISC_R_SUCCESS) {
684 		if (*root == NULL) {
685 			new_current->uppernode = current;
686 		} else {
687 			new_current->uppernode = (*root)->parent;
688 		}
689 
690 		addonlevel(new_current, current, order, root);
691 		rbt->nodecount++;
692 		*nodep = new_current;
693 		hash_node(rbt, new_current, name);
694 	}
695 
696 	return result;
697 }
698 
699 /*
700  * Find the node for "name" in the tree of trees.
701  */
702 isc_result_t
703 dns__rbt_findnode(dns_rbt_t *rbt, const dns_name_t *name, dns_name_t *foundname,
704 		  dns_rbtnode_t **node, dns_rbtnodechain_t *chain,
705 		  unsigned int options, dns_rbtfindcallback_t callback,
706 		  void *callback_arg DNS__DB_FLARG) {
707 	dns_rbtnode_t *current, *last_compared;
708 	dns_rbtnodechain_t localchain;
709 	dns_name_t *search_name, current_name, *callback_name;
710 	dns_fixedname_t fixedcallbackname, fixedsearchname;
711 	dns_namereln_t compared;
712 	isc_result_t result, saved_result;
713 	unsigned int common_labels;
714 	unsigned int hlabels = 0;
715 	int order;
716 	uint8_t hindex;
717 
718 	REQUIRE(VALID_RBT(rbt));
719 	REQUIRE(dns_name_isabsolute(name));
720 	REQUIRE(node != NULL && *node == NULL);
721 	REQUIRE((options & (DNS_RBTFIND_NOEXACT | DNS_RBTFIND_NOPREDECESSOR)) !=
722 		(DNS_RBTFIND_NOEXACT | DNS_RBTFIND_NOPREDECESSOR));
723 
724 	/*
725 	 * If there is a chain it needs to appear to be in a sane state,
726 	 * otherwise a chain is still needed to generate foundname and
727 	 * callback_name.
728 	 */
729 	if (chain == NULL) {
730 		options |= DNS_RBTFIND_NOPREDECESSOR;
731 		chain = &localchain;
732 		dns_rbtnodechain_init(chain);
733 	} else {
734 		dns_rbtnodechain_reset(chain);
735 	}
736 
737 	if (rbt->root == NULL) {
738 		return ISC_R_NOTFOUND;
739 	}
740 
741 	/*
742 	 * Appease GCC about variables it incorrectly thinks are
743 	 * possibly used uninitialized.
744 	 */
745 	compared = dns_namereln_none;
746 	last_compared = NULL;
747 	order = 0;
748 
749 	callback_name = dns_fixedname_initname(&fixedcallbackname);
750 
751 	/*
752 	 * search_name is the name segment being sought in each tree level.
753 	 * By using a fixedname, the search_name will definitely have offsets
754 	 * for use by any splitting. By using dns_name_clone, no name data
755 	 * should be copied.
756 	 */
757 	search_name = dns_fixedname_initname(&fixedsearchname);
758 	INSIST(search_name != NULL);
759 	dns_name_clone(name, search_name);
760 
761 	dns_name_init(&current_name, NULL);
762 
763 	saved_result = ISC_R_SUCCESS;
764 	current = rbt->root;
765 
766 	while (current != NULL) {
767 		node_name(current, &current_name);
768 		compared = dns_name_fullcompare(search_name, &current_name,
769 						&order, &common_labels);
770 		/*
771 		 * last_compared is used as a shortcut to start (or
772 		 * continue rather) finding the stop-node of the search
773 		 * when hashing was used (see much below in this
774 		 * function).
775 		 */
776 		last_compared = current;
777 
778 		if (compared == dns_namereln_equal) {
779 			break;
780 		}
781 
782 		if (compared == dns_namereln_none) {
783 			/*
784 			 * Here, current is pointing at a subtree root
785 			 * node. We try to find a matching node using
786 			 * the hashtable. We can get one of 3 results
787 			 * here: (a) we locate the matching node, (b) we
788 			 * find a node to which the current node has a
789 			 * subdomain relation, (c) we fail to find (a)
790 			 * or (b).
791 			 */
792 
793 			dns_name_t hash_name;
794 			dns_rbtnode_t *hnode;
795 			dns_rbtnode_t *up_current;
796 			unsigned int nlabels;
797 			unsigned int tlabels = 1;
798 			uint32_t hashval;
799 			uint32_t hash;
800 
801 			/*
802 			 * The case of current not being a subtree root,
803 			 * that means a left or right pointer was
804 			 * followed, only happens when the algorithm
805 			 * fell through to the traditional binary search
806 			 * because of a bitstring label.  Since we
807 			 * dropped the bitstring support, this should
808 			 * not happen.
809 			 */
810 			INSIST(current->is_root);
811 
812 			nlabels = dns_name_countlabels(search_name);
813 
814 			/*
815 			 * current is the root of the current level, so
816 			 * its parent is the same as its "up" pointer.
817 			 */
818 			up_current = current->parent;
819 			dns_name_init(&hash_name, NULL);
820 
821 		hashagain:
822 			hindex = rbt->hindex;
823 			/*
824 			 * Compute the hash over the full absolute
825 			 * name. Look for the smallest suffix match at
826 			 * this tree level (hlevel), and then at every
827 			 * iteration, look for the next smallest suffix
828 			 * match (add another subdomain label to the
829 			 * absolute name being hashed).
830 			 */
831 			dns_name_getlabelsequence(name, nlabels - tlabels,
832 						  hlabels + tlabels,
833 						  &hash_name);
834 			hashval = dns_name_hash(&hash_name);
835 
836 			dns_name_getlabelsequence(search_name,
837 						  nlabels - tlabels, tlabels,
838 						  &hash_name);
839 
840 		nexttable:
841 			/*
842 			 * Walk all the nodes in the hash bucket pointed
843 			 * by the computed hash value.
844 			 */
845 
846 			hash = isc_hash_bits32(hashval, rbt->hashbits[hindex]);
847 
848 			for (hnode = rbt->hashtable[hindex][hash];
849 			     hnode != NULL; hnode = hnode->hashnext)
850 			{
851 				dns_name_t hnode_name;
852 
853 				if (hashval != hnode->hashval) {
854 					continue;
855 				}
856 				/*
857 				 * This checks that the hashed label sequence
858 				 * being looked up is at the same tree level, so
859 				 * that we don't match a labelsequence from some
860 				 * other subdomain.
861 				 */
862 				if (get_upper_node(hnode) != up_current) {
863 					continue;
864 				}
865 
866 				dns_name_init(&hnode_name, NULL);
867 				node_name(hnode, &hnode_name);
868 				if (dns_name_equal(&hnode_name, &hash_name)) {
869 					break;
870 				}
871 			}
872 
873 			if (hnode != NULL) {
874 				current = hnode;
875 				/*
876 				 * This is an optimization.  If hashing found
877 				 * the right node, the next call to
878 				 * dns_name_fullcompare() would obviously
879 				 * return _equal or _subdomain.  Determine
880 				 * which of those would be the case by
881 				 * checking if the full name was hashed.  Then
882 				 * make it look like dns_name_fullcompare
883 				 * was called and jump to the right place.
884 				 */
885 				if (tlabels == nlabels) {
886 					compared = dns_namereln_equal;
887 					break;
888 				} else {
889 					common_labels = tlabels;
890 					compared = dns_namereln_subdomain;
891 					goto subdomain;
892 				}
893 			}
894 
895 			if (TRY_NEXTTABLE(hindex, rbt)) {
896 				/*
897 				 * Rehashing in progress, check the other table
898 				 */
899 				hindex = RBT_HASH_NEXTTABLE(rbt->hindex);
900 				goto nexttable;
901 			}
902 
903 			if (tlabels++ < nlabels) {
904 				goto hashagain;
905 			}
906 
907 			/*
908 			 * All of the labels have been tried against the hash
909 			 * table.
910 			 */
911 			current = NULL;
912 			continue;
913 		} else {
914 			/*
915 			 * The names have some common suffix labels.
916 			 *
917 			 * If the number in common are equal in length to
918 			 * the current node's name length, then follow the
919 			 * down pointer and search in the new tree.
920 			 */
921 			if (compared == dns_namereln_subdomain) {
922 			subdomain:
923 				/*
924 				 * Whack off the current node's common parts
925 				 * for the name to search in the next level.
926 				 */
927 				dns_name_split(search_name, common_labels,
928 					       search_name, NULL);
929 				hlabels += common_labels;
930 				/*
931 				 * This might be the closest enclosing name.
932 				 */
933 				if (WANTEMPTYDATA_OR_DATA(options, current)) {
934 					*node = current;
935 				}
936 
937 				/*
938 				 * Point the chain to the next level.   This
939 				 * needs to be done before 'current' is pointed
940 				 * there because the callback in the next
941 				 * block of code needs the current 'current',
942 				 * but in the event the callback requests that
943 				 * the search be stopped then the
944 				 * DNS_R_PARTIALMATCH code at the end of this
945 				 * function needs the chain pointed to the
946 				 * next level.
947 				 */
948 				ADD_LEVEL(chain, current);
949 
950 				/*
951 				 * The caller may want to interrupt the
952 				 * downward search when certain special nodes
953 				 * are traversed.  If this is a special node,
954 				 * the callback is used to learn what the
955 				 * caller wants to do.
956 				 */
957 				if (callback != NULL && current->find_callback)
958 				{
959 					result = chain_name(
960 						chain, callback_name, false);
961 					if (result != ISC_R_SUCCESS) {
962 						dns_rbtnodechain_reset(chain);
963 						return result;
964 					}
965 
966 					result =
967 						(callback)(current,
968 							   callback_name,
969 							   callback_arg
970 								   DNS__DB_FLARG_PASS);
971 					if (result != DNS_R_CONTINUE) {
972 						saved_result = result;
973 						/*
974 						 * Treat this node as if it
975 						 * had no down pointer.
976 						 */
977 						current = NULL;
978 						break;
979 					}
980 				}
981 
982 				/*
983 				 * Finally, head to the next tree level.
984 				 */
985 				current = current->down;
986 			} else {
987 				/*
988 				 * Though there are labels in common, the
989 				 * entire name at this node is not common
990 				 * with the search name so the search
991 				 * name does not exist in the tree.
992 				 */
993 				INSIST(compared ==
994 					       dns_namereln_commonancestor ||
995 				       compared == dns_namereln_contains);
996 
997 				current = NULL;
998 			}
999 		}
1000 	}
1001 
1002 	/*
1003 	 * If current is not NULL, NOEXACT is not disallowing exact matches,
1004 	 * and either the node has data or an empty node is ok, return
1005 	 * ISC_R_SUCCESS to indicate an exact match.
1006 	 */
1007 	if (current != NULL && (options & DNS_RBTFIND_NOEXACT) == 0 &&
1008 	    WANTEMPTYDATA_OR_DATA(options, current))
1009 	{
1010 		/*
1011 		 * Found an exact match.
1012 		 */
1013 		chain->end = current;
1014 		chain->level_matches = chain->level_count;
1015 
1016 		if (foundname != NULL) {
1017 			result = chain_name(chain, foundname, true);
1018 		} else {
1019 			result = ISC_R_SUCCESS;
1020 		}
1021 
1022 		if (result == ISC_R_SUCCESS) {
1023 			*node = current;
1024 			result = saved_result;
1025 		} else {
1026 			*node = NULL;
1027 		}
1028 	} else {
1029 		/*
1030 		 * Did not find an exact match (or did not want one).
1031 		 */
1032 		if (*node != NULL) {
1033 			/*
1034 			 * ... but found a partially matching superdomain.
1035 			 * Unwind the chain to the partial match node
1036 			 * to set level_matches to the level above the node,
1037 			 * and then to derive the name.
1038 			 *
1039 			 * chain->level_count is guaranteed to be at least 1
1040 			 * here because by definition of finding a superdomain,
1041 			 * the chain is pointed to at least the first subtree.
1042 			 */
1043 			chain->level_matches = chain->level_count - 1;
1044 
1045 			while (chain->levels[chain->level_matches] != *node) {
1046 				INSIST(chain->level_matches > 0);
1047 				chain->level_matches--;
1048 			}
1049 
1050 			if (foundname != NULL) {
1051 				unsigned int saved_count = chain->level_count;
1052 
1053 				chain->level_count = chain->level_matches + 1;
1054 
1055 				result = chain_name(chain, foundname, false);
1056 
1057 				chain->level_count = saved_count;
1058 			} else {
1059 				result = ISC_R_SUCCESS;
1060 			}
1061 
1062 			if (result == ISC_R_SUCCESS) {
1063 				result = DNS_R_PARTIALMATCH;
1064 			}
1065 		} else {
1066 			result = ISC_R_NOTFOUND;
1067 		}
1068 
1069 		if (current != NULL) {
1070 			/*
1071 			 * There was an exact match but either
1072 			 * DNS_RBTFIND_NOEXACT was set, or
1073 			 * DNS_RBTFIND_EMPTYDATA was set and the node had no
1074 			 * data.  A policy decision was made to set the
1075 			 * chain to the exact match, but this is subject
1076 			 * to change if it becomes apparent that something
1077 			 * else would be more useful.  It is important that
1078 			 * this case is handled here, because the predecessor
1079 			 * setting code below assumes the match was not exact.
1080 			 */
1081 			INSIST(((options & DNS_RBTFIND_NOEXACT) != 0) ||
1082 			       ((options & DNS_RBTFIND_EMPTYDATA) == 0 &&
1083 				current->data == NULL));
1084 			chain->end = current;
1085 		} else if ((options & DNS_RBTFIND_NOPREDECESSOR) != 0) {
1086 			/*
1087 			 * Ensure the chain points nowhere.
1088 			 */
1089 			chain->end = NULL;
1090 		} else {
1091 			/*
1092 			 * Since there was no exact match, the chain argument
1093 			 * needs to be pointed at the DNSSEC predecessor of
1094 			 * the search name.
1095 			 */
1096 			if (compared == dns_namereln_subdomain) {
1097 				/*
1098 				 * Attempted to follow a down pointer that was
1099 				 * NULL, which means the searched for name was
1100 				 * a subdomain of a terminal name in the tree.
1101 				 * Since there are no existing subdomains to
1102 				 * order against, the terminal name is the
1103 				 * predecessor.
1104 				 */
1105 				INSIST(chain->level_count > 0);
1106 				INSIST(chain->level_matches <
1107 				       chain->level_count);
1108 				chain->end =
1109 					chain->levels[--chain->level_count];
1110 			} else {
1111 				isc_result_t result2;
1112 
1113 				/*
1114 				 * Point current to the node that stopped
1115 				 * the search.
1116 				 *
1117 				 * With the hashing modification that has been
1118 				 * added to the algorithm, the stop node of a
1119 				 * standard binary search is not known.  So it
1120 				 * has to be found.  There is probably a more
1121 				 * clever way of doing this.
1122 				 *
1123 				 * The assignment of current to NULL when
1124 				 * the relationship is *not* dns_namereln_none,
1125 				 * even though it later gets set to the same
1126 				 * last_compared anyway, is simply to not push
1127 				 * the while loop in one more level of
1128 				 * indentation.
1129 				 */
1130 				if (compared == dns_namereln_none) {
1131 					current = last_compared;
1132 				} else {
1133 					current = NULL;
1134 				}
1135 
1136 				while (current != NULL) {
1137 					node_name(current, &current_name);
1138 					compared = dns_name_fullcompare(
1139 						search_name, &current_name,
1140 						&order, &common_labels);
1141 					POST(compared);
1142 
1143 					last_compared = current;
1144 
1145 					/*
1146 					 * Standard binary search movement.
1147 					 */
1148 					if (order < 0) {
1149 						current = current->left;
1150 					} else {
1151 						current = current->right;
1152 					}
1153 				}
1154 
1155 				current = last_compared;
1156 
1157 				/*
1158 				 * Reached a point within a level tree that
1159 				 * positively indicates the name is not
1160 				 * present, but the stop node could be either
1161 				 * less than the desired name (order > 0) or
1162 				 * greater than the desired name (order < 0).
1163 				 *
1164 				 * If the stop node is less, it is not
1165 				 * necessarily the predecessor.  If the stop
1166 				 * node has a down pointer, then the real
1167 				 * predecessor is at the end of a level below
1168 				 * (not necessarily the next level).
1169 				 * Move down levels until the rightmost node
1170 				 * does not have a down pointer.
1171 				 *
1172 				 * When the stop node is greater, it is
1173 				 * the successor.  All the logic for finding
1174 				 * the predecessor is handily encapsulated
1175 				 * in dns_rbtnodechain_prev.  In the event
1176 				 * that the search name is less than anything
1177 				 * else in the tree, the chain is reset.
1178 				 * XXX DCL What is the best way for the caller
1179 				 *         to know that the search name has
1180 				 *         no predecessor?
1181 				 */
1182 
1183 				if (order > 0) {
1184 					if (current->down != NULL) {
1185 						ADD_LEVEL(chain, current);
1186 
1187 						result2 = move_chain_to_last(
1188 							chain, current->down);
1189 
1190 						if (result2 != ISC_R_SUCCESS) {
1191 							result = result2;
1192 						}
1193 					} else {
1194 						/*
1195 						 * Ah, the pure and simple
1196 						 * case.  The stop node is the
1197 						 * predecessor.
1198 						 */
1199 						chain->end = current;
1200 					}
1201 				} else {
1202 					INSIST(order < 0);
1203 
1204 					chain->end = current;
1205 
1206 					result2 = dns_rbtnodechain_prev(
1207 						chain, NULL, NULL);
1208 					if (result2 == ISC_R_SUCCESS ||
1209 					    result2 == DNS_R_NEWORIGIN)
1210 					{
1211 						/* Nothing. */
1212 					} else if (result2 == ISC_R_NOMORE) {
1213 						/*
1214 						 * There is no predecessor.
1215 						 */
1216 						dns_rbtnodechain_reset(chain);
1217 					} else {
1218 						result = result2;
1219 					}
1220 				}
1221 			}
1222 		}
1223 	}
1224 
1225 	ENSURE(*node == NULL || DNS_RBTNODE_VALID(*node));
1226 
1227 	return result;
1228 }
1229 
1230 /*
1231  * Remove a node from the tree of trees.
1232  *
1233  * NOTE WELL: deletion is *not* symmetric with addition; that is, reversing
1234  * a sequence of additions to be deletions will not generally get the
1235  * tree back to the state it started in.  For example, if the addition
1236  * of "b.c" caused the node "a.b.c" to be split, pushing "a" to its own level,
1237  * then the subsequent deletion of "b.c" will not cause "a" to be pulled up,
1238  * restoring "a.b.c".  The RBT *used* to do this kind of rejoining, but it
1239  * turned out to be a bad idea because it could corrupt an active nodechain
1240  * that had "b.c" as one of its levels -- and the RBT has no idea what
1241  * nodechains are in use by callers, so it can't even *try* to helpfully
1242  * fix them up (which would probably be doomed to failure anyway).
1243  *
1244  * Similarly, it is possible to leave the tree in a state where a supposedly
1245  * deleted node still exists.  The first case of this is obvious; take
1246  * the tree which has "b.c" on one level, pointing to "a".  Now deleted "b.c".
1247  * It was just established in the previous paragraph why we can't pull "a"
1248  * back up to its parent level.  But what happens when "a" then gets deleted?
1249  * "b.c" is left hanging around without data or children.  This condition
1250  * is actually pretty easy to detect, but ... should it really be removed?
1251  * Is a chain pointing to it?  An iterator?  Who knows!  (Note that the
1252  * references structure member cannot be looked at because it is private to
1253  * rbtdb.)  This is ugly and makes me unhappy, but after hours of trying to
1254  * make it more aesthetically proper and getting nowhere, this is the way it
1255  * is going to stay until such time as it proves to be a *real* problem.
1256  *
1257  * Finally, for reference, note that the original routine that did node
1258  * joining was called join_nodes().  It has been excised, living now only
1259  * in the CVS history, but comments have been left behind that point to it just
1260  * in case someone wants to muck with this some more.
1261  *
1262  * The one positive aspect of all of this is that joining used to have a
1263  * case where it might fail.  Without trying to join, now this function always
1264  * succeeds. It still returns isc_result_t, though, so the API wouldn't change.
1265  */
1266 isc_result_t
1267 dns_rbt_deletenode(dns_rbt_t *rbt, dns_rbtnode_t *node, bool recurse) {
1268 	dns_rbtnode_t *parent;
1269 
1270 	REQUIRE(VALID_RBT(rbt));
1271 	REQUIRE(DNS_RBTNODE_VALID(node));
1272 	INSIST(rbt->nodecount != 0);
1273 
1274 	if (node->down != NULL) {
1275 		if (recurse) {
1276 			node->down->parent = NULL;
1277 			deletetreeflat(rbt, 0, true, &node->down);
1278 		} else {
1279 			if (node->data != NULL && rbt->data_deleter != NULL) {
1280 				rbt->data_deleter(node->data, rbt->deleter_arg);
1281 			}
1282 			node->data = NULL;
1283 
1284 			/*
1285 			 * Since there is at least one node below this one and
1286 			 * no recursion was requested, the deletion is
1287 			 * complete.  The down node from this node might be all
1288 			 * by itself on a single level, so join_nodes() could
1289 			 * be used to collapse the tree (with all the caveats
1290 			 * of the comment at the start of this function).
1291 			 * But join_nodes() function has now been removed.
1292 			 */
1293 			return ISC_R_SUCCESS;
1294 		}
1295 	}
1296 
1297 	/*
1298 	 * Note the node that points to the level of the node
1299 	 * that is being deleted.  If the deleted node is the
1300 	 * top level, parent will be set to NULL.
1301 	 */
1302 	parent = get_upper_node(node);
1303 
1304 	/*
1305 	 * This node now has no down pointer, so now it needs
1306 	 * to be removed from this level.
1307 	 */
1308 	deletefromlevel(node, parent == NULL ? &rbt->root : &parent->down);
1309 
1310 	if (node->data != NULL && rbt->data_deleter != NULL) {
1311 		rbt->data_deleter(node->data, rbt->deleter_arg);
1312 	}
1313 
1314 	unhash_node(rbt, node);
1315 #if DNS_RBT_USEMAGIC
1316 	node->magic = 0;
1317 #endif /* if DNS_RBT_USEMAGIC */
1318 	isc_refcount_destroy(&node->references);
1319 
1320 	freenode(rbt, &node);
1321 
1322 	/*
1323 	 * This function never fails.
1324 	 */
1325 	return ISC_R_SUCCESS;
1326 }
1327 
1328 void
1329 dns_rbt_namefromnode(dns_rbtnode_t *node, dns_name_t *name) {
1330 	REQUIRE(DNS_RBTNODE_VALID(node));
1331 	REQUIRE(name != NULL);
1332 	REQUIRE(name->offsets == NULL);
1333 
1334 	node_name(node, name);
1335 }
1336 
1337 isc_result_t
1338 dns_rbt_fullnamefromnode(dns_rbtnode_t *node, dns_name_t *name) {
1339 	dns_name_t current;
1340 	isc_result_t result;
1341 
1342 	REQUIRE(DNS_RBTNODE_VALID(node));
1343 	REQUIRE(name != NULL);
1344 	REQUIRE(name->buffer != NULL);
1345 
1346 	dns_name_init(&current, NULL);
1347 	dns_name_reset(name);
1348 
1349 	do {
1350 		INSIST(node != NULL);
1351 
1352 		node_name(node, &current);
1353 
1354 		result = dns_name_concatenate(name, &current, name, NULL);
1355 		if (result != ISC_R_SUCCESS) {
1356 			break;
1357 		}
1358 
1359 		node = get_upper_node(node);
1360 	} while (!dns_name_isabsolute(name));
1361 
1362 	return result;
1363 }
1364 
1365 char *
1366 dns_rbt_formatnodename(dns_rbtnode_t *node, char *printname,
1367 		       unsigned int size) {
1368 	dns_fixedname_t fixedname;
1369 	dns_name_t *name;
1370 	isc_result_t result;
1371 
1372 	REQUIRE(DNS_RBTNODE_VALID(node));
1373 	REQUIRE(printname != NULL);
1374 
1375 	name = dns_fixedname_initname(&fixedname);
1376 	result = dns_rbt_fullnamefromnode(node, name);
1377 	if (result == ISC_R_SUCCESS) {
1378 		dns_name_format(name, printname, size);
1379 	} else {
1380 		snprintf(printname, size, "<error building name: %s>",
1381 			 isc_result_totext(result));
1382 	}
1383 
1384 	return printname;
1385 }
1386 
1387 static dns_rbtnode_t *
1388 rbtnode_new(isc_mem_t *mctx, const dns_name_t *name) {
1389 	dns_rbtnode_t *node = NULL;
1390 	isc_region_t region;
1391 	unsigned int labels;
1392 	size_t nodelen;
1393 
1394 	REQUIRE(name->offsets != NULL);
1395 
1396 	dns_name_toregion(name, &region);
1397 	labels = dns_name_countlabels(name);
1398 	ENSURE(labels > 0);
1399 
1400 	/*
1401 	 * Allocate space for the node structure, the name, and the offsets.
1402 	 */
1403 	nodelen = sizeof(dns_rbtnode_t) + region.length + labels + 1;
1404 	node = isc_mem_get(mctx, nodelen);
1405 	*node = (dns_rbtnode_t){
1406 		.color = BLACK,
1407 		.nsec = DNS_DB_NSEC_NORMAL,
1408 	};
1409 
1410 	ISC_LINK_INIT(node, deadlink);
1411 
1412 	isc_refcount_init(&node->references, 0);
1413 
1414 	/*
1415 	 * The following is stored to make reconstructing a name from the
1416 	 * stored value in the node easy:  the length of the name, the number
1417 	 * of labels, whether the name is absolute or not, the name itself,
1418 	 * and the name's offsets table.
1419 	 *
1420 	 * XXX RTH
1421 	 *      The offsets table could be made smaller by eliminating the
1422 	 *      first offset, which is always 0.  This requires changes to
1423 	 *      lib/dns/name.c.
1424 	 *
1425 	 * Note: OLDOFFSETLEN *must* be assigned *after* OLDNAMELEN is assigned
1426 	 * 	 as it uses OLDNAMELEN.
1427 	 */
1428 	node->oldnamelen = node->namelen = region.length;
1429 	OLDOFFSETLEN(node) = node->offsetlen = labels;
1430 	node->absolute = name->attributes.absolute;
1431 
1432 	memmove(NAME(node), region.base, region.length);
1433 	memmove(OFFSETS(node), name->offsets, labels);
1434 
1435 #if DNS_RBT_USEMAGIC
1436 	node->magic = DNS_RBTNODE_MAGIC;
1437 #endif /* if DNS_RBT_USEMAGIC */
1438 	return node;
1439 }
1440 
1441 /*
1442  * Add a node to the hash table
1443  */
1444 static void
1445 hash_add_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) {
1446 	uint32_t hash;
1447 
1448 	REQUIRE(name != NULL);
1449 
1450 	node->hashval = dns_name_hash(name);
1451 
1452 	hash = isc_hash_bits32(node->hashval, rbt->hashbits[rbt->hindex]);
1453 	node->hashnext = rbt->hashtable[rbt->hindex][hash];
1454 
1455 	rbt->hashtable[rbt->hindex][hash] = node;
1456 }
1457 
1458 /*
1459  * Initialize hash table
1460  */
1461 static void
1462 hashtable_new(dns_rbt_t *rbt, uint8_t index, uint8_t bits) {
1463 	REQUIRE(rbt->hashbits[index] == 0U);
1464 	REQUIRE(rbt->hashtable[index] == NULL);
1465 	REQUIRE(bits >= ISC_HASH_MIN_BITS);
1466 	REQUIRE(bits < ISC_HASH_MAX_BITS);
1467 
1468 	rbt->hashbits[index] = bits;
1469 
1470 	rbt->hashtable[index] = isc_mem_cget(rbt->mctx,
1471 					     ISC_HASHSIZE(rbt->hashbits[index]),
1472 					     sizeof(dns_rbtnode_t *));
1473 }
1474 
1475 static void
1476 hashtable_free(dns_rbt_t *rbt, uint8_t index) {
1477 	isc_mem_cput(rbt->mctx, rbt->hashtable[index],
1478 		     ISC_HASHSIZE(rbt->hashbits[index]),
1479 		     sizeof(dns_rbtnode_t *));
1480 
1481 	rbt->hashbits[index] = 0U;
1482 	rbt->hashtable[index] = NULL;
1483 }
1484 
1485 static uint32_t
1486 rehash_bits(dns_rbt_t *rbt, size_t newcount) {
1487 	uint32_t newbits = rbt->hashbits[rbt->hindex];
1488 
1489 	while (newcount >= ISC_HASHSIZE(newbits) && newbits < ISC_HASH_MAX_BITS)
1490 	{
1491 		newbits += 1;
1492 	}
1493 
1494 	return newbits;
1495 }
1496 
1497 /*
1498  * Rebuild the hashtable to reduce the load factor
1499  */
1500 static void
1501 hashtable_rehash(dns_rbt_t *rbt, uint32_t newbits) {
1502 	uint8_t oldindex = rbt->hindex;
1503 	uint32_t oldbits = rbt->hashbits[oldindex];
1504 	uint8_t newindex = RBT_HASH_NEXTTABLE(oldindex);
1505 
1506 	REQUIRE(rbt->hashbits[oldindex] >= ISC_HASH_MIN_BITS);
1507 	REQUIRE(rbt->hashbits[oldindex] <= ISC_HASH_MAX_BITS);
1508 	REQUIRE(rbt->hashtable[oldindex] != NULL);
1509 
1510 	REQUIRE(newbits <= ISC_HASH_MAX_BITS);
1511 	REQUIRE(rbt->hashbits[newindex] == 0U);
1512 	REQUIRE(rbt->hashtable[newindex] == NULL);
1513 
1514 	REQUIRE(newbits > oldbits);
1515 
1516 	hashtable_new(rbt, newindex, newbits);
1517 
1518 	rbt->hindex = newindex;
1519 
1520 	hashtable_rehash_one(rbt);
1521 }
1522 
1523 static void
1524 hashtable_rehash_one(dns_rbt_t *rbt) {
1525 	dns_rbtnode_t **newtable = rbt->hashtable[rbt->hindex];
1526 	uint32_t oldsize =
1527 		ISC_HASHSIZE(rbt->hashbits[RBT_HASH_NEXTTABLE(rbt->hindex)]);
1528 	dns_rbtnode_t **oldtable =
1529 		rbt->hashtable[RBT_HASH_NEXTTABLE(rbt->hindex)];
1530 	dns_rbtnode_t *node = NULL;
1531 	dns_rbtnode_t *nextnode;
1532 
1533 	/* Find first non-empty node */
1534 	while (rbt->hiter < oldsize && oldtable[rbt->hiter] == NULL) {
1535 		rbt->hiter++;
1536 	}
1537 
1538 	/* Rehashing complete */
1539 	if (rbt->hiter == oldsize) {
1540 		hashtable_free(rbt, RBT_HASH_NEXTTABLE(rbt->hindex));
1541 		rbt->hiter = 0;
1542 		return;
1543 	}
1544 
1545 	/* Move the first non-empty node from old hashtable to new hashtable */
1546 	for (node = oldtable[rbt->hiter]; node != NULL; node = nextnode) {
1547 		uint32_t hash = isc_hash_bits32(node->hashval,
1548 						rbt->hashbits[rbt->hindex]);
1549 		nextnode = node->hashnext;
1550 		node->hashnext = newtable[hash];
1551 		newtable[hash] = node;
1552 	}
1553 
1554 	oldtable[rbt->hiter] = NULL;
1555 
1556 	rbt->hiter++;
1557 }
1558 
1559 static void
1560 maybe_rehash(dns_rbt_t *rbt, size_t newcount) {
1561 	uint32_t newbits = rehash_bits(rbt, newcount);
1562 
1563 	if (rbt->hashbits[rbt->hindex] < newbits &&
1564 	    newbits <= ISC_HASH_MAX_BITS)
1565 	{
1566 		hashtable_rehash(rbt, newbits);
1567 	}
1568 }
1569 
1570 static bool
1571 rehashing_in_progress(dns_rbt_t *rbt) {
1572 	return rbt->hashtable[RBT_HASH_NEXTTABLE(rbt->hindex)] != NULL;
1573 }
1574 
1575 static bool
1576 hashtable_is_overcommited(dns_rbt_t *rbt) {
1577 	return rbt->nodecount >=
1578 	       (ISC_HASHSIZE(rbt->hashbits[rbt->hindex]) * ISC_HASH_OVERCOMMIT);
1579 }
1580 
1581 /*
1582  * Add a node to the hash table. Rehash the hashtable if the node count
1583  * rises above a critical level.
1584  */
1585 static void
1586 hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) {
1587 	REQUIRE(DNS_RBTNODE_VALID(node));
1588 
1589 	if (rehashing_in_progress(rbt)) {
1590 		/* Rehash in progress */
1591 		hashtable_rehash_one(rbt);
1592 	} else if (hashtable_is_overcommited(rbt)) {
1593 		/* Rehash requested */
1594 		maybe_rehash(rbt, rbt->nodecount);
1595 	}
1596 
1597 	hash_add_node(rbt, node, name);
1598 }
1599 
1600 /*
1601  * Remove a node from the hash table
1602  */
1603 static void
1604 unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *dnode) {
1605 	uint32_t hash;
1606 	uint8_t hindex = rbt->hindex;
1607 	dns_rbtnode_t *hnode;
1608 
1609 	REQUIRE(DNS_RBTNODE_VALID(dnode));
1610 
1611 	/*
1612 	 * The node could be either in:
1613 	 *  a) current table: no rehashing in progress, or
1614 	 *  b) current table: the node has been already moved, or
1615 	 *  c) other table: the node hasn't been moved yet.
1616 	 */
1617 nexttable:
1618 	hash = isc_hash_bits32(dnode->hashval, rbt->hashbits[hindex]);
1619 
1620 	hnode = rbt->hashtable[hindex][hash];
1621 
1622 	if (hnode == dnode) {
1623 		rbt->hashtable[hindex][hash] = hnode->hashnext;
1624 		return;
1625 	} else {
1626 		for (; hnode != NULL; hnode = hnode->hashnext) {
1627 			if (hnode->hashnext == dnode) {
1628 				hnode->hashnext = dnode->hashnext;
1629 				return;
1630 			}
1631 		}
1632 	}
1633 
1634 	if (TRY_NEXTTABLE(hindex, rbt)) {
1635 		/* Rehashing in progress, delete from the other table */
1636 		hindex = RBT_HASH_NEXTTABLE(hindex);
1637 		goto nexttable;
1638 	}
1639 
1640 	/* We haven't found any matching node, this should not be possible. */
1641 	UNREACHABLE();
1642 }
1643 
1644 static void
1645 rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp) {
1646 	dns_rbtnode_t *child;
1647 
1648 	REQUIRE(DNS_RBTNODE_VALID(node));
1649 	REQUIRE(rootp != NULL);
1650 
1651 	child = node->right;
1652 	INSIST(child != NULL);
1653 
1654 	node->right = child->left;
1655 	if (child->left != NULL) {
1656 		child->left->parent = node;
1657 	}
1658 	child->left = node;
1659 
1660 	child->parent = node->parent;
1661 
1662 	if (node->is_root) {
1663 		*rootp = child;
1664 		child->is_root = 1;
1665 		node->is_root = 0;
1666 	} else {
1667 		if (node->parent->left == node) {
1668 			node->parent->left = child;
1669 		} else {
1670 			node->parent->right = child;
1671 		}
1672 	}
1673 
1674 	node->parent = child;
1675 }
1676 
1677 static void
1678 rotate_right(dns_rbtnode_t *node, dns_rbtnode_t **rootp) {
1679 	dns_rbtnode_t *child;
1680 
1681 	REQUIRE(DNS_RBTNODE_VALID(node));
1682 	REQUIRE(rootp != NULL);
1683 
1684 	child = node->left;
1685 	INSIST(child != NULL);
1686 
1687 	node->left = child->right;
1688 	if (child->right != NULL) {
1689 		child->right->parent = node;
1690 	}
1691 	child->right = node;
1692 
1693 	child->parent = node->parent;
1694 
1695 	if (node->is_root) {
1696 		*rootp = child;
1697 		child->is_root = 1;
1698 		node->is_root = 0;
1699 	} else {
1700 		if (node->parent->left == node) {
1701 			node->parent->left = child;
1702 		} else {
1703 			node->parent->right = child;
1704 		}
1705 	}
1706 
1707 	node->parent = child;
1708 }
1709 
1710 /*
1711  * This is the real workhorse of the insertion code, because it does the
1712  * true red/black tree on a single level.
1713  */
1714 static void
1715 addonlevel(dns_rbtnode_t *node, dns_rbtnode_t *current, int order,
1716 	   dns_rbtnode_t **rootp) {
1717 	dns_rbtnode_t *child, *root, *parent, *grandparent;
1718 	dns_name_t add_name, current_name;
1719 	dns_offsets_t add_offsets, current_offsets;
1720 
1721 	REQUIRE(rootp != NULL);
1722 	REQUIRE(DNS_RBTNODE_VALID(node) && node->left == NULL &&
1723 		node->right == NULL);
1724 	REQUIRE(current != NULL);
1725 
1726 	root = *rootp;
1727 	if (root == NULL) {
1728 		/*
1729 		 * First node of a level.
1730 		 */
1731 		node->color = BLACK;
1732 		node->is_root = 1;
1733 		node->parent = current;
1734 		*rootp = node;
1735 		return;
1736 	}
1737 
1738 	child = root;
1739 	POST(child);
1740 
1741 	dns_name_init(&add_name, add_offsets);
1742 	node_name(node, &add_name);
1743 
1744 	dns_name_init(&current_name, current_offsets);
1745 	node_name(current, &current_name);
1746 
1747 	if (order < 0) {
1748 		INSIST(current->left == NULL);
1749 		current->left = node;
1750 	} else {
1751 		INSIST(current->right == NULL);
1752 		current->right = node;
1753 	}
1754 
1755 	INSIST(node->parent == NULL);
1756 	node->parent = current;
1757 
1758 	node->color = RED;
1759 
1760 	while (node != root && IS_RED(node->parent)) {
1761 		/*
1762 		 * XXXDCL could do away with separate parent and grandparent
1763 		 * variables.  They are vestiges of the days before parent
1764 		 * pointers.  However, they make the code a little clearer.
1765 		 */
1766 
1767 		parent = node->parent;
1768 		grandparent = parent->parent;
1769 
1770 		if (parent == grandparent->left) {
1771 			child = grandparent->right;
1772 			if (child != NULL && IS_RED(child)) {
1773 				parent->color = BLACK;
1774 				child->color = BLACK;
1775 				grandparent->color = RED;
1776 				node = grandparent;
1777 			} else {
1778 				if (node == parent->right) {
1779 					rotate_left(parent, &root);
1780 					node = parent;
1781 					parent = node->parent;
1782 					grandparent = parent->parent;
1783 				}
1784 				parent->color = BLACK;
1785 				grandparent->color = RED;
1786 				rotate_right(grandparent, &root);
1787 			}
1788 		} else {
1789 			child = grandparent->left;
1790 			if (child != NULL && IS_RED(child)) {
1791 				parent->color = BLACK;
1792 				child->color = BLACK;
1793 				grandparent->color = RED;
1794 				node = grandparent;
1795 			} else {
1796 				if (node == parent->left) {
1797 					rotate_right(parent, &root);
1798 					node = parent;
1799 					parent = node->parent;
1800 					grandparent = parent->parent;
1801 				}
1802 				parent->color = BLACK;
1803 				grandparent->color = RED;
1804 				rotate_left(grandparent, &root);
1805 			}
1806 		}
1807 	}
1808 
1809 	root->color = BLACK;
1810 	ENSURE(root->is_root);
1811 	*rootp = root;
1812 
1813 	return;
1814 }
1815 
1816 /*
1817  * This is the real workhorse of the deletion code, because it does the
1818  * true red/black tree on a single level.
1819  */
1820 static void
1821 deletefromlevel(dns_rbtnode_t *item, dns_rbtnode_t **rootp) {
1822 	dns_rbtnode_t *child, *sibling, *parent;
1823 	dns_rbtnode_t *successor;
1824 
1825 	REQUIRE(item != NULL);
1826 
1827 	/*
1828 	 * Verify that the parent history is (apparently) correct.
1829 	 */
1830 	INSIST((item->is_root && *rootp == item) ||
1831 	       (!item->is_root &&
1832 		(item->parent->left == item || item->parent->right == item)));
1833 
1834 	child = NULL;
1835 
1836 	if (item->left == NULL) {
1837 		if (item->right == NULL) {
1838 			if (item->is_root) {
1839 				/*
1840 				 * This is the only item in the tree.
1841 				 */
1842 				*rootp = NULL;
1843 				return;
1844 			}
1845 		} else {
1846 			/*
1847 			 * This node has one child, on the right.
1848 			 */
1849 			child = item->right;
1850 		}
1851 	} else if (item->right == NULL) {
1852 		/*
1853 		 * This node has one child, on the left.
1854 		 */
1855 		child = item->left;
1856 	} else {
1857 		dns_rbtnode_t *saved_parent, *saved_right;
1858 		int saved_color;
1859 
1860 		/*
1861 		 * This node has two children, so it cannot be directly
1862 		 * deleted.  Find its immediate in-order successor and
1863 		 * move it to this location, then do the deletion at the
1864 		 * old site of the successor.
1865 		 */
1866 		successor = item->right;
1867 		while (successor->left != NULL) {
1868 			successor = successor->left;
1869 		}
1870 
1871 		/*
1872 		 * The successor cannot possibly have a left child;
1873 		 * if there is any child, it is on the right.
1874 		 */
1875 		if (successor->right != NULL) {
1876 			child = successor->right;
1877 		}
1878 
1879 		/*
1880 		 * Swap the two nodes; it would be simpler to just replace
1881 		 * the value being deleted with that of the successor,
1882 		 * but this rigamarole is done so the caller has complete
1883 		 * control over the pointers (and memory allocation) of
1884 		 * all of nodes.  If just the key value were removed from
1885 		 * the tree, the pointer to the node would be unchanged.
1886 		 */
1887 
1888 		/*
1889 		 * First, put the successor in the tree location of the
1890 		 * node to be deleted.  Save its existing tree pointer
1891 		 * information, which will be needed when linking up
1892 		 * delete to the successor's old location.
1893 		 */
1894 		saved_parent = successor->parent;
1895 		saved_right = successor->right;
1896 		saved_color = successor->color;
1897 
1898 		if (item->is_root) {
1899 			*rootp = successor;
1900 			successor->is_root = true;
1901 			item->is_root = false;
1902 		} else if (item->parent->left == item) {
1903 			item->parent->left = successor;
1904 		} else {
1905 			item->parent->right = successor;
1906 		}
1907 
1908 		successor->parent = item->parent;
1909 		successor->left = item->left;
1910 		successor->right = item->right;
1911 		successor->color = item->color;
1912 
1913 		if (successor->left != NULL) {
1914 			successor->left->parent = successor;
1915 		}
1916 		if (successor->right != successor) {
1917 			successor->right->parent = successor;
1918 		}
1919 
1920 		/*
1921 		 * Now relink the node to be deleted into the
1922 		 * successor's previous tree location.
1923 		 */
1924 		INSIST(!item->is_root);
1925 
1926 		if (saved_parent == item) {
1927 			/*
1928 			 * Node being deleted was successor's parent.
1929 			 */
1930 			successor->right = item;
1931 			item->parent = successor;
1932 		} else {
1933 			saved_parent->left = item;
1934 			item->parent = saved_parent;
1935 		}
1936 
1937 		/*
1938 		 * Original location of successor node has no left.
1939 		 */
1940 		item->left = NULL;
1941 		item->right = saved_right;
1942 		item->color = saved_color;
1943 	}
1944 
1945 	/*
1946 	 * Remove the node by removing the links from its parent.
1947 	 */
1948 	if (!item->is_root) {
1949 		if (item->parent->left == item) {
1950 			item->parent->left = child;
1951 		} else {
1952 			item->parent->right = child;
1953 		}
1954 
1955 		if (child != NULL) {
1956 			child->parent = item->parent;
1957 		}
1958 	} else {
1959 		/*
1960 		 * This is the root being deleted, and at this point
1961 		 * it is known to have just one child.
1962 		 */
1963 		*rootp = child;
1964 		child->is_root = 1;
1965 		child->parent = item->parent;
1966 	}
1967 
1968 	/*
1969 	 * Fix color violations.
1970 	 */
1971 	if (IS_BLACK(item)) {
1972 		parent = item->parent;
1973 
1974 		while (child != *rootp && IS_BLACK(child)) {
1975 			INSIST(child == NULL || !child->is_root);
1976 
1977 			if (parent->left == child) {
1978 				sibling = parent->right;
1979 
1980 				if (IS_RED(sibling)) {
1981 					sibling->color = BLACK;
1982 					parent->color = RED;
1983 					rotate_left(parent, rootp);
1984 					sibling = parent->right;
1985 				}
1986 
1987 				INSIST(sibling != NULL);
1988 
1989 				if (IS_BLACK(sibling->left) &&
1990 				    IS_BLACK(sibling->right))
1991 				{
1992 					sibling->color = RED;
1993 					child = parent;
1994 				} else {
1995 					if (IS_BLACK(sibling->right)) {
1996 						sibling->left->color = BLACK;
1997 						sibling->color = RED;
1998 						rotate_right(sibling, rootp);
1999 						sibling = parent->right;
2000 					}
2001 
2002 					sibling->color = parent->color;
2003 					parent->color = BLACK;
2004 					INSIST(sibling->right != NULL);
2005 					sibling->right->color = BLACK;
2006 					rotate_left(parent, rootp);
2007 					child = *rootp;
2008 				}
2009 			} else {
2010 				/*
2011 				 * Child is parent's right child.
2012 				 * Everything is done the same as above,
2013 				 * except mirrored.
2014 				 */
2015 				sibling = parent->left;
2016 
2017 				if (IS_RED(sibling)) {
2018 					sibling->color = BLACK;
2019 					parent->color = RED;
2020 					rotate_right(parent, rootp);
2021 					sibling = parent->left;
2022 				}
2023 
2024 				INSIST(sibling != NULL);
2025 
2026 				if (IS_BLACK(sibling->left) &&
2027 				    IS_BLACK(sibling->right))
2028 				{
2029 					sibling->color = RED;
2030 					child = parent;
2031 				} else {
2032 					if (IS_BLACK(sibling->left)) {
2033 						sibling->right->color = BLACK;
2034 						sibling->color = RED;
2035 						rotate_left(sibling, rootp);
2036 						sibling = parent->left;
2037 					}
2038 
2039 					sibling->color = parent->color;
2040 					parent->color = BLACK;
2041 					INSIST(sibling->left != NULL);
2042 					sibling->left->color = BLACK;
2043 					rotate_right(parent, rootp);
2044 					child = *rootp;
2045 				}
2046 			}
2047 
2048 			parent = child->parent;
2049 		}
2050 
2051 		if (IS_RED(child)) {
2052 			child->color = BLACK;
2053 		}
2054 	}
2055 }
2056 
2057 static void
2058 freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep) {
2059 	dns_rbtnode_t *node = *nodep;
2060 	*nodep = NULL;
2061 
2062 	isc_mem_put(rbt->mctx, node, NODE_SIZE(node));
2063 
2064 	rbt->nodecount--;
2065 }
2066 
2067 static void
2068 deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, bool unhash,
2069 	       dns_rbtnode_t **nodep) {
2070 	dns_rbtnode_t *root = *nodep;
2071 
2072 	while (root != NULL) {
2073 		/*
2074 		 * If there is a left, right or down node, walk into it
2075 		 * and iterate.
2076 		 */
2077 		if (root->left != NULL) {
2078 			dns_rbtnode_t *node = root;
2079 			root = root->left;
2080 			node->left = NULL;
2081 		} else if (root->right != NULL) {
2082 			dns_rbtnode_t *node = root;
2083 			root = root->right;
2084 			node->right = NULL;
2085 		} else if (root->down != NULL) {
2086 			dns_rbtnode_t *node = root;
2087 			root = root->down;
2088 			node->down = NULL;
2089 		} else {
2090 			/*
2091 			 * There are no left, right or down nodes, so we
2092 			 * can free this one and go back to its parent.
2093 			 */
2094 			dns_rbtnode_t *node = root;
2095 			root = root->parent;
2096 
2097 			if (rbt->data_deleter != NULL && node->data != NULL) {
2098 				rbt->data_deleter(node->data, rbt->deleter_arg);
2099 			}
2100 			if (unhash) {
2101 				unhash_node(rbt, node);
2102 			}
2103 			/*
2104 			 * Note: we don't call unhash_node() here as we
2105 			 * are destroying the complete RBT tree.
2106 			 */
2107 #if DNS_RBT_USEMAGIC
2108 			node->magic = 0;
2109 #endif /* if DNS_RBT_USEMAGIC */
2110 			freenode(rbt, &node);
2111 			if (quantum != 0 && --quantum == 0) {
2112 				break;
2113 			}
2114 		}
2115 	}
2116 
2117 	*nodep = root;
2118 }
2119 
2120 static size_t
2121 getheight_helper(dns_rbtnode_t *node) {
2122 	size_t dl, dr;
2123 	size_t this_height, down_height;
2124 
2125 	if (node == NULL) {
2126 		return 0;
2127 	}
2128 
2129 	dl = getheight_helper(node->left);
2130 	dr = getheight_helper(node->right);
2131 
2132 	this_height = ISC_MAX(dl + 1, dr + 1);
2133 	down_height = getheight_helper(node->down);
2134 
2135 	return ISC_MAX(this_height, down_height);
2136 }
2137 
2138 size_t
2139 dns__rbt_getheight(dns_rbt_t *rbt) {
2140 	return getheight_helper(rbt->root);
2141 }
2142 
2143 static bool
2144 check_properties_helper(dns_rbtnode_t *node) {
2145 	if (node == NULL) {
2146 		return true;
2147 	}
2148 
2149 	if (IS_RED(node)) {
2150 		/* Root nodes must be BLACK. */
2151 		if (node->is_root) {
2152 			return false;
2153 		}
2154 
2155 		/* Both children of RED nodes must be BLACK. */
2156 		if (IS_RED(node->left) || IS_RED(node->right)) {
2157 			return false;
2158 		}
2159 	}
2160 
2161 	if ((node->down != NULL) && (!node->down->is_root)) {
2162 		return false;
2163 	}
2164 
2165 	if (node->is_root) {
2166 		if ((node->parent != NULL) && (node->parent->down != node)) {
2167 			return false;
2168 		}
2169 
2170 		if (get_upper_node(node) != node->parent) {
2171 			return false;
2172 		}
2173 	}
2174 
2175 	/* If node is assigned to the down_ pointer of its parent, it is
2176 	 * a subtree root and must have the flag set.
2177 	 */
2178 	if (((!node->parent) || (node->parent->down == node)) &&
2179 	    (!node->is_root))
2180 	{
2181 		return false;
2182 	}
2183 
2184 	/* Repeat tests with this node's children. */
2185 	return check_properties_helper(node->left) &&
2186 	       check_properties_helper(node->right) &&
2187 	       check_properties_helper(node->down);
2188 }
2189 
2190 static bool
2191 check_black_distance_helper(dns_rbtnode_t *node, size_t *distance) {
2192 	size_t dl, dr, dd;
2193 
2194 	if (node == NULL) {
2195 		*distance = 1;
2196 		return true;
2197 	}
2198 
2199 	if (!check_black_distance_helper(node->left, &dl)) {
2200 		return false;
2201 	}
2202 
2203 	if (!check_black_distance_helper(node->right, &dr)) {
2204 		return false;
2205 	}
2206 
2207 	if (!check_black_distance_helper(node->down, &dd)) {
2208 		return false;
2209 	}
2210 
2211 	/* Left and right side black node counts must match. */
2212 	if (dl != dr) {
2213 		return false;
2214 	}
2215 
2216 	if (IS_BLACK(node)) {
2217 		dl++;
2218 	}
2219 
2220 	*distance = dl;
2221 
2222 	return true;
2223 }
2224 
2225 bool
2226 dns__rbt_checkproperties(dns_rbt_t *rbt) {
2227 	size_t dd;
2228 
2229 	if (!check_properties_helper(rbt->root)) {
2230 		return false;
2231 	}
2232 
2233 	/* Path from a given node to all its leaves must contain the
2234 	 * same number of BLACK child nodes. This is done separately
2235 	 * here instead of inside check_properties_helper() as
2236 	 * it would take (n log n) complexity otherwise.
2237 	 */
2238 	return check_black_distance_helper(rbt->root, &dd);
2239 }
2240 
2241 static void
2242 dns_rbt_indent(FILE *f, int depth) {
2243 	int i;
2244 
2245 	fprintf(f, "%4d ", depth);
2246 
2247 	for (i = 0; i < depth; i++) {
2248 		fprintf(f, "- ");
2249 	}
2250 }
2251 
2252 void
2253 dns_rbt_printnodeinfo(dns_rbtnode_t *n, FILE *f) {
2254 	if (n == NULL) {
2255 		fprintf(f, "Null node\n");
2256 		return;
2257 	}
2258 
2259 	fprintf(f, "Node info for nodename: ");
2260 	printnodename(n, true, f);
2261 	fprintf(f, "\n");
2262 
2263 	fprintf(f, "n = %p\n", n);
2264 
2265 	fprintf(f, "node lock address = %u\n", n->locknum);
2266 
2267 	fprintf(f, "Parent: %p\n", n->parent);
2268 	fprintf(f, "Right: %p\n", n->right);
2269 	fprintf(f, "Left: %p\n", n->left);
2270 	fprintf(f, "Down: %p\n", n->down);
2271 	fprintf(f, "Data: %p\n", n->data);
2272 }
2273 
2274 static void
2275 printnodename(dns_rbtnode_t *node, bool quoted, FILE *f) {
2276 	isc_region_t r;
2277 	dns_name_t name;
2278 	char buffer[DNS_NAME_FORMATSIZE];
2279 	dns_offsets_t offsets;
2280 
2281 	r.length = node->namelen;
2282 	r.base = NAME(node);
2283 
2284 	dns_name_init(&name, offsets);
2285 	dns_name_fromregion(&name, &r);
2286 
2287 	dns_name_format(&name, buffer, sizeof(buffer));
2288 
2289 	if (quoted) {
2290 		fprintf(f, "\"%s\"", buffer);
2291 	} else {
2292 		fprintf(f, "%s", buffer);
2293 	}
2294 }
2295 
2296 static void
2297 print_text_helper(dns_rbtnode_t *root, dns_rbtnode_t *parent, int depth,
2298 		  const char *direction, void (*data_printer)(FILE *, void *),
2299 		  FILE *f) {
2300 	dns_rbt_indent(f, depth);
2301 
2302 	if (root != NULL) {
2303 		printnodename(root, true, f);
2304 		fprintf(f, " (%s, %s", direction,
2305 			root->color == RED ? "RED" : "BLACK");
2306 
2307 		if ((!root->is_root && root->parent != parent) ||
2308 		    (root->is_root && depth > 0 && root->parent->down != root))
2309 		{
2310 			fprintf(f, " (BAD parent pointer! -> ");
2311 			if (root->parent != NULL) {
2312 				printnodename(root->parent, true, f);
2313 			} else {
2314 				fprintf(f, "NULL");
2315 			}
2316 			fprintf(f, ")");
2317 		}
2318 
2319 		fprintf(f, ")");
2320 
2321 		if (root->data != NULL && data_printer != NULL) {
2322 			fprintf(f, " data@%p: ", root->data);
2323 			data_printer(f, root->data);
2324 		}
2325 		fprintf(f, "\n");
2326 
2327 		depth++;
2328 
2329 		if (root->color == RED && IS_RED(root->left)) {
2330 			fprintf(f, "** Red/Red color violation on left\n");
2331 		}
2332 		print_text_helper(root->left, root, depth, "left", data_printer,
2333 				  f);
2334 
2335 		if (root->color == RED && IS_RED(root->right)) {
2336 			fprintf(f, "** Red/Red color violation on right\n");
2337 		}
2338 		print_text_helper(root->right, root, depth, "right",
2339 				  data_printer, f);
2340 
2341 		print_text_helper(root->down, NULL, depth, "down", data_printer,
2342 				  f);
2343 	} else {
2344 		fprintf(f, "NULL (%s)\n", direction);
2345 	}
2346 }
2347 
2348 void
2349 dns_rbt_printtext(dns_rbt_t *rbt, void (*data_printer)(FILE *, void *),
2350 		  FILE *f) {
2351 	REQUIRE(VALID_RBT(rbt));
2352 
2353 	print_text_helper(rbt->root, NULL, 0, "root", data_printer, f);
2354 }
2355 
2356 static int
2357 print_dot_helper(dns_rbtnode_t *node, unsigned int *nodecount,
2358 		 bool show_pointers, FILE *f) {
2359 	unsigned int l, r, d;
2360 
2361 	if (node == NULL) {
2362 		return 0;
2363 	}
2364 
2365 	l = print_dot_helper(node->left, nodecount, show_pointers, f);
2366 	r = print_dot_helper(node->right, nodecount, show_pointers, f);
2367 	d = print_dot_helper(node->down, nodecount, show_pointers, f);
2368 
2369 	*nodecount += 1;
2370 
2371 	fprintf(f, "node%u[label = \"<f0> |<f1> ", *nodecount);
2372 	printnodename(node, false, f);
2373 	fprintf(f, "|<f2>");
2374 
2375 	if (show_pointers) {
2376 		fprintf(f, "|<f3> n=%p|<f4> p=%p", node, node->parent);
2377 	}
2378 
2379 	fprintf(f, "\"] [");
2380 
2381 	if (IS_RED(node)) {
2382 		fprintf(f, "color=red");
2383 	} else {
2384 		fprintf(f, "color=black");
2385 	}
2386 
2387 	/* XXXMUKS: verify that IS_ROOT() indicates subtree root and not
2388 	 * forest root.
2389 	 */
2390 	if (node->is_root) {
2391 		fprintf(f, ",penwidth=3");
2392 	}
2393 
2394 	if (node->data == NULL) {
2395 		fprintf(f, ",style=filled,fillcolor=lightgrey");
2396 	}
2397 
2398 	fprintf(f, "];\n");
2399 
2400 	if (node->left != NULL) {
2401 		fprintf(f, "\"node%u\":f0 -> \"node%u\":f1;\n", *nodecount, l);
2402 	}
2403 
2404 	if (node->down != NULL) {
2405 		fprintf(f, "\"node%u\":f1 -> \"node%u\":f1 [penwidth=5];\n",
2406 			*nodecount, d);
2407 	}
2408 	if (node->right != NULL) {
2409 		fprintf(f, "\"node%u\":f2 -> \"node%u\":f1;\n", *nodecount, r);
2410 	}
2411 
2412 	return *nodecount;
2413 }
2414 
2415 void
2416 dns_rbt_printdot(dns_rbt_t *rbt, bool show_pointers, FILE *f) {
2417 	unsigned int nodecount = 0;
2418 
2419 	REQUIRE(VALID_RBT(rbt));
2420 
2421 	fprintf(f, "digraph g {\n");
2422 	fprintf(f, "node [shape = record,height=.1];\n");
2423 	print_dot_helper(rbt->root, &nodecount, show_pointers, f);
2424 	fprintf(f, "}\n");
2425 }
2426 
2427 /*
2428  * Chain Functions
2429  */
2430 
2431 void
2432 dns_rbtnodechain_init(dns_rbtnodechain_t *chain) {
2433 	REQUIRE(chain != NULL);
2434 
2435 	/*
2436 	 * Initialize 'chain'.
2437 	 */
2438 	chain->end = NULL;
2439 	chain->level_count = 0;
2440 	chain->level_matches = 0;
2441 	memset(chain->levels, 0, sizeof(chain->levels));
2442 
2443 	chain->magic = CHAIN_MAGIC;
2444 }
2445 
2446 isc_result_t
2447 dns_rbtnodechain_current(dns_rbtnodechain_t *chain, dns_name_t *name,
2448 			 dns_name_t *origin, dns_rbtnode_t **node) {
2449 	isc_result_t result = ISC_R_SUCCESS;
2450 
2451 	REQUIRE(VALID_CHAIN(chain));
2452 
2453 	SET_IF_NOT_NULL(node, chain->end);
2454 
2455 	if (chain->end == NULL) {
2456 		return ISC_R_NOTFOUND;
2457 	}
2458 
2459 	if (name != NULL) {
2460 		node_name(chain->end, name);
2461 
2462 		if (chain->level_count == 0) {
2463 			/*
2464 			 * Names in the top level tree are all absolute.
2465 			 * Always make 'name' relative.
2466 			 */
2467 			INSIST(dns_name_isabsolute(name));
2468 
2469 			/*
2470 			 * This is cheaper than
2471 			 * dns_name_getlabelsequence().
2472 			 */
2473 			name->labels--;
2474 			name->length--;
2475 			name->attributes.absolute = false;
2476 		}
2477 	}
2478 
2479 	if (origin != NULL) {
2480 		if (chain->level_count > 0) {
2481 			result = chain_name(chain, origin, false);
2482 		} else {
2483 			dns_name_copy(dns_rootname, origin);
2484 		}
2485 	}
2486 
2487 	return result;
2488 }
2489 
2490 isc_result_t
2491 dns_rbtnodechain_prev(dns_rbtnodechain_t *chain, dns_name_t *name,
2492 		      dns_name_t *origin) {
2493 	dns_rbtnode_t *current, *previous, *predecessor;
2494 	isc_result_t result = ISC_R_SUCCESS;
2495 	bool new_origin = false;
2496 
2497 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
2498 
2499 	predecessor = NULL;
2500 
2501 	current = chain->end;
2502 
2503 	if (current->left != NULL) {
2504 		/*
2505 		 * Moving left one then right as far as possible is the
2506 		 * previous node, at least for this level.
2507 		 */
2508 		current = current->left;
2509 
2510 		while (current->right != NULL) {
2511 			current = current->right;
2512 		}
2513 
2514 		predecessor = current;
2515 	} else {
2516 		/*
2517 		 * No left links, so move toward the root.  If at any
2518 		 * point on the way there the link from parent to child
2519 		 * is a right link, then the parent is the previous
2520 		 * node, at least for this level.
2521 		 */
2522 		while (!current->is_root) {
2523 			previous = current;
2524 			current = current->parent;
2525 
2526 			if (current->right == previous) {
2527 				predecessor = current;
2528 				break;
2529 			}
2530 		}
2531 	}
2532 
2533 	if (predecessor != NULL) {
2534 		/*
2535 		 * Found a predecessor node in this level.  It might not
2536 		 * really be the predecessor, however.
2537 		 */
2538 		if (predecessor->down != NULL) {
2539 			/*
2540 			 * The predecessor is really down at least one
2541 			 * level. Go down and as far right as possible,
2542 			 * and repeat as long as the rightmost node has
2543 			 * a down pointer.
2544 			 */
2545 			do {
2546 				/*
2547 				 * XXX DCL Need to do something about
2548 				 * origins here. See whether to go down,
2549 				 * and if so whether it is truly what
2550 				 * Bob calls a new origin.
2551 				 */
2552 				ADD_LEVEL(chain, predecessor);
2553 				predecessor = predecessor->down;
2554 
2555 				/* XXX DCL duplicated from above; clever
2556 				 * way to unduplicate? */
2557 
2558 				while (predecessor->right != NULL) {
2559 					predecessor = predecessor->right;
2560 				}
2561 			} while (predecessor->down != NULL);
2562 
2563 			/* XXX DCL probably needs work on the concept */
2564 			if (origin != NULL) {
2565 				new_origin = true;
2566 			}
2567 		}
2568 	} else if (chain->level_count > 0) {
2569 		/*
2570 		 * Dang, didn't find a predecessor in this level.
2571 		 * Got to the root of this level without having
2572 		 * traversed any right links.  Ascend the tree one
2573 		 * level; the node that points to this tree is the
2574 		 * predecessor.
2575 		 */
2576 		INSIST(chain->level_count > 0 && current->is_root);
2577 		predecessor = chain->levels[--chain->level_count];
2578 
2579 		/* XXX DCL probably needs work on the concept */
2580 		/*
2581 		 * Don't declare an origin change when the new origin is
2582 		 * "." at the top level tree, because "." is declared as
2583 		 * the origin for the second level tree.
2584 		 */
2585 		if (origin != NULL &&
2586 		    (chain->level_count > 0 || predecessor->offsetlen > 1))
2587 		{
2588 			new_origin = true;
2589 		}
2590 	}
2591 
2592 	if (predecessor != NULL) {
2593 		chain->end = predecessor;
2594 
2595 		if (new_origin) {
2596 			result = dns_rbtnodechain_current(chain, name, origin,
2597 							  NULL);
2598 			if (result == ISC_R_SUCCESS) {
2599 				result = DNS_R_NEWORIGIN;
2600 			}
2601 		} else {
2602 			result = dns_rbtnodechain_current(chain, name, NULL,
2603 							  NULL);
2604 		}
2605 	} else {
2606 		result = ISC_R_NOMORE;
2607 	}
2608 
2609 	return result;
2610 }
2611 
2612 isc_result_t
2613 dns_rbtnodechain_down(dns_rbtnodechain_t *chain, dns_name_t *name,
2614 		      dns_name_t *origin) {
2615 	dns_rbtnode_t *current, *successor;
2616 	isc_result_t result = ISC_R_SUCCESS;
2617 	bool new_origin = false;
2618 
2619 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
2620 
2621 	successor = NULL;
2622 
2623 	current = chain->end;
2624 
2625 	if (current->down != NULL) {
2626 		/*
2627 		 * Don't declare an origin change when the new origin is
2628 		 * "." at the second level tree, because "." is already
2629 		 * declared as the origin for the top level tree.
2630 		 */
2631 		if (chain->level_count > 0 || current->offsetlen > 1) {
2632 			new_origin = true;
2633 		}
2634 
2635 		ADD_LEVEL(chain, current);
2636 		current = current->down;
2637 
2638 		while (current->left != NULL) {
2639 			current = current->left;
2640 		}
2641 
2642 		successor = current;
2643 	}
2644 
2645 	if (successor != NULL) {
2646 		chain->end = successor;
2647 
2648 		/*
2649 		 * It is not necessary to use dns_rbtnodechain_current
2650 		 * like the other functions because this function will
2651 		 * never find a node in the topmost level.  This is
2652 		 * because the root level will never be more than one
2653 		 * name, and everything in the megatree is a successor
2654 		 * to that node, down at the second level or below.
2655 		 */
2656 
2657 		if (name != NULL) {
2658 			node_name(chain->end, name);
2659 		}
2660 
2661 		if (new_origin) {
2662 			if (origin != NULL) {
2663 				result = chain_name(chain, origin, false);
2664 			}
2665 
2666 			if (result == ISC_R_SUCCESS) {
2667 				result = DNS_R_NEWORIGIN;
2668 			}
2669 		} else {
2670 			result = ISC_R_SUCCESS;
2671 		}
2672 	} else {
2673 		result = ISC_R_NOMORE;
2674 	}
2675 
2676 	return result;
2677 }
2678 
2679 isc_result_t
2680 dns_rbtnodechain_nextflat(dns_rbtnodechain_t *chain, dns_name_t *name) {
2681 	dns_rbtnode_t *current, *previous, *successor;
2682 	isc_result_t result = ISC_R_SUCCESS;
2683 
2684 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
2685 
2686 	successor = NULL;
2687 
2688 	current = chain->end;
2689 
2690 	if (current->right == NULL) {
2691 		while (!current->is_root) {
2692 			previous = current;
2693 			current = current->parent;
2694 
2695 			if (current->left == previous) {
2696 				successor = current;
2697 				break;
2698 			}
2699 		}
2700 	} else {
2701 		current = current->right;
2702 
2703 		while (current->left != NULL) {
2704 			current = current->left;
2705 		}
2706 
2707 		successor = current;
2708 	}
2709 
2710 	if (successor != NULL) {
2711 		chain->end = successor;
2712 
2713 		if (name != NULL) {
2714 			node_name(chain->end, name);
2715 		}
2716 
2717 		result = ISC_R_SUCCESS;
2718 	} else {
2719 		result = ISC_R_NOMORE;
2720 	}
2721 
2722 	return result;
2723 }
2724 
2725 isc_result_t
2726 dns_rbtnodechain_next(dns_rbtnodechain_t *chain, dns_name_t *name,
2727 		      dns_name_t *origin) {
2728 	dns_rbtnode_t *current, *previous, *successor;
2729 	isc_result_t result = ISC_R_SUCCESS;
2730 	bool new_origin = false;
2731 
2732 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
2733 
2734 	successor = NULL;
2735 
2736 	current = chain->end;
2737 
2738 	/*
2739 	 * If there is a level below this node, the next node is the
2740 	 * leftmost node of the next level.
2741 	 */
2742 	if (current->down != NULL) {
2743 		/*
2744 		 * Don't declare an origin change when the new origin is
2745 		 * "." at the second level tree, because "." is already
2746 		 * declared as the origin for the top level tree.
2747 		 */
2748 		if (chain->level_count > 0 || current->offsetlen > 1) {
2749 			new_origin = true;
2750 		}
2751 
2752 		ADD_LEVEL(chain, current);
2753 		current = current->down;
2754 
2755 		while (current->left != NULL) {
2756 			current = current->left;
2757 		}
2758 
2759 		successor = current;
2760 	} else if (current->right == NULL) {
2761 		/*
2762 		 * The successor is up, either in this level or a
2763 		 * previous one. Head back toward the root of the tree,
2764 		 * looking for any path that was via a left link; the
2765 		 * successor is the node that has that left link.  In
2766 		 * the event the root of the level is reached without
2767 		 * having traversed any left links, ascend one level and
2768 		 * look for either a right link off the point of ascent,
2769 		 * or search for a left link upward again, repeating
2770 		 * ascends until either case is true.
2771 		 */
2772 		do {
2773 			while (!current->is_root) {
2774 				previous = current;
2775 				current = current->parent;
2776 
2777 				if (current->left == previous) {
2778 					successor = current;
2779 					break;
2780 				}
2781 			}
2782 
2783 			if (successor == NULL) {
2784 				/*
2785 				 * Reached the root without having
2786 				 * traversed any left pointers, so this
2787 				 * level is done.
2788 				 */
2789 				if (chain->level_count == 0) {
2790 					/*
2791 					 * If the tree we are iterating
2792 					 * over was modified since this
2793 					 * chain was initialized in a
2794 					 * way that caused node splits
2795 					 * to occur, "current" may now
2796 					 * be pointing to a root node
2797 					 * which appears to be at level
2798 					 * 0, but still has a parent. If
2799 					 * that happens, abort.
2800 					 * Otherwise, we are done
2801 					 * looking for a successor as we
2802 					 * really reached the root node
2803 					 * on level 0.
2804 					 */
2805 					INSIST(current->parent == NULL);
2806 					break;
2807 				}
2808 
2809 				current = chain->levels[--chain->level_count];
2810 				new_origin = true;
2811 
2812 				if (current->right != NULL) {
2813 					break;
2814 				}
2815 			}
2816 		} while (successor == NULL);
2817 	}
2818 
2819 	if (successor == NULL && current->right != NULL) {
2820 		current = current->right;
2821 
2822 		while (current->left != NULL) {
2823 			current = current->left;
2824 		}
2825 
2826 		successor = current;
2827 	}
2828 
2829 	if (successor != NULL) {
2830 		/*
2831 		 * If we determine that the current node is the
2832 		 * successor to itself, we will run into an infinite
2833 		 * loop, so abort instead.
2834 		 */
2835 		INSIST(chain->end != successor);
2836 
2837 		chain->end = successor;
2838 
2839 		/*
2840 		 * It is not necessary to use dns_rbtnodechain_current
2841 		 * like the other functions because this function will
2842 		 * never find a node in the topmost level.  This is
2843 		 * because the root level will never be more than one
2844 		 * name, and everything in the megatree is a successor
2845 		 * to that node, down at the second level or below.
2846 		 */
2847 
2848 		if (name != NULL) {
2849 			node_name(chain->end, name);
2850 		}
2851 
2852 		if (new_origin) {
2853 			if (origin != NULL) {
2854 				result = chain_name(chain, origin, false);
2855 			}
2856 
2857 			if (result == ISC_R_SUCCESS) {
2858 				result = DNS_R_NEWORIGIN;
2859 			}
2860 		} else {
2861 			result = ISC_R_SUCCESS;
2862 		}
2863 	} else {
2864 		result = ISC_R_NOMORE;
2865 	}
2866 
2867 	return result;
2868 }
2869 
2870 isc_result_t
2871 dns_rbtnodechain_first(dns_rbtnodechain_t *chain, dns_rbt_t *rbt,
2872 		       dns_name_t *name, dns_name_t *origin)
2873 
2874 {
2875 	isc_result_t result;
2876 
2877 	REQUIRE(VALID_RBT(rbt));
2878 	REQUIRE(VALID_CHAIN(chain));
2879 
2880 	dns_rbtnodechain_reset(chain);
2881 
2882 	chain->end = rbt->root;
2883 
2884 	result = dns_rbtnodechain_current(chain, name, origin, NULL);
2885 
2886 	if (result == ISC_R_SUCCESS) {
2887 		result = DNS_R_NEWORIGIN;
2888 	}
2889 
2890 	return result;
2891 }
2892 
2893 isc_result_t
2894 dns_rbtnodechain_last(dns_rbtnodechain_t *chain, dns_rbt_t *rbt,
2895 		      dns_name_t *name, dns_name_t *origin)
2896 
2897 {
2898 	isc_result_t result;
2899 
2900 	REQUIRE(VALID_RBT(rbt));
2901 	REQUIRE(VALID_CHAIN(chain));
2902 
2903 	dns_rbtnodechain_reset(chain);
2904 
2905 	result = move_chain_to_last(chain, rbt->root);
2906 	if (result != ISC_R_SUCCESS) {
2907 		return result;
2908 	}
2909 
2910 	result = dns_rbtnodechain_current(chain, name, origin, NULL);
2911 
2912 	if (result == ISC_R_SUCCESS) {
2913 		result = DNS_R_NEWORIGIN;
2914 	}
2915 
2916 	return result;
2917 }
2918 
2919 void
2920 dns_rbtnodechain_reset(dns_rbtnodechain_t *chain) {
2921 	REQUIRE(VALID_CHAIN(chain));
2922 
2923 	/*
2924 	 * Free any dynamic storage associated with 'chain', and then
2925 	 * reinitialize 'chain'.
2926 	 */
2927 	chain->end = NULL;
2928 	chain->level_count = 0;
2929 	chain->level_matches = 0;
2930 }
2931 
2932 void
2933 dns_rbtnodechain_invalidate(dns_rbtnodechain_t *chain) {
2934 	/*
2935 	 * Free any dynamic storage associated with 'chain', and then
2936 	 * invalidate 'chain'.
2937 	 */
2938 
2939 	dns_rbtnodechain_reset(chain);
2940 
2941 	chain->magic = 0;
2942 }
2943 
2944 /* XXXMUKS:
2945  *
2946  * - worth removing inline as static functions are inlined automatically
2947  *   where suitable by modern compilers.
2948  * - bump the size of dns_rbt.nodecount to size_t.
2949  * - the dumpfile header also contains a nodecount that is unsigned
2950  *   int. If large files (> 2^32 nodes) are to be supported, the
2951  *   allocation for this field should be increased.
2952  */
2953