xref: /netbsd-src/common/lib/libc/gen/radixtree.c (revision f32144615845cc98528c78d4f24930c3b8fe6a8f)
1 /*	$NetBSD: radixtree.c,v 1.2 2011/04/14 15:42:02 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c)2011 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * radix tree
31  *
32  * it's designed to work efficiently with dense index distribution.
33  * the memory consumption (number of necessary intermediate nodes)
34  * heavily depends on index distribution.  basically, more dense index
35  * distribution consumes less nodes per item.
36  * approximately,
37  * the best case: about RADIX_TREE_PTR_PER_NODE items per node.
38  * the worst case: RADIX_TREE_MAX_HEIGHT nodes per item.
39  */
40 
41 #include <sys/cdefs.h>
42 
43 #if defined(_KERNEL) || defined(_STANDALONE)
44 __KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.2 2011/04/14 15:42:02 yamt Exp $");
45 #include <sys/param.h>
46 #include <sys/null.h>
47 #include <sys/pool.h>
48 #include <sys/radixtree.h>
49 #else /* defined(_KERNEL) || defined(_STANDALONE) */
50 __RCSID("$NetBSD: radixtree.c,v 1.2 2011/04/14 15:42:02 yamt Exp $");
51 #include <assert.h>
52 #include <errno.h>
53 #include <stdbool.h>
54 #include <stdlib.h>
55 #if 1
56 #define KASSERT assert
57 #else
58 #define KASSERT(a)	/* nothing */
59 #endif
60 #endif /* defined(_KERNEL) || defined(_STANDALONE) */
61 
62 #include <sys/radixtree.h>
63 
64 #define	RADIX_TREE_BITS_PER_HEIGHT	4	/* XXX tune */
65 #define	RADIX_TREE_PTR_PER_NODE		(1 << RADIX_TREE_BITS_PER_HEIGHT)
66 #define	RADIX_TREE_MAX_HEIGHT		(64 / RADIX_TREE_BITS_PER_HEIGHT)
67 __CTASSERT((64 % RADIX_TREE_BITS_PER_HEIGHT) == 0);
68 
69 __CTASSERT(((1 << RADIX_TREE_TAG_ID_MAX) & (sizeof(int) - 1)) == 0);
70 #define	RADIX_TREE_TAG_MASK	((1 << RADIX_TREE_TAG_ID_MAX) - 1)
71 
72 static inline void *
73 entry_ptr(void *p)
74 {
75 
76 	return (void *)((uintptr_t)p & ~RADIX_TREE_TAG_MASK);
77 }
78 
79 static inline unsigned int
80 entry_tagmask(void *p)
81 {
82 
83 	return (uintptr_t)p & RADIX_TREE_TAG_MASK;
84 }
85 
86 static inline void *
87 entry_compose(void *p, unsigned int tagmask)
88 {
89 
90 	return (void *)((uintptr_t)p | tagmask);
91 }
92 
93 static inline bool
94 entry_match_p(void *p, unsigned int tagmask)
95 {
96 
97 	KASSERT(entry_ptr(p) != NULL || entry_tagmask(p) == 0);
98 	if (p == NULL) {
99 		return false;
100 	}
101 	if (tagmask == 0) {
102 		return true;
103 	}
104 	return (entry_tagmask(p) & tagmask) != 0;
105 }
106 
107 static inline unsigned int
108 tagid_to_mask(radix_tree_tagid_t id)
109 {
110 
111 	return 1U << id;
112 }
113 
114 /*
115  * radix_tree_node: an intermediate node
116  *
117  * we don't care the type of leaf nodes.  they are just void *.
118  */
119 
120 struct radix_tree_node {
121 	void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
122 	unsigned int n_nptrs;	/* # of non-NULL pointers in n_ptrs */
123 };
124 
125 static unsigned int
126 any_children_tagmask(struct radix_tree_node *n)
127 {
128 	unsigned int mask;
129 	int i;
130 
131 	mask = 0;
132 	for (i = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
133 		mask |= (unsigned int)(uintptr_t)n->n_ptrs[i];
134 	}
135 	return mask & RADIX_TREE_TAG_MASK;
136 }
137 
138 /*
139  * p_refs[0].pptr == &t->t_root
140  *	:
141  * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x]
142  *	:
143  *	:
144  * p_refs[t->t_height].pptr == &leaf_pointer
145  */
146 
147 struct radix_tree_path {
148 	struct radix_tree_node_ref {
149 		void **pptr;
150 	} p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
151 	int p_lastidx;
152 };
153 
154 static inline void **
155 path_pptr(struct radix_tree *t, struct radix_tree_path *p,
156     unsigned int height)
157 {
158 
159 	KASSERT(height <= t->t_height);
160 	return p->p_refs[height].pptr;
161 }
162 
163 static inline struct radix_tree_node *
164 path_node(struct radix_tree * t, struct radix_tree_path *p, unsigned int height)
165 {
166 
167 	KASSERT(height <= t->t_height);
168 	return entry_ptr(*path_pptr(t, p, height));
169 }
170 
171 static inline unsigned int
172 path_idx(struct radix_tree * t, struct radix_tree_path *p, unsigned int height)
173 {
174 
175 	KASSERT(height <= t->t_height);
176 	return path_pptr(t, p, height + 1) - path_node(t, p, height)->n_ptrs;
177 }
178 
179 /*
180  * radix_tree_init_tree:
181  *
182  * initialize a tree.
183  */
184 
185 void
186 radix_tree_init_tree(struct radix_tree *t)
187 {
188 
189 	t->t_height = 0;
190 	t->t_root = NULL;
191 }
192 
193 /*
194  * radix_tree_init_tree:
195  *
196  * clean up a tree.
197  */
198 
199 void
200 radix_tree_fini_tree(struct radix_tree *t)
201 {
202 
203 	KASSERT(t->t_root == NULL);
204 	KASSERT(t->t_height == 0);
205 }
206 
207 #if defined(_KERNEL)
208 pool_cache_t radix_tree_node_cache __read_mostly;
209 
210 static int
211 radix_tree_node_ctor(void *dummy, void *item, int flags)
212 {
213 	struct radix_tree_node *n = item;
214 
215 	KASSERT(dummy == NULL);
216 	memset(n, 0, sizeof(*n));
217 	return 0;
218 }
219 
220 /*
221  * radix_tree_init:
222  *
223  * initialize the subsystem.
224  */
225 
226 void
227 radix_tree_init(void)
228 {
229 
230 	radix_tree_node_cache = pool_cache_init(sizeof(struct radix_tree_node),
231 	    0, 0, 0, "radix_tree_node", NULL, IPL_NONE, radix_tree_node_ctor,
232 	    NULL, NULL);
233 	KASSERT(radix_tree_node_cache != NULL);
234 }
235 #endif /* defined(_KERNEL) */
236 
237 static bool __unused
238 radix_tree_node_clean_p(const struct radix_tree_node *n)
239 {
240 	unsigned int i;
241 
242 	if (n->n_nptrs != 0) {
243 		return false;
244 	}
245 	for (i = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
246 		if (n->n_ptrs[i] != NULL) {
247 			return false;
248 		}
249 	}
250 	return true;
251 }
252 
253 static struct radix_tree_node *
254 radix_tree_alloc_node(void)
255 {
256 	struct radix_tree_node *n;
257 
258 #if defined(_KERNEL)
259 	n = pool_cache_get(radix_tree_node_cache, PR_NOWAIT);
260 #else /* defined(_KERNEL) */
261 	n = calloc(1, sizeof(*n));
262 #endif /* defined(_KERNEL) */
263 	KASSERT(n == NULL || radix_tree_node_clean_p(n));
264 	return n;
265 }
266 
267 static void
268 radix_tree_free_node(struct radix_tree_node *n)
269 {
270 
271 	KASSERT(radix_tree_node_clean_p(n));
272 #if defined(_KERNEL)
273 	pool_cache_put(radix_tree_node_cache, n);
274 #else /* defined(_KERNEL) */
275 	free(n);
276 #endif /* defined(_KERNEL) */
277 }
278 
279 static int
280 radix_tree_grow(struct radix_tree *t, unsigned int newheight)
281 {
282 	const unsigned int tagmask = entry_tagmask(t->t_root);
283 
284 	KASSERT(newheight <= 64 / RADIX_TREE_BITS_PER_HEIGHT);
285 	if (t->t_root == NULL) {
286 		t->t_height = newheight;
287 		return 0;
288 	}
289 	while (t->t_height < newheight) {
290 		struct radix_tree_node *n;
291 
292 		n = radix_tree_alloc_node();
293 		if (n == NULL) {
294 			/*
295 			 * don't bother to revert our changes.
296 			 * the caller will likely retry.
297 			 */
298 			return ENOMEM;
299 		}
300 		n->n_nptrs = 1;
301 		n->n_ptrs[0] = t->t_root;
302 		t->t_root = entry_compose(n, tagmask);
303 		t->t_height++;
304 	}
305 	return 0;
306 }
307 
308 static inline void **
309 radix_tree_lookup_ptr(struct radix_tree *t, uint64_t idx,
310     struct radix_tree_path *path, bool alloc, const unsigned int tagmask)
311 {
312 	struct radix_tree_node *n;
313 	int hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
314 	int shift;
315 	void **vpp;
316 	const uint64_t mask = (UINT64_C(1) << RADIX_TREE_BITS_PER_HEIGHT) - 1;
317 	struct radix_tree_node_ref *refs = NULL;
318 
319 	KASSERT(tagmask == 0 || !alloc);
320 	KASSERT(path == NULL || !alloc);
321 	vpp = &t->t_root;
322 	if (path != NULL) {
323 		refs = path->p_refs;
324 		refs->pptr = vpp;
325 	}
326 	n = NULL;
327 	for (shift = 64 - RADIX_TREE_BITS_PER_HEIGHT; shift >= 0;) {
328 		struct radix_tree_node *c;
329 		void *entry;
330 		const uint64_t i = (idx >> shift) & mask;
331 
332 		if (shift >= hshift) {
333 			unsigned int newheight;
334 
335 			KASSERT(vpp == &t->t_root);
336 			if (i == 0) {
337 				shift -= RADIX_TREE_BITS_PER_HEIGHT;
338 				continue;
339 			}
340 			if (!alloc) {
341 				if (path != NULL) {
342 					KASSERT((refs - path->p_refs) == 0);
343 					path->p_lastidx = 0;
344 				}
345 				return NULL;
346 			}
347 			newheight = shift / RADIX_TREE_BITS_PER_HEIGHT + 1;
348 			if (radix_tree_grow(t, newheight)) {
349 				return NULL;
350 			}
351 			hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
352 		}
353 		entry = *vpp;
354 		c = entry_ptr(entry);
355 		if (c == NULL ||
356 		    (tagmask != 0 &&
357 		    (entry_tagmask(entry) & tagmask) == 0)) {
358 			if (!alloc) {
359 				if (path != NULL) {
360 					path->p_lastidx = refs - path->p_refs;
361 				}
362 				return NULL;
363 			}
364 			c = radix_tree_alloc_node();
365 			if (c == NULL) {
366 				return NULL;
367 			}
368 			*vpp = c;
369 			if (n != NULL) {
370 				KASSERT(n->n_nptrs < RADIX_TREE_PTR_PER_NODE);
371 				n->n_nptrs++;
372 			}
373 		}
374 		n = c;
375 		vpp = &n->n_ptrs[i];
376 		if (path != NULL) {
377 			refs++;
378 			refs->pptr = vpp;
379 		}
380 		shift -= RADIX_TREE_BITS_PER_HEIGHT;
381 	}
382 	if (alloc) {
383 		KASSERT(*vpp == NULL);
384 		if (n != NULL) {
385 			KASSERT(n->n_nptrs < RADIX_TREE_PTR_PER_NODE);
386 			n->n_nptrs++;
387 		}
388 	}
389 	if (path != NULL) {
390 		path->p_lastidx = refs - path->p_refs;
391 	}
392 	return vpp;
393 }
394 
395 /*
396  * radix_tree_insert_node:
397  *
398  * insert the node at idx.
399  * it's illegal to insert NULL.
400  * it's illegal to insert a non-aligned pointer.
401  *
402  * this function returns ENOMEM if necessary memory allocation failed.
403  * otherwise, this function returns 0.
404  *
405  * note that inserting a node can involves memory allocation for intermediate
406  * nodes.  if _KERNEL, it's done with non-blocking IPL_NONE memory allocation.
407  */
408 
409 int
410 radix_tree_insert_node(struct radix_tree *t, uint64_t idx, void *p)
411 {
412 	void **vpp;
413 
414 	KASSERT(p != NULL);
415 	KASSERT(entry_compose(p, 0) == p);
416 	vpp = radix_tree_lookup_ptr(t, idx, NULL, true, 0);
417 	if (vpp == NULL) {
418 		return ENOMEM;
419 	}
420 	KASSERT(*vpp == NULL);
421 	*vpp = p;
422 	return 0;
423 }
424 
425 void *
426 radix_tree_replace_node(struct radix_tree *t, uint64_t idx, void *p)
427 {
428 	void **vpp;
429 	void *oldp;
430 
431 	KASSERT(p != NULL);
432 	KASSERT(entry_compose(p, 0) == p);
433 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
434 	KASSERT(vpp != NULL);
435 	oldp = *vpp;
436 	KASSERT(oldp != NULL);
437 	*vpp = entry_compose(p, entry_tagmask(*vpp));
438 	return entry_ptr(oldp);
439 }
440 
441 /*
442  * radix_tree_remove_node:
443  *
444  * remove the node at idx.
445  * it's illegal to try to remove a node which has not been inserted.
446  */
447 
448 void *
449 radix_tree_remove_node(struct radix_tree *t, uint64_t idx)
450 {
451 	struct radix_tree_path path;
452 	void **vpp;
453 	void *oldp;
454 	int i;
455 
456 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
457 	KASSERT(vpp != NULL);
458 	oldp = *vpp;
459 	KASSERT(oldp != NULL);
460 	KASSERT(path.p_lastidx == t->t_height);
461 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
462 	*vpp = NULL;
463 	for (i = t->t_height - 1; i >= 0; i--) {
464 		void *entry;
465 		struct radix_tree_node ** const pptr =
466 		    (struct radix_tree_node **)path_pptr(t, &path, i);
467 		struct radix_tree_node *n;
468 
469 		KASSERT(pptr != NULL);
470 		entry = *pptr;
471 		n = entry_ptr(entry);
472 		KASSERT(n != NULL);
473 		KASSERT(n->n_nptrs > 0);
474 		n->n_nptrs--;
475 		if (n->n_nptrs > 0) {
476 			break;
477 		}
478 		radix_tree_free_node(n);
479 		*pptr = NULL;
480 	}
481 	/*
482 	 * fix up height
483 	 */
484 	if (i < 0) {
485 		KASSERT(t->t_root == NULL);
486 		t->t_height = 0;
487 	}
488 	/*
489 	 * update tags
490 	 */
491 	for (; i >= 0; i--) {
492 		void *entry;
493 		struct radix_tree_node ** const pptr =
494 		    (struct radix_tree_node **)path_pptr(t, &path, i);
495 		struct radix_tree_node *n;
496 		unsigned int newmask;
497 
498 		KASSERT(pptr != NULL);
499 		entry = *pptr;
500 		n = entry_ptr(entry);
501 		KASSERT(n != NULL);
502 		KASSERT(n->n_nptrs > 0);
503 		newmask = any_children_tagmask(n);
504 		if (newmask == entry_tagmask(entry)) {
505 			break;
506 		}
507 		*pptr = entry_compose(n, newmask);
508 	}
509 	/*
510 	 * XXX is it worth to try to reduce height?
511 	 * if we do that, make radix_tree_grow rollback its change as well.
512 	 */
513 	return entry_ptr(oldp);
514 }
515 
516 /*
517  * radix_tree_lookup_node:
518  *
519  * returns the node at idx.
520  * returns NULL if nothing is found at idx.
521  */
522 
523 void *
524 radix_tree_lookup_node(struct radix_tree *t, uint64_t idx)
525 {
526 	void **vpp;
527 
528 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
529 	if (vpp == NULL) {
530 		return NULL;
531 	}
532 	return entry_ptr(*vpp);
533 }
534 
535 static inline void
536 gang_lookup_init(struct radix_tree *t, uint64_t idx,
537     struct radix_tree_path *path, const unsigned int tagmask)
538 {
539 	void **vpp;
540 
541 	vpp = radix_tree_lookup_ptr(t, idx, path, false, tagmask);
542 	KASSERT(vpp == NULL ||
543 	    vpp == path_pptr(t, path, path->p_lastidx));
544 	KASSERT(&t->t_root == path_pptr(t, path, 0));
545 }
546 
547 static inline unsigned int
548 gang_lookup_scan(struct radix_tree *t, struct radix_tree_path *path,
549     void **results, unsigned int maxresults, const unsigned int tagmask)
550 {
551 	void **vpp;
552 	int nfound;
553 	unsigned int lastidx;
554 
555 	KASSERT(maxresults > 0);
556 	lastidx = path->p_lastidx;
557 	if (lastidx == 0) {
558 		return 0;
559 	}
560 	nfound = 0;
561 	vpp = path_pptr(t, path, lastidx);
562 	while (/*CONSTCOND*/true) {
563 		struct radix_tree_node *n;
564 		int i;
565 
566 		if (entry_match_p(*vpp, tagmask)) {
567 			KASSERT(lastidx == t->t_height);
568 			/*
569 			 * record the non-NULL leaf.
570 			 */
571 			results[nfound] = entry_ptr(*vpp);
572 			nfound++;
573 			if (nfound == maxresults) {
574 				return nfound;
575 			}
576 		}
577 scan_siblings:
578 		/*
579 		 * try to find the next non-NULL sibling.
580 		 */
581 		n = path_node(t, path, lastidx - 1);
582 		if (*vpp != NULL && n->n_nptrs == 1) {
583 			/*
584 			 * optimization
585 			 */
586 			goto no_siblings;
587 		}
588 		for (i = path_idx(t, path, lastidx - 1) + 1;
589 		    i < RADIX_TREE_PTR_PER_NODE;
590 		    i++) {
591 			if (entry_match_p(n->n_ptrs[i], tagmask)) {
592 				vpp = &n->n_ptrs[i];
593 				path->p_refs[lastidx].pptr = vpp;
594 				KASSERT(path_idx(t, path, lastidx - 1)
595 				    == i);
596 				break;
597 			}
598 		}
599 		if (i == RADIX_TREE_PTR_PER_NODE) {
600 no_siblings:
601 			/*
602 			 * not found.  go to parent.
603 			 */
604 			lastidx--;
605 			if (lastidx == 0) {
606 				return nfound;
607 			}
608 			vpp = path_pptr(t, path, lastidx);
609 			goto scan_siblings;
610 		}
611 		/*
612 		 * descending the left-most child node, upto the leaf or NULL.
613 		 */
614 		while (entry_match_p(*vpp, tagmask) && lastidx < t->t_height) {
615 			n = entry_ptr(*vpp);
616 			vpp = &n->n_ptrs[0];
617 			lastidx++;
618 			path->p_refs[lastidx].pptr = vpp;
619 		}
620 	}
621 }
622 
623 /*
624  * radix_tree_gang_lookup_node:
625  *
626  * search nodes starting from idx in the ascending order.
627  * results should be an array large enough to hold maxresults pointers.
628  * returns the number of nodes found, up to maxresults.
629  * returning less than maxresults means there are no more nodes.
630  *
631  * the result of this function is semantically equivalent to what could be
632  * obtained by repeated calls of radix_tree_lookup_node with increasing index.
633  * but this function is much faster when node indexes are distributed sparsely.
634  *
635  * note that this function doesn't return exact values of node indexes of
636  * found nodes.  if they are important for a caller, it's the caller's
637  * responsibility to check them, typically by examinining the returned nodes
638  * using some caller-specific knowledge about them.
639  */
640 
641 unsigned int
642 radix_tree_gang_lookup_node(struct radix_tree *t, uint64_t idx,
643     void **results, unsigned int maxresults)
644 {
645 	struct radix_tree_path path;
646 
647 	gang_lookup_init(t, idx, &path, 0);
648 	return gang_lookup_scan(t, &path, results, maxresults, 0);
649 }
650 
651 /*
652  * radix_tree_gang_lookup_tagged_node:
653  *
654  * same as radix_tree_gang_lookup_node except that this one only returns
655  * nodes tagged with tagid.
656  */
657 
658 unsigned int
659 radix_tree_gang_lookup_tagged_node(struct radix_tree *t, uint64_t idx,
660     void **results, unsigned int maxresults, radix_tree_tagid_t tagid)
661 {
662 	struct radix_tree_path path;
663 	const unsigned int tagmask = tagid_to_mask(tagid);
664 
665 	gang_lookup_init(t, idx, &path, tagmask);
666 	return gang_lookup_scan(t, &path, results, maxresults, tagmask);
667 }
668 
669 bool
670 radix_tree_get_tag(struct radix_tree *t, uint64_t idx,
671     radix_tree_tagid_t tagid)
672 {
673 #if 1
674 	const unsigned int tagmask = tagid_to_mask(tagid);
675 	void **vpp;
676 
677 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, tagmask);
678 	if (vpp == NULL) {
679 		return false;
680 	}
681 	KASSERT(*vpp != NULL);
682 	return (entry_tagmask(*vpp) & tagmask) != 0;
683 #else
684 	const unsigned int tagmask = tagid_to_mask(tagid);
685 	void **vpp;
686 
687 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
688 	KASSERT(vpp != NULL);
689 	return (entry_tagmask(*vpp) & tagmask) != 0;
690 #endif
691 }
692 
693 void
694 radix_tree_set_tag(struct radix_tree *t, uint64_t idx,
695     radix_tree_tagid_t tagid)
696 {
697 	struct radix_tree_path path;
698 	const unsigned int tagmask = tagid_to_mask(tagid);
699 	void **vpp;
700 	int i;
701 
702 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
703 	KASSERT(vpp != NULL);
704 	KASSERT(*vpp != NULL);
705 	KASSERT(path.p_lastidx == t->t_height);
706 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
707 	for (i = t->t_height; i >= 0; i--) {
708 		void ** const pptr = (void **)path_pptr(t, &path, i);
709 		void *entry;
710 
711 		KASSERT(pptr != NULL);
712 		entry = *pptr;
713 		if ((entry_tagmask(entry) & tagmask) != 0) {
714 			break;
715 		}
716 		*pptr = (void *)((uintptr_t)entry | tagmask);
717 	}
718 }
719 
720 void
721 radix_tree_clear_tag(struct radix_tree *t, uint64_t idx,
722     radix_tree_tagid_t tagid)
723 {
724 	struct radix_tree_path path;
725 	const unsigned int tagmask = tagid_to_mask(tagid);
726 	void **vpp;
727 	int i;
728 
729 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
730 	KASSERT(vpp != NULL);
731 	KASSERT(*vpp != NULL);
732 	KASSERT(path.p_lastidx == t->t_height);
733 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
734 	if ((entry_tagmask(*vpp) & tagmask) == 0) {
735 		return;
736 	}
737 	for (i = t->t_height; i >= 0; i--) {
738 		void ** const pptr = (void **)path_pptr(t, &path, i);
739 		void *entry;
740 
741 		KASSERT(pptr != NULL);
742 		entry = *pptr;
743 		KASSERT((entry_tagmask(entry) & tagmask) != 0);
744 		*pptr = entry_compose(entry_ptr(entry),
745 		    entry_tagmask(entry) & ~tagmask);
746 		if (0 < i && i < t->t_height - 1) {
747 			struct radix_tree_node *n = path_node(t, &path, i - 1);
748 
749 			if ((any_children_tagmask(n) & tagmask) != 0) {
750 				break;
751 			}
752 		}
753 	}
754 }
755 
756 #if defined(UNITTEST)
757 
758 #include <inttypes.h>
759 #include <stdio.h>
760 
761 static void
762 radix_tree_dump_node(const struct radix_tree *t, void *vp,
763     uint64_t offset, unsigned int height)
764 {
765 	struct radix_tree_node *n;
766 	unsigned int i;
767 
768 	for (i = 0; i < t->t_height - height; i++) {
769 		printf(" ");
770 	}
771 	if (entry_tagmask(vp) == 0) {
772 		printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
773 	} else {
774 		printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
775 		    entry_tagmask(vp));
776 	}
777 	if (height == 0) {
778 		printf(" (leaf)\n");
779 		return;
780 	}
781 	n = entry_ptr(vp);
782 	assert(any_children_tagmask(n) == entry_tagmask(vp));
783 	printf(" (%u children)\n", n->n_nptrs);
784 	for (i = 0; i < __arraycount(n->n_ptrs); i++) {
785 		void *c;
786 
787 		c = n->n_ptrs[i];
788 		if (c == NULL) {
789 			continue;
790 		}
791 		radix_tree_dump_node(t, c,
792 		    offset + i * (UINT64_C(1) <<
793 		    (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
794 	}
795 }
796 
797 void radix_tree_dump(const struct radix_tree *);
798 
799 void
800 radix_tree_dump(const struct radix_tree *t)
801 {
802 
803 	printf("tree %p height=%u\n", t, t->t_height);
804 	radix_tree_dump_node(t, t->t_root, 0, t->t_height);
805 }
806 
807 static void
808 test1(void)
809 {
810 	struct radix_tree s;
811 	struct radix_tree *t = &s;
812 	void *results[3];
813 
814 	radix_tree_init_tree(t);
815 	radix_tree_dump(t);
816 	assert(radix_tree_lookup_node(t, 0) == NULL);
817 	assert(radix_tree_lookup_node(t, 1000) == NULL);
818 	assert(radix_tree_insert_node(t, 1000, (void *)0xdeadbea0) == 0);
819 	radix_tree_dump(t);
820 	assert(!radix_tree_get_tag(t, 1000, 0));
821 	assert(!radix_tree_get_tag(t, 1000, 1));
822 	radix_tree_set_tag(t, 1000, 1);
823 	assert(!radix_tree_get_tag(t, 1000, 0));
824 	assert(radix_tree_get_tag(t, 1000, 1));
825 	radix_tree_dump(t);
826 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
827 	assert(radix_tree_insert_node(t, 0, (void *)0xbea0) == 0);
828 	radix_tree_dump(t);
829 	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
830 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
831 	assert(radix_tree_insert_node(t, UINT64_C(10000000000), (void *)0xdea0)
832 	    == 0);
833 	radix_tree_dump(t);
834 	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
835 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
836 	assert(radix_tree_lookup_node(t, UINT64_C(10000000000)) ==
837 	    (void *)0xdea0);
838 	radix_tree_dump(t);
839 	assert(!radix_tree_get_tag(t, 0, 1));
840 	assert(radix_tree_get_tag(t, 1000, 1));
841 	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
842 	radix_tree_set_tag(t, 0, 1);;
843 	radix_tree_set_tag(t, UINT64_C(10000000000), 1);
844 	radix_tree_dump(t);
845 	assert(radix_tree_get_tag(t, 0, 1));
846 	assert(radix_tree_get_tag(t, 1000, 1));
847 	assert(radix_tree_get_tag(t, UINT64_C(10000000000), 1));
848 	radix_tree_clear_tag(t, 0, 1);;
849 	radix_tree_clear_tag(t, UINT64_C(10000000000), 1);
850 	radix_tree_dump(t);
851 	assert(!radix_tree_get_tag(t, 0, 1));
852 	assert(radix_tree_get_tag(t, 1000, 1));
853 	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
854 	radix_tree_dump(t);
855 	assert(radix_tree_replace_node(t, 1000, (void *)0x12345678) ==
856 	    (void *)0xdeadbea0);
857 	assert(!radix_tree_get_tag(t, 1000, 0));
858 	assert(radix_tree_get_tag(t, 1000, 1));
859 	assert(radix_tree_gang_lookup_node(t, 0, results, 3) == 3);
860 	assert(results[0] == (void *)0xbea0);
861 	assert(results[1] == (void *)0x12345678);
862 	assert(results[2] == (void *)0xdea0);
863 	assert(radix_tree_gang_lookup_node(t, 1, results, 3) == 2);
864 	assert(results[0] == (void *)0x12345678);
865 	assert(results[1] == (void *)0xdea0);
866 	assert(radix_tree_gang_lookup_node(t, 1001, results, 3) == 1);
867 	assert(results[0] == (void *)0xdea0);
868 	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3)
869 	    == 0);
870 	assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
871 	    3) == 0);
872 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, 1) == 1);
873 	assert(results[0] == (void *)0x12345678);
874 	assert(entry_tagmask(t->t_root) != 0);
875 	assert(radix_tree_remove_node(t, 1000) == (void *)0x12345678);
876 	assert(entry_tagmask(t->t_root) == 0);
877 	radix_tree_dump(t);
878 	assert(radix_tree_remove_node(t, UINT64_C(10000000000)) ==
879 	    (void *)0xdea0);
880 	radix_tree_dump(t);
881 	assert(radix_tree_remove_node(t, 0) == (void *)0xbea0);
882 	radix_tree_dump(t);
883 	radix_tree_fini_tree(t);
884 }
885 
886 #include <sys/time.h>
887 
888 struct testnode {
889 	uint64_t idx;
890 };
891 
892 static void
893 printops(const char *name, unsigned int n, const struct timeval *stv,
894     const struct timeval *etv)
895 {
896 	uint64_t s = stv->tv_sec * 1000000 + stv->tv_usec;
897 	uint64_t e = etv->tv_sec * 1000000 + etv->tv_usec;
898 
899 	printf("%lf %s/s\n", (double)n / (e - s) * 1000000, name);
900 }
901 
902 #define	TEST2_GANG_LOOKUP_NODES	16
903 
904 static bool
905 test2_should_tag(unsigned int i, radix_tree_tagid_t tagid)
906 {
907 
908 	if (tagid == 0) {
909 		return (i & 0x3) == 0;
910 	} else {
911 		return (i % 7) == 0;
912 	}
913 }
914 
915 static void
916 test2(bool dense)
917 {
918 	struct radix_tree s;
919 	struct radix_tree *t = &s;
920 	struct testnode *n;
921 	unsigned int i;
922 	unsigned int nnodes = 100000;
923 	unsigned int removed;
924 	radix_tree_tagid_t tag;
925 	unsigned int ntagged[RADIX_TREE_TAG_ID_MAX];
926 	struct testnode *nodes;
927 	struct timeval stv;
928 	struct timeval etv;
929 
930 	nodes = malloc(nnodes * sizeof(*nodes));
931 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
932 		ntagged[tag] = 0;
933 	}
934 	radix_tree_init_tree(t);
935 	for (i = 0; i < nnodes; i++) {
936 		n = &nodes[i];
937 		n->idx = random();
938 		if (sizeof(long) == 4) {
939 			n->idx <<= 32;
940 			n->idx |= (uint32_t)random();
941 		}
942 		if (dense) {
943 			n->idx %= nnodes * 2;
944 		}
945 		while (radix_tree_lookup_node(t, n->idx) != NULL) {
946 			n->idx++;
947 		}
948 		radix_tree_insert_node(t, n->idx, n);
949 		for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
950 			if (test2_should_tag(i, tag)) {
951 				radix_tree_set_tag(t, n->idx, tag);
952 				ntagged[tag]++;
953 			}
954 			assert(test2_should_tag(i, tag) ==
955 			    radix_tree_get_tag(t, n->idx, tag));
956 		}
957 	}
958 
959 	gettimeofday(&stv, NULL);
960 	for (i = 0; i < nnodes; i++) {
961 		n = &nodes[i];
962 		assert(radix_tree_lookup_node(t, n->idx) == n);
963 	}
964 	gettimeofday(&etv, NULL);
965 	printops("lookup", nnodes, &stv, &etv);
966 
967 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
968 		gettimeofday(&stv, NULL);
969 		for (i = 0; i < nnodes; i++) {
970 			n = &nodes[i];
971 			assert(test2_should_tag(i, tag) ==
972 			    radix_tree_get_tag(t, n->idx, tag));
973 		}
974 		gettimeofday(&etv, NULL);
975 		printops("get_tag", ntagged[tag], &stv, &etv);
976 	}
977 
978 	gettimeofday(&stv, NULL);
979 	for (i = 0; i < nnodes; i++) {
980 		n = &nodes[i];
981 		radix_tree_remove_node(t, n->idx);
982 	}
983 	gettimeofday(&etv, NULL);
984 	printops("remove", nnodes, &stv, &etv);
985 
986 	gettimeofday(&stv, NULL);
987 	for (i = 0; i < nnodes; i++) {
988 		n = &nodes[i];
989 		radix_tree_insert_node(t, n->idx, n);
990 	}
991 	gettimeofday(&etv, NULL);
992 	printops("insert", nnodes, &stv, &etv);
993 
994 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
995 		ntagged[tag] = 0;
996 		gettimeofday(&stv, NULL);
997 		for (i = 0; i < nnodes; i++) {
998 			n = &nodes[i];
999 			if (test2_should_tag(i, tag)) {
1000 				radix_tree_set_tag(t, n->idx, tag);
1001 				ntagged[tag]++;
1002 			}
1003 		}
1004 		gettimeofday(&etv, NULL);
1005 		printops("set_tag", ntagged[tag], &stv, &etv);
1006 	}
1007 
1008 	gettimeofday(&stv, NULL);
1009 	{
1010 		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1011 		uint64_t nextidx;
1012 		unsigned int nfound;
1013 		unsigned int total;
1014 
1015 		nextidx = 0;
1016 		total = 0;
1017 		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1018 		    (void *)results, __arraycount(results))) > 0) {
1019 			nextidx = results[nfound - 1]->idx + 1;
1020 			total += nfound;
1021 		}
1022 		assert(total == nnodes);
1023 	}
1024 	gettimeofday(&etv, NULL);
1025 	printops("ganglookup", nnodes, &stv, &etv);
1026 
1027 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1028 		gettimeofday(&stv, NULL);
1029 		{
1030 			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1031 			uint64_t nextidx;
1032 			unsigned int nfound;
1033 			unsigned int total;
1034 
1035 			nextidx = 0;
1036 			total = 0;
1037 			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1038 			    nextidx, (void *)results, __arraycount(results),
1039 			    tag)) > 0) {
1040 				nextidx = results[nfound - 1]->idx + 1;
1041 				total += nfound;
1042 			}
1043 			assert(total == ntagged[tag]);
1044 		}
1045 		gettimeofday(&etv, NULL);
1046 		printops("ganglookup_tag", ntagged[tag], &stv, &etv);
1047 	}
1048 
1049 	removed = 0;
1050 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1051 		unsigned int total;
1052 
1053 		total = 0;
1054 		gettimeofday(&stv, NULL);
1055 		{
1056 			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1057 			uint64_t nextidx;
1058 			unsigned int nfound;
1059 
1060 			nextidx = 0;
1061 			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1062 			    nextidx, (void *)results, __arraycount(results),
1063 			    tag)) > 0) {
1064 				for (i = 0; i < nfound; i++) {
1065 					radix_tree_remove_node(t,
1066 					    results[i]->idx);
1067 				}
1068 				nextidx = results[nfound - 1]->idx + 1;
1069 				total += nfound;
1070 			}
1071 			assert(tag != 0 || total == ntagged[tag]);
1072 			assert(total <= ntagged[tag]);
1073 		}
1074 		gettimeofday(&etv, NULL);
1075 		printops("ganglookup_tag+remove", total, &stv, &etv);
1076 		removed += total;
1077 	}
1078 
1079 	gettimeofday(&stv, NULL);
1080 	{
1081 		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1082 		uint64_t nextidx;
1083 		unsigned int nfound;
1084 		unsigned int total;
1085 
1086 		nextidx = 0;
1087 		total = 0;
1088 		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1089 		    (void *)results, __arraycount(results))) > 0) {
1090 			for (i = 0; i < nfound; i++) {
1091 				assert(results[i] == radix_tree_remove_node(t,
1092 				    results[i]->idx));
1093 			}
1094 			nextidx = results[nfound - 1]->idx + 1;
1095 			total += nfound;
1096 		}
1097 		assert(total == nnodes - removed);
1098 	}
1099 	gettimeofday(&etv, NULL);
1100 	printops("ganglookup+remove", nnodes - removed, &stv, &etv);
1101 
1102 	radix_tree_fini_tree(t);
1103 	free(nodes);
1104 }
1105 
1106 int
1107 main(int argc, char *argv[])
1108 {
1109 
1110 	test1();
1111 	printf("dense distribution:\n");
1112 	test2(true);
1113 	printf("sparse distribution:\n");
1114 	test2(false);
1115 	return 0;
1116 }
1117 
1118 #endif /* defined(UNITTEST) */
1119