xref: /netbsd-src/common/lib/libc/gen/radixtree.c (revision 56bb44cae5b13a6b74792381ba1e6d930b26aa67)
1 /*	$NetBSD: radixtree.c,v 1.1 2011/02/22 21:31:15 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c)2011 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * radix tree
31  *
32  * it's designed to work efficiently with dense index distribution.
33  * the memory consumption (number of necessary intermediate nodes)
34  * heavily depends on index distribution.  basically, more dense index
35  * distribution consumes less nodes per item.
36  * approximately,
37  * the best case: about RADIX_TREE_PTR_PER_NODE items per node.
38  * the worst case: RADIX_TREE_MAX_HEIGHT nodes per item.
39  */
40 
41 #include <sys/cdefs.h>
42 
43 #if defined(_KERNEL)
44 __KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.1 2011/02/22 21:31:15 yamt Exp $");
45 #include <sys/param.h>
46 #include <sys/null.h>
47 #include <sys/pool.h>
48 #include <sys/radixtree.h>
49 #else /* defined(_KERNEL) */
50 __RCSID("$NetBSD: radixtree.c,v 1.1 2011/02/22 21:31:15 yamt Exp $");
51 #include <assert.h>
52 #include <errno.h>
53 #include <stdbool.h>
54 #include <stdlib.h>
55 #if 1
56 #define KASSERT assert
57 #else
58 #define KASSERT(a)	/* nothing */
59 #endif
60 /* XXX */
61 #if !defined(CTASSERT)
62 #define	CTASSERT(x)	/* nothing */
63 #endif
64 #endif /* defined(_KERNEL) */
65 
66 #include <sys/radixtree.h>
67 
68 #define	RADIX_TREE_BITS_PER_HEIGHT	4	/* XXX tune */
69 #define	RADIX_TREE_PTR_PER_NODE		(1 << RADIX_TREE_BITS_PER_HEIGHT)
70 #define	RADIX_TREE_MAX_HEIGHT		(64 / RADIX_TREE_BITS_PER_HEIGHT)
71 CTASSERT((64 % RADIX_TREE_BITS_PER_HEIGHT) == 0);
72 
73 CTASSERT(((1 << RADIX_TREE_TAG_ID_MAX) & (sizeof(int) - 1)) == 0);
74 #define	RADIX_TREE_TAG_MASK	((1 << RADIX_TREE_TAG_ID_MAX) - 1)
75 
76 static inline void *
77 entry_ptr(void *p)
78 {
79 
80 	return (void *)((uintptr_t)p & ~RADIX_TREE_TAG_MASK);
81 }
82 
83 static inline unsigned int
84 entry_tagmask(void *p)
85 {
86 
87 	return (uintptr_t)p & RADIX_TREE_TAG_MASK;
88 }
89 
90 static inline void *
91 entry_compose(void *p, unsigned int tagmask)
92 {
93 
94 	return (void *)((uintptr_t)p | tagmask);
95 }
96 
97 static inline bool
98 entry_match_p(void *p, unsigned int tagmask)
99 {
100 
101 	KASSERT(entry_ptr(p) != NULL || entry_tagmask(p) == 0);
102 	if (p == NULL) {
103 		return false;
104 	}
105 	if (tagmask == 0) {
106 		return true;
107 	}
108 	return (entry_tagmask(p) & tagmask) != 0;
109 }
110 
111 static inline unsigned int
112 tagid_to_mask(radix_tree_tagid_t id)
113 {
114 
115 	return 1U << id;
116 }
117 
118 /*
119  * radix_tree_node: an intermediate node
120  *
121  * we don't care the type of leaf nodes.  they are just void *.
122  */
123 
124 struct radix_tree_node {
125 	void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
126 	unsigned int n_nptrs;	/* # of non-NULL pointers in n_ptrs */
127 };
128 
129 static unsigned int
130 any_children_tagmask(struct radix_tree_node *n)
131 {
132 	unsigned int mask;
133 	int i;
134 
135 	mask = 0;
136 	for (i = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
137 		mask |= (unsigned int)(uintptr_t)n->n_ptrs[i];
138 	}
139 	return mask & RADIX_TREE_TAG_MASK;
140 }
141 
142 /*
143  * p_refs[0].pptr == &t->t_root
144  *	:
145  * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x]
146  *	:
147  *	:
148  * p_refs[t->t_height].pptr == &leaf_pointer
149  */
150 
151 struct radix_tree_path {
152 	struct radix_tree_node_ref {
153 		void **pptr;
154 	} p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
155 	int p_lastidx;
156 };
157 
158 static inline void **
159 path_pptr(struct radix_tree *t, struct radix_tree_path *p,
160     unsigned int height)
161 {
162 
163 	KASSERT(height <= t->t_height);
164 	return p->p_refs[height].pptr;
165 }
166 
167 static inline struct radix_tree_node *
168 path_node(struct radix_tree * t, struct radix_tree_path *p, unsigned int height)
169 {
170 
171 	KASSERT(height <= t->t_height);
172 	return entry_ptr(*path_pptr(t, p, height));
173 }
174 
175 static inline unsigned int
176 path_idx(struct radix_tree * t, struct radix_tree_path *p, unsigned int height)
177 {
178 
179 	KASSERT(height <= t->t_height);
180 	return path_pptr(t, p, height + 1) - path_node(t, p, height)->n_ptrs;
181 }
182 
183 /*
184  * radix_tree_init_tree:
185  *
186  * initialize a tree.
187  */
188 
189 void
190 radix_tree_init_tree(struct radix_tree *t)
191 {
192 
193 	t->t_height = 0;
194 	t->t_root = NULL;
195 }
196 
197 /*
198  * radix_tree_init_tree:
199  *
200  * clean up a tree.
201  */
202 
203 void
204 radix_tree_fini_tree(struct radix_tree *t)
205 {
206 
207 	KASSERT(t->t_root == NULL);
208 	KASSERT(t->t_height == 0);
209 }
210 
211 #if defined(_KERNEL)
212 pool_cache_t radix_tree_node_cache;
213 
214 static int
215 radix_tree_node_ctor(void *dummy, void *item, int flags)
216 {
217 	struct radix_tree_node *n = item;
218 
219 	KASSERT(dummy == NULL);
220 	memset(n, 0, sizeof(*n));
221 	return 0;
222 }
223 
224 /*
225  * radix_tree_init:
226  *
227  * initialize the subsystem.
228  */
229 
230 void
231 radix_tree_init(void)
232 {
233 
234 	radix_tree_node_cache = pool_cache_init(sizeof(struct radix_tree_node),
235 	    0, 0, 0, "radix_tree_node", NULL, IPL_NONE, radix_tree_node_ctor,
236 	    NULL, NULL);
237 	KASSERT(radix_tree_node_cache != NULL);
238 }
239 #endif /* defined(_KERNEL) */
240 
241 static bool __unused
242 radix_tree_node_clean_p(const struct radix_tree_node *n)
243 {
244 	unsigned int i;
245 
246 	if (n->n_nptrs != 0) {
247 		return false;
248 	}
249 	for (i = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
250 		if (n->n_ptrs[i] != NULL) {
251 			return false;
252 		}
253 	}
254 	return true;
255 }
256 
257 static struct radix_tree_node *
258 radix_tree_alloc_node(void)
259 {
260 	struct radix_tree_node *n;
261 
262 #if defined(_KERNEL)
263 	n = pool_cache_get(radix_tree_node_cache, PR_NOWAIT);
264 #else /* defined(_KERNEL) */
265 	n = calloc(1, sizeof(*n));
266 #endif /* defined(_KERNEL) */
267 	KASSERT(n == NULL || radix_tree_node_clean_p(n));
268 	return n;
269 }
270 
271 static void
272 radix_tree_free_node(struct radix_tree_node *n)
273 {
274 
275 	KASSERT(radix_tree_node_clean_p(n));
276 #if defined(_KERNEL)
277 	pool_cache_put(radix_tree_node_cache, n);
278 #else /* defined(_KERNEL) */
279 	free(n);
280 #endif /* defined(_KERNEL) */
281 }
282 
283 static int
284 radix_tree_grow(struct radix_tree *t, unsigned int newheight)
285 {
286 	const unsigned int tagmask = entry_tagmask(t->t_root);
287 
288 	KASSERT(newheight <= 64 / RADIX_TREE_BITS_PER_HEIGHT);
289 	if (t->t_root == NULL) {
290 		t->t_height = newheight;
291 		return 0;
292 	}
293 	while (t->t_height < newheight) {
294 		struct radix_tree_node *n;
295 
296 		n = radix_tree_alloc_node();
297 		if (n == NULL) {
298 			/*
299 			 * don't bother to revert our changes.
300 			 * the caller will likely retry.
301 			 */
302 			return ENOMEM;
303 		}
304 		n->n_nptrs = 1;
305 		n->n_ptrs[0] = t->t_root;
306 		t->t_root = entry_compose(n, tagmask);
307 		t->t_height++;
308 	}
309 	return 0;
310 }
311 
312 static inline void **
313 radix_tree_lookup_ptr(struct radix_tree *t, uint64_t idx,
314     struct radix_tree_path *path, bool alloc, const unsigned int tagmask)
315 {
316 	struct radix_tree_node *n;
317 	int hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
318 	int shift;
319 	void **vpp;
320 	const uint64_t mask = (UINT64_C(1) << RADIX_TREE_BITS_PER_HEIGHT) - 1;
321 	struct radix_tree_node_ref *refs = NULL;
322 
323 	KASSERT(tagmask == 0 || !alloc);
324 	KASSERT(path == NULL || !alloc);
325 	vpp = &t->t_root;
326 	if (path != NULL) {
327 		refs = path->p_refs;
328 		refs->pptr = vpp;
329 	}
330 	n = NULL;
331 	for (shift = 64 - RADIX_TREE_BITS_PER_HEIGHT; shift >= 0;) {
332 		struct radix_tree_node *c;
333 		void *entry;
334 		const uint64_t i = (idx >> shift) & mask;
335 
336 		if (shift >= hshift) {
337 			unsigned int newheight;
338 
339 			KASSERT(vpp == &t->t_root);
340 			if (i == 0) {
341 				shift -= RADIX_TREE_BITS_PER_HEIGHT;
342 				continue;
343 			}
344 			if (!alloc) {
345 				if (path != NULL) {
346 					KASSERT((refs - path->p_refs) == 0);
347 					path->p_lastidx = 0;
348 				}
349 				return NULL;
350 			}
351 			newheight = shift / RADIX_TREE_BITS_PER_HEIGHT + 1;
352 			if (radix_tree_grow(t, newheight)) {
353 				return NULL;
354 			}
355 			hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
356 		}
357 		entry = *vpp;
358 		c = entry_ptr(entry);
359 		if (c == NULL ||
360 		    (tagmask != 0 &&
361 		    (entry_tagmask(entry) & tagmask) == 0)) {
362 			if (!alloc) {
363 				if (path != NULL) {
364 					path->p_lastidx = refs - path->p_refs;
365 				}
366 				return NULL;
367 			}
368 			c = radix_tree_alloc_node();
369 			if (c == NULL) {
370 				return NULL;
371 			}
372 			*vpp = c;
373 			if (n != NULL) {
374 				KASSERT(n->n_nptrs < RADIX_TREE_PTR_PER_NODE);
375 				n->n_nptrs++;
376 			}
377 		}
378 		n = c;
379 		vpp = &n->n_ptrs[i];
380 		if (path != NULL) {
381 			refs++;
382 			refs->pptr = vpp;
383 		}
384 		shift -= RADIX_TREE_BITS_PER_HEIGHT;
385 	}
386 	if (alloc) {
387 		KASSERT(*vpp == NULL);
388 		if (n != NULL) {
389 			KASSERT(n->n_nptrs < RADIX_TREE_PTR_PER_NODE);
390 			n->n_nptrs++;
391 		}
392 	}
393 	if (path != NULL) {
394 		path->p_lastidx = refs - path->p_refs;
395 	}
396 	return vpp;
397 }
398 
399 /*
400  * radix_tree_insert_node:
401  *
402  * insert the node at idx.
403  * it's illegal to insert NULL.
404  * it's illegal to insert a non-aligned pointer.
405  *
406  * this function returns ENOMEM if necessary memory allocation failed.
407  * otherwise, this function returns 0.
408  *
409  * note that inserting a node can involves memory allocation for intermediate
410  * nodes.  if _KERNEL, it's done with non-blocking IPL_NONE memory allocation.
411  */
412 
413 int
414 radix_tree_insert_node(struct radix_tree *t, uint64_t idx, void *p)
415 {
416 	void **vpp;
417 
418 	KASSERT(p != NULL);
419 	KASSERT(entry_compose(p, 0) == p);
420 	vpp = radix_tree_lookup_ptr(t, idx, NULL, true, 0);
421 	if (vpp == NULL) {
422 		return ENOMEM;
423 	}
424 	KASSERT(*vpp == NULL);
425 	*vpp = p;
426 	return 0;
427 }
428 
429 void *
430 radix_tree_replace_node(struct radix_tree *t, uint64_t idx, void *p)
431 {
432 	void **vpp;
433 	void *oldp;
434 
435 	KASSERT(p != NULL);
436 	KASSERT(entry_compose(p, 0) == p);
437 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
438 	KASSERT(vpp != NULL);
439 	oldp = *vpp;
440 	KASSERT(oldp != NULL);
441 	*vpp = entry_compose(p, entry_tagmask(*vpp));
442 	return entry_ptr(oldp);
443 }
444 
445 /*
446  * radix_tree_remove_node:
447  *
448  * remove the node at idx.
449  * it's illegal to try to remove a node which has not been inserted.
450  */
451 
452 void *
453 radix_tree_remove_node(struct radix_tree *t, uint64_t idx)
454 {
455 	struct radix_tree_path path;
456 	void **vpp;
457 	void *oldp;
458 	int i;
459 
460 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
461 	KASSERT(vpp != NULL);
462 	oldp = *vpp;
463 	KASSERT(oldp != NULL);
464 	KASSERT(path.p_lastidx == t->t_height);
465 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
466 	*vpp = NULL;
467 	for (i = t->t_height - 1; i >= 0; i--) {
468 		void *entry;
469 		struct radix_tree_node ** const pptr =
470 		    (struct radix_tree_node **)path_pptr(t, &path, i);
471 		struct radix_tree_node *n;
472 
473 		KASSERT(pptr != NULL);
474 		entry = *pptr;
475 		n = entry_ptr(entry);
476 		KASSERT(n != NULL);
477 		KASSERT(n->n_nptrs > 0);
478 		n->n_nptrs--;
479 		if (n->n_nptrs > 0) {
480 			break;
481 		}
482 		radix_tree_free_node(n);
483 		*pptr = NULL;
484 	}
485 	/*
486 	 * fix up height
487 	 */
488 	if (i < 0) {
489 		KASSERT(t->t_root == NULL);
490 		t->t_height = 0;
491 	}
492 	/*
493 	 * update tags
494 	 */
495 	for (; i >= 0; i--) {
496 		void *entry;
497 		struct radix_tree_node ** const pptr =
498 		    (struct radix_tree_node **)path_pptr(t, &path, i);
499 		struct radix_tree_node *n;
500 		unsigned int newmask;
501 
502 		KASSERT(pptr != NULL);
503 		entry = *pptr;
504 		n = entry_ptr(entry);
505 		KASSERT(n != NULL);
506 		KASSERT(n->n_nptrs > 0);
507 		newmask = any_children_tagmask(n);
508 		if (newmask == entry_tagmask(entry)) {
509 			break;
510 		}
511 		*pptr = entry_compose(n, newmask);
512 	}
513 	/*
514 	 * XXX is it worth to try to reduce height?
515 	 * if we do that, make radix_tree_grow rollback its change as well.
516 	 */
517 	return entry_ptr(oldp);
518 }
519 
520 /*
521  * radix_tree_lookup_node:
522  *
523  * returns the node at idx.
524  * returns NULL if nothing is found at idx.
525  */
526 
527 void *
528 radix_tree_lookup_node(struct radix_tree *t, uint64_t idx)
529 {
530 	void **vpp;
531 
532 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
533 	if (vpp == NULL) {
534 		return NULL;
535 	}
536 	return entry_ptr(*vpp);
537 }
538 
539 static inline void
540 gang_lookup_init(struct radix_tree *t, uint64_t idx,
541     struct radix_tree_path *path, const unsigned int tagmask)
542 {
543 	void **vpp;
544 
545 	vpp = radix_tree_lookup_ptr(t, idx, path, false, tagmask);
546 	KASSERT(vpp == NULL ||
547 	    vpp == path_pptr(t, path, path->p_lastidx));
548 	KASSERT(&t->t_root == path_pptr(t, path, 0));
549 }
550 
551 static inline unsigned int
552 gang_lookup_scan(struct radix_tree *t, struct radix_tree_path *path,
553     void **results, unsigned int maxresults, const unsigned int tagmask)
554 {
555 	void **vpp;
556 	int nfound;
557 	unsigned int lastidx;
558 
559 	KASSERT(maxresults > 0);
560 	lastidx = path->p_lastidx;
561 	if (lastidx == 0) {
562 		return 0;
563 	}
564 	nfound = 0;
565 	vpp = path_pptr(t, path, lastidx);
566 	while (/*CONSTCOND*/true) {
567 		struct radix_tree_node *n;
568 		int i;
569 
570 		if (entry_match_p(*vpp, tagmask)) {
571 			KASSERT(lastidx == t->t_height);
572 			/*
573 			 * record the non-NULL leaf.
574 			 */
575 			results[nfound] = entry_ptr(*vpp);
576 			nfound++;
577 			if (nfound == maxresults) {
578 				return nfound;
579 			}
580 		}
581 scan_siblings:
582 		/*
583 		 * try to find the next non-NULL sibling.
584 		 */
585 		n = path_node(t, path, lastidx - 1);
586 		if (*vpp != NULL && n->n_nptrs == 1) {
587 			/*
588 			 * optimization
589 			 */
590 			goto no_siblings;
591 		}
592 		for (i = path_idx(t, path, lastidx - 1) + 1;
593 		    i < RADIX_TREE_PTR_PER_NODE;
594 		    i++) {
595 			if (entry_match_p(n->n_ptrs[i], tagmask)) {
596 				vpp = &n->n_ptrs[i];
597 				path->p_refs[lastidx].pptr = vpp;
598 				KASSERT(path_idx(t, path, lastidx - 1)
599 				    == i);
600 				break;
601 			}
602 		}
603 		if (i == RADIX_TREE_PTR_PER_NODE) {
604 no_siblings:
605 			/*
606 			 * not found.  go to parent.
607 			 */
608 			lastidx--;
609 			if (lastidx == 0) {
610 				return nfound;
611 			}
612 			vpp = path_pptr(t, path, lastidx);
613 			goto scan_siblings;
614 		}
615 		/*
616 		 * descending the left-most child node, upto the leaf or NULL.
617 		 */
618 		while (entry_match_p(*vpp, tagmask) && lastidx < t->t_height) {
619 			n = entry_ptr(*vpp);
620 			vpp = &n->n_ptrs[0];
621 			lastidx++;
622 			path->p_refs[lastidx].pptr = vpp;
623 		}
624 	}
625 }
626 
627 /*
628  * radix_tree_gang_lookup_node:
629  *
630  * search nodes starting from idx in the ascending order.
631  * results should be an array large enough to hold maxresults pointers.
632  * returns the number of nodes found, up to maxresults.
633  * returning less than maxresults means there are no more nodes.
634  *
635  * the result of this function is semantically equivalent to what could be
636  * obtained by repeated calls of radix_tree_lookup_node with increasing index.
637  * but this function is much faster when node indexes are distributed sparsely.
638  *
639  * note that this function doesn't return exact values of node indexes of
640  * found nodes.  if they are important for a caller, it's the caller's
641  * responsibility to check them, typically by examinining the returned nodes
642  * using some caller-specific knowledge about them.
643  */
644 
645 unsigned int
646 radix_tree_gang_lookup_node(struct radix_tree *t, uint64_t idx,
647     void **results, unsigned int maxresults)
648 {
649 	struct radix_tree_path path;
650 
651 	gang_lookup_init(t, idx, &path, 0);
652 	return gang_lookup_scan(t, &path, results, maxresults, 0);
653 }
654 
655 /*
656  * radix_tree_gang_lookup_tagged_node:
657  *
658  * same as radix_tree_gang_lookup_node except that this one only returns
659  * nodes tagged with tagid.
660  */
661 
662 unsigned int
663 radix_tree_gang_lookup_tagged_node(struct radix_tree *t, uint64_t idx,
664     void **results, unsigned int maxresults, radix_tree_tagid_t tagid)
665 {
666 	struct radix_tree_path path;
667 	const unsigned int tagmask = tagid_to_mask(tagid);
668 
669 	gang_lookup_init(t, idx, &path, tagmask);
670 	return gang_lookup_scan(t, &path, results, maxresults, tagmask);
671 }
672 
673 bool
674 radix_tree_get_tag(struct radix_tree *t, uint64_t idx,
675     radix_tree_tagid_t tagid)
676 {
677 #if 1
678 	const unsigned int tagmask = tagid_to_mask(tagid);
679 	void **vpp;
680 
681 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, tagmask);
682 	if (vpp == NULL) {
683 		return false;
684 	}
685 	KASSERT(*vpp != NULL);
686 	return (entry_tagmask(*vpp) & tagmask) != 0;
687 #else
688 	const unsigned int tagmask = tagid_to_mask(tagid);
689 	void **vpp;
690 
691 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
692 	KASSERT(vpp != NULL);
693 	return (entry_tagmask(*vpp) & tagmask) != 0;
694 #endif
695 }
696 
697 void
698 radix_tree_set_tag(struct radix_tree *t, uint64_t idx,
699     radix_tree_tagid_t tagid)
700 {
701 	struct radix_tree_path path;
702 	const unsigned int tagmask = tagid_to_mask(tagid);
703 	void **vpp;
704 	int i;
705 
706 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
707 	KASSERT(vpp != NULL);
708 	KASSERT(*vpp != NULL);
709 	KASSERT(path.p_lastidx == t->t_height);
710 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
711 	for (i = t->t_height; i >= 0; i--) {
712 		void ** const pptr = (void **)path_pptr(t, &path, i);
713 		void *entry;
714 
715 		KASSERT(pptr != NULL);
716 		entry = *pptr;
717 		if ((entry_tagmask(entry) & tagmask) != 0) {
718 			break;
719 		}
720 		*pptr = (void *)((uintptr_t)entry | tagmask);
721 	}
722 }
723 
724 void
725 radix_tree_clear_tag(struct radix_tree *t, uint64_t idx,
726     radix_tree_tagid_t tagid)
727 {
728 	struct radix_tree_path path;
729 	const unsigned int tagmask = tagid_to_mask(tagid);
730 	void **vpp;
731 	int i;
732 
733 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
734 	KASSERT(vpp != NULL);
735 	KASSERT(*vpp != NULL);
736 	KASSERT(path.p_lastidx == t->t_height);
737 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
738 	if ((entry_tagmask(*vpp) & tagmask) == 0) {
739 		return;
740 	}
741 	for (i = t->t_height; i >= 0; i--) {
742 		void ** const pptr = (void **)path_pptr(t, &path, i);
743 		void *entry;
744 
745 		KASSERT(pptr != NULL);
746 		entry = *pptr;
747 		KASSERT((entry_tagmask(entry) & tagmask) != 0);
748 		*pptr = entry_compose(entry_ptr(entry),
749 		    entry_tagmask(entry) & ~tagmask);
750 		if (0 < i && i < t->t_height - 1) {
751 			struct radix_tree_node *n = path_node(t, &path, i - 1);
752 
753 			if ((any_children_tagmask(n) & tagmask) != 0) {
754 				break;
755 			}
756 		}
757 	}
758 }
759 
760 #if defined(UNITTEST)
761 
762 #include <inttypes.h>
763 #include <stdio.h>
764 
765 static void
766 radix_tree_dump_node(const struct radix_tree *t, void *vp,
767     uint64_t offset, unsigned int height)
768 {
769 	struct radix_tree_node *n;
770 	unsigned int i;
771 
772 	for (i = 0; i < t->t_height - height; i++) {
773 		printf(" ");
774 	}
775 	if (entry_tagmask(vp) == 0) {
776 		printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
777 	} else {
778 		printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
779 		    entry_tagmask(vp));
780 	}
781 	if (height == 0) {
782 		printf(" (leaf)\n");
783 		return;
784 	}
785 	n = entry_ptr(vp);
786 	assert(any_children_tagmask(n) == entry_tagmask(vp));
787 	printf(" (%u children)\n", n->n_nptrs);
788 	for (i = 0; i < __arraycount(n->n_ptrs); i++) {
789 		void *c;
790 
791 		c = n->n_ptrs[i];
792 		if (c == NULL) {
793 			continue;
794 		}
795 		radix_tree_dump_node(t, c,
796 		    offset + i * (UINT64_C(1) <<
797 		    (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
798 	}
799 }
800 
801 void radix_tree_dump(const struct radix_tree *);
802 
803 void
804 radix_tree_dump(const struct radix_tree *t)
805 {
806 
807 	printf("tree %p height=%u\n", t, t->t_height);
808 	radix_tree_dump_node(t, t->t_root, 0, t->t_height);
809 }
810 
811 static void
812 test1(void)
813 {
814 	struct radix_tree s;
815 	struct radix_tree *t = &s;
816 	void *results[3];
817 
818 	radix_tree_init_tree(t);
819 	radix_tree_dump(t);
820 	assert(radix_tree_lookup_node(t, 0) == NULL);
821 	assert(radix_tree_lookup_node(t, 1000) == NULL);
822 	assert(radix_tree_insert_node(t, 1000, (void *)0xdeadbea0) == 0);
823 	radix_tree_dump(t);
824 	assert(!radix_tree_get_tag(t, 1000, 0));
825 	assert(!radix_tree_get_tag(t, 1000, 1));
826 	radix_tree_set_tag(t, 1000, 1);
827 	assert(!radix_tree_get_tag(t, 1000, 0));
828 	assert(radix_tree_get_tag(t, 1000, 1));
829 	radix_tree_dump(t);
830 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
831 	assert(radix_tree_insert_node(t, 0, (void *)0xbea0) == 0);
832 	radix_tree_dump(t);
833 	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
834 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
835 	assert(radix_tree_insert_node(t, UINT64_C(10000000000), (void *)0xdea0)
836 	    == 0);
837 	radix_tree_dump(t);
838 	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
839 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
840 	assert(radix_tree_lookup_node(t, UINT64_C(10000000000)) ==
841 	    (void *)0xdea0);
842 	radix_tree_dump(t);
843 	assert(!radix_tree_get_tag(t, 0, 1));
844 	assert(radix_tree_get_tag(t, 1000, 1));
845 	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
846 	radix_tree_set_tag(t, 0, 1);;
847 	radix_tree_set_tag(t, UINT64_C(10000000000), 1);
848 	radix_tree_dump(t);
849 	assert(radix_tree_get_tag(t, 0, 1));
850 	assert(radix_tree_get_tag(t, 1000, 1));
851 	assert(radix_tree_get_tag(t, UINT64_C(10000000000), 1));
852 	radix_tree_clear_tag(t, 0, 1);;
853 	radix_tree_clear_tag(t, UINT64_C(10000000000), 1);
854 	radix_tree_dump(t);
855 	assert(!radix_tree_get_tag(t, 0, 1));
856 	assert(radix_tree_get_tag(t, 1000, 1));
857 	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
858 	radix_tree_dump(t);
859 	assert(radix_tree_replace_node(t, 1000, (void *)0x12345678) ==
860 	    (void *)0xdeadbea0);
861 	assert(!radix_tree_get_tag(t, 1000, 0));
862 	assert(radix_tree_get_tag(t, 1000, 1));
863 	assert(radix_tree_gang_lookup_node(t, 0, results, 3) == 3);
864 	assert(results[0] == (void *)0xbea0);
865 	assert(results[1] == (void *)0x12345678);
866 	assert(results[2] == (void *)0xdea0);
867 	assert(radix_tree_gang_lookup_node(t, 1, results, 3) == 2);
868 	assert(results[0] == (void *)0x12345678);
869 	assert(results[1] == (void *)0xdea0);
870 	assert(radix_tree_gang_lookup_node(t, 1001, results, 3) == 1);
871 	assert(results[0] == (void *)0xdea0);
872 	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3)
873 	    == 0);
874 	assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
875 	    3) == 0);
876 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, 1) == 1);
877 	assert(results[0] == (void *)0x12345678);
878 	assert(entry_tagmask(t->t_root) != 0);
879 	assert(radix_tree_remove_node(t, 1000) == (void *)0x12345678);
880 	assert(entry_tagmask(t->t_root) == 0);
881 	radix_tree_dump(t);
882 	assert(radix_tree_remove_node(t, UINT64_C(10000000000)) ==
883 	    (void *)0xdea0);
884 	radix_tree_dump(t);
885 	assert(radix_tree_remove_node(t, 0) == (void *)0xbea0);
886 	radix_tree_dump(t);
887 	radix_tree_fini_tree(t);
888 }
889 
890 #include <sys/time.h>
891 
892 struct testnode {
893 	uint64_t idx;
894 };
895 
896 static void
897 printops(const char *name, unsigned int n, const struct timeval *stv,
898     const struct timeval *etv)
899 {
900 	uint64_t s = stv->tv_sec * 1000000 + stv->tv_usec;
901 	uint64_t e = etv->tv_sec * 1000000 + etv->tv_usec;
902 
903 	printf("%lf %s/s\n", (double)n / (e - s) * 1000000, name);
904 }
905 
906 #define	TEST2_GANG_LOOKUP_NODES	16
907 
908 static bool
909 test2_should_tag(unsigned int i, radix_tree_tagid_t tagid)
910 {
911 
912 	if (tagid == 0) {
913 		return (i & 0x3) == 0;
914 	} else {
915 		return (i % 7) == 0;
916 	}
917 }
918 
919 static void
920 test2(bool dense)
921 {
922 	struct radix_tree s;
923 	struct radix_tree *t = &s;
924 	struct testnode *n;
925 	unsigned int i;
926 	unsigned int nnodes = 100000;
927 	unsigned int removed;
928 	radix_tree_tagid_t tag;
929 	unsigned int ntagged[RADIX_TREE_TAG_ID_MAX];
930 	struct testnode *nodes;
931 	struct timeval stv;
932 	struct timeval etv;
933 
934 	nodes = malloc(nnodes * sizeof(*nodes));
935 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
936 		ntagged[tag] = 0;
937 	}
938 	radix_tree_init_tree(t);
939 	for (i = 0; i < nnodes; i++) {
940 		n = &nodes[i];
941 		n->idx = random();
942 		if (sizeof(long) == 4) {
943 			n->idx <<= 32;
944 			n->idx |= (uint32_t)random();
945 		}
946 		if (dense) {
947 			n->idx %= nnodes * 2;
948 		}
949 		while (radix_tree_lookup_node(t, n->idx) != NULL) {
950 			n->idx++;
951 		}
952 		radix_tree_insert_node(t, n->idx, n);
953 		for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
954 			if (test2_should_tag(i, tag)) {
955 				radix_tree_set_tag(t, n->idx, tag);
956 				ntagged[tag]++;
957 			}
958 			assert(test2_should_tag(i, tag) ==
959 			    radix_tree_get_tag(t, n->idx, tag));
960 		}
961 	}
962 
963 	gettimeofday(&stv, NULL);
964 	for (i = 0; i < nnodes; i++) {
965 		n = &nodes[i];
966 		assert(radix_tree_lookup_node(t, n->idx) == n);
967 	}
968 	gettimeofday(&etv, NULL);
969 	printops("lookup", nnodes, &stv, &etv);
970 
971 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
972 		gettimeofday(&stv, NULL);
973 		for (i = 0; i < nnodes; i++) {
974 			n = &nodes[i];
975 			assert(test2_should_tag(i, tag) ==
976 			    radix_tree_get_tag(t, n->idx, tag));
977 		}
978 		gettimeofday(&etv, NULL);
979 		printops("get_tag", ntagged[tag], &stv, &etv);
980 	}
981 
982 	gettimeofday(&stv, NULL);
983 	for (i = 0; i < nnodes; i++) {
984 		n = &nodes[i];
985 		radix_tree_remove_node(t, n->idx);
986 	}
987 	gettimeofday(&etv, NULL);
988 	printops("remove", nnodes, &stv, &etv);
989 
990 	gettimeofday(&stv, NULL);
991 	for (i = 0; i < nnodes; i++) {
992 		n = &nodes[i];
993 		radix_tree_insert_node(t, n->idx, n);
994 	}
995 	gettimeofday(&etv, NULL);
996 	printops("insert", nnodes, &stv, &etv);
997 
998 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
999 		ntagged[tag] = 0;
1000 		gettimeofday(&stv, NULL);
1001 		for (i = 0; i < nnodes; i++) {
1002 			n = &nodes[i];
1003 			if (test2_should_tag(i, tag)) {
1004 				radix_tree_set_tag(t, n->idx, tag);
1005 				ntagged[tag]++;
1006 			}
1007 		}
1008 		gettimeofday(&etv, NULL);
1009 		printops("set_tag", ntagged[tag], &stv, &etv);
1010 	}
1011 
1012 	gettimeofday(&stv, NULL);
1013 	{
1014 		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1015 		uint64_t nextidx;
1016 		unsigned int nfound;
1017 		unsigned int total;
1018 
1019 		nextidx = 0;
1020 		total = 0;
1021 		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1022 		    (void *)results, __arraycount(results))) > 0) {
1023 			nextidx = results[nfound - 1]->idx + 1;
1024 			total += nfound;
1025 		}
1026 		assert(total == nnodes);
1027 	}
1028 	gettimeofday(&etv, NULL);
1029 	printops("ganglookup", nnodes, &stv, &etv);
1030 
1031 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1032 		gettimeofday(&stv, NULL);
1033 		{
1034 			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1035 			uint64_t nextidx;
1036 			unsigned int nfound;
1037 			unsigned int total;
1038 
1039 			nextidx = 0;
1040 			total = 0;
1041 			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1042 			    nextidx, (void *)results, __arraycount(results),
1043 			    tag)) > 0) {
1044 				nextidx = results[nfound - 1]->idx + 1;
1045 				total += nfound;
1046 			}
1047 			assert(total == ntagged[tag]);
1048 		}
1049 		gettimeofday(&etv, NULL);
1050 		printops("ganglookup_tag", ntagged[tag], &stv, &etv);
1051 	}
1052 
1053 	removed = 0;
1054 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1055 		unsigned int total;
1056 
1057 		total = 0;
1058 		gettimeofday(&stv, NULL);
1059 		{
1060 			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1061 			uint64_t nextidx;
1062 			unsigned int nfound;
1063 
1064 			nextidx = 0;
1065 			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1066 			    nextidx, (void *)results, __arraycount(results),
1067 			    tag)) > 0) {
1068 				for (i = 0; i < nfound; i++) {
1069 					radix_tree_remove_node(t,
1070 					    results[i]->idx);
1071 				}
1072 				nextidx = results[nfound - 1]->idx + 1;
1073 				total += nfound;
1074 			}
1075 			assert(tag != 0 || total == ntagged[tag]);
1076 			assert(total <= ntagged[tag]);
1077 		}
1078 		gettimeofday(&etv, NULL);
1079 		printops("ganglookup_tag+remove", total, &stv, &etv);
1080 		removed += total;
1081 	}
1082 
1083 	gettimeofday(&stv, NULL);
1084 	{
1085 		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1086 		uint64_t nextidx;
1087 		unsigned int nfound;
1088 		unsigned int total;
1089 
1090 		nextidx = 0;
1091 		total = 0;
1092 		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1093 		    (void *)results, __arraycount(results))) > 0) {
1094 			for (i = 0; i < nfound; i++) {
1095 				assert(results[i] == radix_tree_remove_node(t,
1096 				    results[i]->idx));
1097 			}
1098 			nextidx = results[nfound - 1]->idx + 1;
1099 			total += nfound;
1100 		}
1101 		assert(total == nnodes - removed);
1102 	}
1103 	gettimeofday(&etv, NULL);
1104 	printops("ganglookup+remove", nnodes - removed, &stv, &etv);
1105 
1106 	radix_tree_fini_tree(t);
1107 	free(nodes);
1108 }
1109 
1110 int
1111 main(int argc, char *argv[])
1112 {
1113 
1114 	test1();
1115 	printf("dense distribution:\n");
1116 	test2(true);
1117 	printf("sparse distribution:\n");
1118 	test2(false);
1119 	return 0;
1120 }
1121 
1122 #endif /* defined(UNITTEST) */
1123