xref: /netbsd-src/common/lib/libc/gen/radixtree.c (revision 88fcb00c0357f2d7c1774f86a352637bfda96184)
1 /*	$NetBSD: radixtree.c,v 1.7 2011/05/19 10:06:56 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c)2011 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * radix tree
31  *
32  * it's designed to work efficiently with dense index distribution.
33  * the memory consumption (number of necessary intermediate nodes)
34  * heavily depends on index distribution.  basically, more dense index
35  * distribution consumes less nodes per item.
36  * approximately,
37  * the best case: about RADIX_TREE_PTR_PER_NODE items per node.
38  * the worst case: RADIX_TREE_MAX_HEIGHT nodes per item.
39  */
40 
41 #include <sys/cdefs.h>
42 
43 #if defined(_KERNEL) || defined(_STANDALONE)
44 __KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.7 2011/05/19 10:06:56 yamt Exp $");
45 #include <sys/param.h>
46 #include <sys/errno.h>
47 #include <sys/pool.h>
48 #include <sys/radixtree.h>
49 #include <lib/libkern/libkern.h>
50 #if defined(_STANDALONE)
51 #include <lib/libsa/stand.h>
52 #endif /* defined(_STANDALONE) */
53 #else /* defined(_KERNEL) || defined(_STANDALONE) */
54 __RCSID("$NetBSD: radixtree.c,v 1.7 2011/05/19 10:06:56 yamt Exp $");
55 #include <assert.h>
56 #include <errno.h>
57 #include <stdbool.h>
58 #include <stdlib.h>
59 #if 1
60 #define KASSERT assert
61 #else
62 #define KASSERT(a)	/* nothing */
63 #endif
64 #endif /* defined(_KERNEL) || defined(_STANDALONE) */
65 
66 #include <sys/radixtree.h>
67 
68 #define	RADIX_TREE_BITS_PER_HEIGHT	4	/* XXX tune */
69 #define	RADIX_TREE_PTR_PER_NODE		(1 << RADIX_TREE_BITS_PER_HEIGHT)
70 #define	RADIX_TREE_MAX_HEIGHT		(64 / RADIX_TREE_BITS_PER_HEIGHT)
71 __CTASSERT((64 % RADIX_TREE_BITS_PER_HEIGHT) == 0);
72 
73 __CTASSERT(((1 << RADIX_TREE_TAG_ID_MAX) & (sizeof(int) - 1)) == 0);
74 #define	RADIX_TREE_TAG_MASK	((1 << RADIX_TREE_TAG_ID_MAX) - 1)
75 
76 static inline void *
77 entry_ptr(void *p)
78 {
79 
80 	return (void *)((uintptr_t)p & ~RADIX_TREE_TAG_MASK);
81 }
82 
83 static inline unsigned int
84 entry_tagmask(void *p)
85 {
86 
87 	return (uintptr_t)p & RADIX_TREE_TAG_MASK;
88 }
89 
90 static inline void *
91 entry_compose(void *p, unsigned int tagmask)
92 {
93 
94 	return (void *)((uintptr_t)p | tagmask);
95 }
96 
97 static inline bool
98 entry_match_p(void *p, unsigned int tagmask)
99 {
100 
101 	KASSERT(entry_ptr(p) != NULL || entry_tagmask(p) == 0);
102 	if (p == NULL) {
103 		return false;
104 	}
105 	if (tagmask == 0) {
106 		return true;
107 	}
108 	return (entry_tagmask(p) & tagmask) != 0;
109 }
110 
111 static inline unsigned int
112 tagid_to_mask(radix_tree_tagid_t id)
113 {
114 
115 	KASSERT(id >= 0);
116 	KASSERT(id < RADIX_TREE_TAG_ID_MAX);
117 	return 1U << id;
118 }
119 
120 /*
121  * radix_tree_node: an intermediate node
122  *
123  * we don't care the type of leaf nodes.  they are just void *.
124  */
125 
126 struct radix_tree_node {
127 	void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
128 	unsigned int n_nptrs;	/* # of non-NULL pointers in n_ptrs */
129 };
130 
131 /*
132  * any_children_tagmask:
133  *
134  * return OR'ed tagmask of the given node's children.
135  */
136 
137 static unsigned int
138 any_children_tagmask(struct radix_tree_node *n)
139 {
140 	unsigned int mask;
141 	int i;
142 
143 	mask = 0;
144 	for (i = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
145 		mask |= (unsigned int)(uintptr_t)n->n_ptrs[i];
146 	}
147 	return mask & RADIX_TREE_TAG_MASK;
148 }
149 
150 /*
151  * p_refs[0].pptr == &t->t_root
152  *	:
153  * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x]
154  *	:
155  *	:
156  * p_refs[t->t_height].pptr == &leaf_pointer
157  */
158 
159 struct radix_tree_path {
160 	struct radix_tree_node_ref {
161 		void **pptr;
162 	} p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
163 	int p_lastidx;
164 };
165 
166 static inline void **
167 path_pptr(struct radix_tree *t, struct radix_tree_path *p,
168     unsigned int height)
169 {
170 
171 	KASSERT(height <= t->t_height);
172 	return p->p_refs[height].pptr;
173 }
174 
175 static inline struct radix_tree_node *
176 path_node(struct radix_tree * t, struct radix_tree_path *p, unsigned int height)
177 {
178 
179 	KASSERT(height <= t->t_height);
180 	return entry_ptr(*path_pptr(t, p, height));
181 }
182 
183 static inline unsigned int
184 path_idx(struct radix_tree * t, struct radix_tree_path *p, unsigned int height)
185 {
186 
187 	KASSERT(height <= t->t_height);
188 	return path_pptr(t, p, height + 1) - path_node(t, p, height)->n_ptrs;
189 }
190 
191 /*
192  * radix_tree_init_tree:
193  *
194  * initialize a tree.
195  */
196 
197 void
198 radix_tree_init_tree(struct radix_tree *t)
199 {
200 
201 	t->t_height = 0;
202 	t->t_root = NULL;
203 }
204 
205 /*
206  * radix_tree_init_tree:
207  *
208  * clean up a tree.
209  */
210 
211 void
212 radix_tree_fini_tree(struct radix_tree *t)
213 {
214 
215 	KASSERT(t->t_root == NULL);
216 	KASSERT(t->t_height == 0);
217 }
218 
219 static void
220 radix_tree_node_init(struct radix_tree_node *n)
221 {
222 
223 	memset(n, 0, sizeof(*n));
224 }
225 
226 #if defined(_KERNEL)
227 pool_cache_t radix_tree_node_cache __read_mostly;
228 
229 static int
230 radix_tree_node_ctor(void *dummy, void *item, int flags)
231 {
232 	struct radix_tree_node *n = item;
233 
234 	KASSERT(dummy == NULL);
235 	radix_tree_node_init(n);
236 	return 0;
237 }
238 
239 /*
240  * radix_tree_init:
241  *
242  * initialize the subsystem.
243  */
244 
245 void
246 radix_tree_init(void)
247 {
248 
249 	radix_tree_node_cache = pool_cache_init(sizeof(struct radix_tree_node),
250 	    0, 0, 0, "radix_tree_node", NULL, IPL_NONE, radix_tree_node_ctor,
251 	    NULL, NULL);
252 	KASSERT(radix_tree_node_cache != NULL);
253 }
254 #endif /* defined(_KERNEL) */
255 
256 static bool __unused
257 radix_tree_node_clean_p(const struct radix_tree_node *n)
258 {
259 	unsigned int i;
260 
261 	if (n->n_nptrs != 0) {
262 		return false;
263 	}
264 	for (i = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
265 		if (n->n_ptrs[i] != NULL) {
266 			return false;
267 		}
268 	}
269 	return true;
270 }
271 
272 static struct radix_tree_node *
273 radix_tree_alloc_node(void)
274 {
275 	struct radix_tree_node *n;
276 
277 #if defined(_KERNEL)
278 	n = pool_cache_get(radix_tree_node_cache, PR_NOWAIT);
279 #else /* defined(_KERNEL) */
280 #if defined(_STANDALONE)
281 	n = alloc(sizeof(*n));
282 #else /* defined(_STANDALONE) */
283 	n = malloc(sizeof(*n));
284 #endif /* defined(_STANDALONE) */
285 	if (n != NULL) {
286 		radix_tree_node_init(n);
287 	}
288 #endif /* defined(_KERNEL) */
289 	KASSERT(n == NULL || radix_tree_node_clean_p(n));
290 	return n;
291 }
292 
293 static void
294 radix_tree_free_node(struct radix_tree_node *n)
295 {
296 
297 	KASSERT(radix_tree_node_clean_p(n));
298 #if defined(_KERNEL)
299 	pool_cache_put(radix_tree_node_cache, n);
300 #elif defined(_STANDALONE)
301 	dealloc(n, sizeof(*n));
302 #else
303 	free(n);
304 #endif
305 }
306 
307 static int
308 radix_tree_grow(struct radix_tree *t, unsigned int newheight)
309 {
310 	const unsigned int tagmask = entry_tagmask(t->t_root);
311 
312 	KASSERT(newheight <= 64 / RADIX_TREE_BITS_PER_HEIGHT);
313 	if (t->t_root == NULL) {
314 		t->t_height = newheight;
315 		return 0;
316 	}
317 	while (t->t_height < newheight) {
318 		struct radix_tree_node *n;
319 
320 		n = radix_tree_alloc_node();
321 		if (n == NULL) {
322 			/*
323 			 * don't bother to revert our changes.
324 			 * the caller will likely retry.
325 			 */
326 			return ENOMEM;
327 		}
328 		n->n_nptrs = 1;
329 		n->n_ptrs[0] = t->t_root;
330 		t->t_root = entry_compose(n, tagmask);
331 		t->t_height++;
332 	}
333 	return 0;
334 }
335 
336 /*
337  * radix_tree_lookup_ptr:
338  *
339  * an internal helper function used for various exported functions.
340  *
341  * return the pointer to store the node for the given index.
342  *
343  * if alloc is true, try to allocate the storage.  (note for _KERNEL:
344  * in that case, this function can block.)  if the allocation failed or
345  * alloc is false, return NULL.
346  *
347  * if path is not NULL, fill it for the caller's investigation.
348  *
349  * if tagmask is not zero, search only for nodes with the tag set.
350  *
351  * while this function is a bit large, as it's called with some constant
352  * arguments, inlining might have benefits.  anyway, a compiler will decide.
353  */
354 
355 static inline void **
356 radix_tree_lookup_ptr(struct radix_tree *t, uint64_t idx,
357     struct radix_tree_path *path, bool alloc, const unsigned int tagmask)
358 {
359 	struct radix_tree_node *n;
360 	int hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
361 	int shift;
362 	void **vpp;
363 	const uint64_t mask = (UINT64_C(1) << RADIX_TREE_BITS_PER_HEIGHT) - 1;
364 	struct radix_tree_node_ref *refs = NULL;
365 
366 	/*
367 	 * check unsupported combinations
368 	 */
369 	KASSERT(tagmask == 0 || !alloc);
370 	KASSERT(path == NULL || !alloc);
371 	vpp = &t->t_root;
372 	if (path != NULL) {
373 		refs = path->p_refs;
374 		refs->pptr = vpp;
375 	}
376 	n = NULL;
377 	for (shift = 64 - RADIX_TREE_BITS_PER_HEIGHT; shift >= 0;) {
378 		struct radix_tree_node *c;
379 		void *entry;
380 		const uint64_t i = (idx >> shift) & mask;
381 
382 		if (shift >= hshift) {
383 			unsigned int newheight;
384 
385 			KASSERT(vpp == &t->t_root);
386 			if (i == 0) {
387 				shift -= RADIX_TREE_BITS_PER_HEIGHT;
388 				continue;
389 			}
390 			if (!alloc) {
391 				if (path != NULL) {
392 					KASSERT((refs - path->p_refs) == 0);
393 					path->p_lastidx = 0;
394 				}
395 				return NULL;
396 			}
397 			newheight = shift / RADIX_TREE_BITS_PER_HEIGHT + 1;
398 			if (radix_tree_grow(t, newheight)) {
399 				return NULL;
400 			}
401 			hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
402 		}
403 		entry = *vpp;
404 		c = entry_ptr(entry);
405 		if (c == NULL ||
406 		    (tagmask != 0 &&
407 		    (entry_tagmask(entry) & tagmask) == 0)) {
408 			if (!alloc) {
409 				if (path != NULL) {
410 					path->p_lastidx = refs - path->p_refs;
411 				}
412 				return NULL;
413 			}
414 			c = radix_tree_alloc_node();
415 			if (c == NULL) {
416 				return NULL;
417 			}
418 			*vpp = c;
419 			if (n != NULL) {
420 				KASSERT(n->n_nptrs < RADIX_TREE_PTR_PER_NODE);
421 				n->n_nptrs++;
422 			}
423 		}
424 		n = c;
425 		vpp = &n->n_ptrs[i];
426 		if (path != NULL) {
427 			refs++;
428 			refs->pptr = vpp;
429 		}
430 		shift -= RADIX_TREE_BITS_PER_HEIGHT;
431 	}
432 	if (alloc) {
433 		KASSERT(*vpp == NULL);
434 		if (n != NULL) {
435 			KASSERT(n->n_nptrs < RADIX_TREE_PTR_PER_NODE);
436 			n->n_nptrs++;
437 		}
438 	}
439 	if (path != NULL) {
440 		path->p_lastidx = refs - path->p_refs;
441 	}
442 	return vpp;
443 }
444 
445 /*
446  * radix_tree_insert_node:
447  *
448  * insert the node at idx.
449  * it's illegal to insert NULL.
450  * it's illegal to insert a non-aligned pointer.
451  *
452  * this function returns ENOMEM if necessary memory allocation failed.
453  * otherwise, this function returns 0.
454  *
455  * note that inserting a node can involves memory allocation for intermediate
456  * nodes.  if _KERNEL, it's done with non-blocking IPL_NONE memory allocation.
457  *
458  * for the newly inserted node, all tags are cleared.
459  */
460 
461 int
462 radix_tree_insert_node(struct radix_tree *t, uint64_t idx, void *p)
463 {
464 	void **vpp;
465 
466 	KASSERT(p != NULL);
467 	KASSERT(entry_compose(p, 0) == p);
468 	vpp = radix_tree_lookup_ptr(t, idx, NULL, true, 0);
469 	if (vpp == NULL) {
470 		return ENOMEM;
471 	}
472 	KASSERT(*vpp == NULL);
473 	*vpp = p;
474 	return 0;
475 }
476 
477 /*
478  * radix_tree_replace_node:
479  *
480  * replace a node at the given index with the given node.
481  * return the old node.
482  * it's illegal to try to replace a node which has not been inserted.
483  *
484  * this function doesn't change tags.
485  */
486 
487 void *
488 radix_tree_replace_node(struct radix_tree *t, uint64_t idx, void *p)
489 {
490 	void **vpp;
491 	void *oldp;
492 
493 	KASSERT(p != NULL);
494 	KASSERT(entry_compose(p, 0) == p);
495 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
496 	KASSERT(vpp != NULL);
497 	oldp = *vpp;
498 	KASSERT(oldp != NULL);
499 	*vpp = entry_compose(p, entry_tagmask(*vpp));
500 	return entry_ptr(oldp);
501 }
502 
503 /*
504  * radix_tree_remove_node:
505  *
506  * remove the node at idx.
507  * it's illegal to try to remove a node which has not been inserted.
508  */
509 
510 void *
511 radix_tree_remove_node(struct radix_tree *t, uint64_t idx)
512 {
513 	struct radix_tree_path path;
514 	void **vpp;
515 	void *oldp;
516 	int i;
517 
518 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
519 	KASSERT(vpp != NULL);
520 	oldp = *vpp;
521 	KASSERT(oldp != NULL);
522 	KASSERT(path.p_lastidx == t->t_height);
523 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
524 	*vpp = NULL;
525 	for (i = t->t_height - 1; i >= 0; i--) {
526 		void *entry;
527 		struct radix_tree_node ** const pptr =
528 		    (struct radix_tree_node **)path_pptr(t, &path, i);
529 		struct radix_tree_node *n;
530 
531 		KASSERT(pptr != NULL);
532 		entry = *pptr;
533 		n = entry_ptr(entry);
534 		KASSERT(n != NULL);
535 		KASSERT(n->n_nptrs > 0);
536 		n->n_nptrs--;
537 		if (n->n_nptrs > 0) {
538 			break;
539 		}
540 		radix_tree_free_node(n);
541 		*pptr = NULL;
542 	}
543 	/*
544 	 * fix up height
545 	 */
546 	if (i < 0) {
547 		KASSERT(t->t_root == NULL);
548 		t->t_height = 0;
549 	}
550 	/*
551 	 * update tags
552 	 */
553 	for (; i >= 0; i--) {
554 		void *entry;
555 		struct radix_tree_node ** const pptr =
556 		    (struct radix_tree_node **)path_pptr(t, &path, i);
557 		struct radix_tree_node *n;
558 		unsigned int newmask;
559 
560 		KASSERT(pptr != NULL);
561 		entry = *pptr;
562 		n = entry_ptr(entry);
563 		KASSERT(n != NULL);
564 		KASSERT(n->n_nptrs > 0);
565 		newmask = any_children_tagmask(n);
566 		if (newmask == entry_tagmask(entry)) {
567 			break;
568 		}
569 		*pptr = entry_compose(n, newmask);
570 	}
571 	/*
572 	 * XXX is it worth to try to reduce height?
573 	 * if we do that, make radix_tree_grow rollback its change as well.
574 	 */
575 	return entry_ptr(oldp);
576 }
577 
578 /*
579  * radix_tree_lookup_node:
580  *
581  * returns the node at idx.
582  * returns NULL if nothing is found at idx.
583  */
584 
585 void *
586 radix_tree_lookup_node(struct radix_tree *t, uint64_t idx)
587 {
588 	void **vpp;
589 
590 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
591 	if (vpp == NULL) {
592 		return NULL;
593 	}
594 	return entry_ptr(*vpp);
595 }
596 
597 static inline void
598 gang_lookup_init(struct radix_tree *t, uint64_t idx,
599     struct radix_tree_path *path, const unsigned int tagmask)
600 {
601 	void **vpp;
602 
603 	vpp = radix_tree_lookup_ptr(t, idx, path, false, tagmask);
604 	KASSERT(vpp == NULL ||
605 	    vpp == path_pptr(t, path, path->p_lastidx));
606 	KASSERT(&t->t_root == path_pptr(t, path, 0));
607 }
608 
609 static inline unsigned int
610 gang_lookup_scan(struct radix_tree *t, struct radix_tree_path *path,
611     void **results, unsigned int maxresults, const unsigned int tagmask)
612 {
613 	void **vpp;
614 	int nfound;
615 	unsigned int lastidx;
616 
617 	KASSERT(maxresults > 0);
618 	lastidx = path->p_lastidx;
619 	if (lastidx == 0) {
620 		return 0;
621 	}
622 	nfound = 0;
623 	vpp = path_pptr(t, path, lastidx);
624 	while (/*CONSTCOND*/true) {
625 		struct radix_tree_node *n;
626 		int i;
627 
628 		if (entry_match_p(*vpp, tagmask)) {
629 			KASSERT(lastidx == t->t_height);
630 			/*
631 			 * record the non-NULL leaf.
632 			 */
633 			results[nfound] = entry_ptr(*vpp);
634 			nfound++;
635 			if (nfound == maxresults) {
636 				return nfound;
637 			}
638 		}
639 scan_siblings:
640 		/*
641 		 * try to find the next non-NULL sibling.
642 		 */
643 		n = path_node(t, path, lastidx - 1);
644 		if (*vpp != NULL && n->n_nptrs == 1) {
645 			/*
646 			 * optimization
647 			 */
648 			goto no_siblings;
649 		}
650 		for (i = path_idx(t, path, lastidx - 1) + 1;
651 		    i < RADIX_TREE_PTR_PER_NODE;
652 		    i++) {
653 			if (entry_match_p(n->n_ptrs[i], tagmask)) {
654 				vpp = &n->n_ptrs[i];
655 				path->p_refs[lastidx].pptr = vpp;
656 				KASSERT(path_idx(t, path, lastidx - 1)
657 				    == i);
658 				break;
659 			}
660 		}
661 		if (i == RADIX_TREE_PTR_PER_NODE) {
662 no_siblings:
663 			/*
664 			 * not found.  go to parent.
665 			 */
666 			lastidx--;
667 			if (lastidx == 0) {
668 				return nfound;
669 			}
670 			vpp = path_pptr(t, path, lastidx);
671 			goto scan_siblings;
672 		}
673 		/*
674 		 * descending the left-most child node, upto the leaf or NULL.
675 		 */
676 		while (entry_match_p(*vpp, tagmask) && lastidx < t->t_height) {
677 			n = entry_ptr(*vpp);
678 			vpp = &n->n_ptrs[0];
679 			lastidx++;
680 			path->p_refs[lastidx].pptr = vpp;
681 		}
682 	}
683 }
684 
685 /*
686  * radix_tree_gang_lookup_node:
687  *
688  * search nodes starting from idx in the ascending order.
689  * results should be an array large enough to hold maxresults pointers.
690  * returns the number of nodes found, up to maxresults.
691  * returning less than maxresults means there are no more nodes.
692  *
693  * the result of this function is semantically equivalent to what could be
694  * obtained by repeated calls of radix_tree_lookup_node with increasing index.
695  * but this function is much faster when node indexes are distributed sparsely.
696  *
697  * note that this function doesn't return exact values of node indexes of
698  * found nodes.  if they are important for a caller, it's the caller's
699  * responsibility to check them, typically by examinining the returned nodes
700  * using some caller-specific knowledge about them.
701  */
702 
703 unsigned int
704 radix_tree_gang_lookup_node(struct radix_tree *t, uint64_t idx,
705     void **results, unsigned int maxresults)
706 {
707 	struct radix_tree_path path;
708 
709 	gang_lookup_init(t, idx, &path, 0);
710 	return gang_lookup_scan(t, &path, results, maxresults, 0);
711 }
712 
713 /*
714  * radix_tree_gang_lookup_tagged_node:
715  *
716  * same as radix_tree_gang_lookup_node except that this one only returns
717  * nodes tagged with tagid.
718  */
719 
720 unsigned int
721 radix_tree_gang_lookup_tagged_node(struct radix_tree *t, uint64_t idx,
722     void **results, unsigned int maxresults, radix_tree_tagid_t tagid)
723 {
724 	struct radix_tree_path path;
725 	const unsigned int tagmask = tagid_to_mask(tagid);
726 
727 	gang_lookup_init(t, idx, &path, tagmask);
728 	return gang_lookup_scan(t, &path, results, maxresults, tagmask);
729 }
730 
731 /*
732  * radix_tree_get_tag:
733  *
734  * return if the tag is set for the node at the given index.  (true if set)
735  * it's illegal to call this function for a node which has not been inserted.
736  */
737 
738 bool
739 radix_tree_get_tag(struct radix_tree *t, uint64_t idx,
740     radix_tree_tagid_t tagid)
741 {
742 #if 1
743 	const unsigned int tagmask = tagid_to_mask(tagid);
744 	void **vpp;
745 
746 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, tagmask);
747 	if (vpp == NULL) {
748 		return false;
749 	}
750 	KASSERT(*vpp != NULL);
751 	return (entry_tagmask(*vpp) & tagmask) != 0;
752 #else
753 	const unsigned int tagmask = tagid_to_mask(tagid);
754 	void **vpp;
755 
756 	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
757 	KASSERT(vpp != NULL);
758 	return (entry_tagmask(*vpp) & tagmask) != 0;
759 #endif
760 }
761 
762 /*
763  * radix_tree_set_tag:
764  *
765  * set the tag for the node at the given index.
766  * it's illegal to call this function for a node which has not been inserted.
767  */
768 
769 void
770 radix_tree_set_tag(struct radix_tree *t, uint64_t idx,
771     radix_tree_tagid_t tagid)
772 {
773 	struct radix_tree_path path;
774 	const unsigned int tagmask = tagid_to_mask(tagid);
775 	void **vpp;
776 	int i;
777 
778 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
779 	KASSERT(vpp != NULL);
780 	KASSERT(*vpp != NULL);
781 	KASSERT(path.p_lastidx == t->t_height);
782 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
783 	for (i = t->t_height; i >= 0; i--) {
784 		void ** const pptr = (void **)path_pptr(t, &path, i);
785 		void *entry;
786 
787 		KASSERT(pptr != NULL);
788 		entry = *pptr;
789 		if ((entry_tagmask(entry) & tagmask) != 0) {
790 			break;
791 		}
792 		*pptr = (void *)((uintptr_t)entry | tagmask);
793 	}
794 }
795 
796 /*
797  * radix_tree_clear_tag:
798  *
799  * clear the tag for the node at the given index.
800  * it's illegal to call this function for a node which has not been inserted.
801  */
802 
803 void
804 radix_tree_clear_tag(struct radix_tree *t, uint64_t idx,
805     radix_tree_tagid_t tagid)
806 {
807 	struct radix_tree_path path;
808 	const unsigned int tagmask = tagid_to_mask(tagid);
809 	void **vpp;
810 	int i;
811 
812 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
813 	KASSERT(vpp != NULL);
814 	KASSERT(*vpp != NULL);
815 	KASSERT(path.p_lastidx == t->t_height);
816 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
817 	/*
818 	 * if already cleared, nothing to do
819 	 */
820 	if ((entry_tagmask(*vpp) & tagmask) == 0) {
821 		return;
822 	}
823 	/*
824 	 * clear the tag only if no children have the tag.
825 	 */
826 	for (i = t->t_height; i >= 0; i--) {
827 		void ** const pptr = (void **)path_pptr(t, &path, i);
828 		void *entry;
829 
830 		KASSERT(pptr != NULL);
831 		entry = *pptr;
832 		KASSERT((entry_tagmask(entry) & tagmask) != 0);
833 		*pptr = entry_compose(entry_ptr(entry),
834 		    entry_tagmask(entry) & ~tagmask);
835 		/*
836 		 * check if we should proceed to process the next level.
837 		 */
838 		if (0 < i) {
839 			struct radix_tree_node *n = path_node(t, &path, i - 1);
840 
841 			if ((any_children_tagmask(n) & tagmask) != 0) {
842 				break;
843 			}
844 		}
845 	}
846 }
847 
848 #if defined(UNITTEST)
849 
850 #include <inttypes.h>
851 #include <stdio.h>
852 
853 static void
854 radix_tree_dump_node(const struct radix_tree *t, void *vp,
855     uint64_t offset, unsigned int height)
856 {
857 	struct radix_tree_node *n;
858 	unsigned int i;
859 
860 	for (i = 0; i < t->t_height - height; i++) {
861 		printf(" ");
862 	}
863 	if (entry_tagmask(vp) == 0) {
864 		printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
865 	} else {
866 		printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
867 		    entry_tagmask(vp));
868 	}
869 	if (height == 0) {
870 		printf(" (leaf)\n");
871 		return;
872 	}
873 	n = entry_ptr(vp);
874 	assert(any_children_tagmask(n) == entry_tagmask(vp));
875 	printf(" (%u children)\n", n->n_nptrs);
876 	for (i = 0; i < __arraycount(n->n_ptrs); i++) {
877 		void *c;
878 
879 		c = n->n_ptrs[i];
880 		if (c == NULL) {
881 			continue;
882 		}
883 		radix_tree_dump_node(t, c,
884 		    offset + i * (UINT64_C(1) <<
885 		    (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
886 	}
887 }
888 
889 void radix_tree_dump(const struct radix_tree *);
890 
891 void
892 radix_tree_dump(const struct radix_tree *t)
893 {
894 
895 	printf("tree %p height=%u\n", t, t->t_height);
896 	radix_tree_dump_node(t, t->t_root, 0, t->t_height);
897 }
898 
899 static void
900 test1(void)
901 {
902 	struct radix_tree s;
903 	struct radix_tree *t = &s;
904 	void *results[3];
905 
906 	radix_tree_init_tree(t);
907 	radix_tree_dump(t);
908 	assert(radix_tree_lookup_node(t, 0) == NULL);
909 	assert(radix_tree_lookup_node(t, 1000) == NULL);
910 	assert(radix_tree_insert_node(t, 1000, (void *)0xdeadbea0) == 0);
911 	radix_tree_dump(t);
912 	assert(!radix_tree_get_tag(t, 1000, 0));
913 	assert(!radix_tree_get_tag(t, 1000, 1));
914 	radix_tree_set_tag(t, 1000, 1);
915 	assert(!radix_tree_get_tag(t, 1000, 0));
916 	assert(radix_tree_get_tag(t, 1000, 1));
917 	radix_tree_dump(t);
918 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
919 	assert(radix_tree_insert_node(t, 0, (void *)0xbea0) == 0);
920 	radix_tree_dump(t);
921 	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
922 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
923 	assert(radix_tree_insert_node(t, UINT64_C(10000000000), (void *)0xdea0)
924 	    == 0);
925 	radix_tree_dump(t);
926 	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
927 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
928 	assert(radix_tree_lookup_node(t, UINT64_C(10000000000)) ==
929 	    (void *)0xdea0);
930 	radix_tree_dump(t);
931 	assert(!radix_tree_get_tag(t, 0, 1));
932 	assert(radix_tree_get_tag(t, 1000, 1));
933 	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
934 	radix_tree_set_tag(t, 0, 1);;
935 	radix_tree_set_tag(t, UINT64_C(10000000000), 1);
936 	radix_tree_dump(t);
937 	assert(radix_tree_get_tag(t, 0, 1));
938 	assert(radix_tree_get_tag(t, 1000, 1));
939 	assert(radix_tree_get_tag(t, UINT64_C(10000000000), 1));
940 	radix_tree_clear_tag(t, 0, 1);;
941 	radix_tree_clear_tag(t, UINT64_C(10000000000), 1);
942 	radix_tree_dump(t);
943 	assert(!radix_tree_get_tag(t, 0, 1));
944 	assert(radix_tree_get_tag(t, 1000, 1));
945 	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
946 	radix_tree_dump(t);
947 	assert(radix_tree_replace_node(t, 1000, (void *)0x12345678) ==
948 	    (void *)0xdeadbea0);
949 	assert(!radix_tree_get_tag(t, 1000, 0));
950 	assert(radix_tree_get_tag(t, 1000, 1));
951 	assert(radix_tree_gang_lookup_node(t, 0, results, 3) == 3);
952 	assert(results[0] == (void *)0xbea0);
953 	assert(results[1] == (void *)0x12345678);
954 	assert(results[2] == (void *)0xdea0);
955 	assert(radix_tree_gang_lookup_node(t, 1, results, 3) == 2);
956 	assert(results[0] == (void *)0x12345678);
957 	assert(results[1] == (void *)0xdea0);
958 	assert(radix_tree_gang_lookup_node(t, 1001, results, 3) == 1);
959 	assert(results[0] == (void *)0xdea0);
960 	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3)
961 	    == 0);
962 	assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
963 	    3) == 0);
964 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, 1) == 1);
965 	assert(results[0] == (void *)0x12345678);
966 	assert(entry_tagmask(t->t_root) != 0);
967 	assert(radix_tree_remove_node(t, 1000) == (void *)0x12345678);
968 	assert(entry_tagmask(t->t_root) == 0);
969 	radix_tree_dump(t);
970 	assert(radix_tree_remove_node(t, UINT64_C(10000000000)) ==
971 	    (void *)0xdea0);
972 	radix_tree_dump(t);
973 	assert(radix_tree_remove_node(t, 0) == (void *)0xbea0);
974 	radix_tree_dump(t);
975 	radix_tree_fini_tree(t);
976 }
977 
978 #include <sys/time.h>
979 
980 struct testnode {
981 	uint64_t idx;
982 };
983 
984 static void
985 printops(const char *name, unsigned int n, const struct timeval *stv,
986     const struct timeval *etv)
987 {
988 	uint64_t s = stv->tv_sec * 1000000 + stv->tv_usec;
989 	uint64_t e = etv->tv_sec * 1000000 + etv->tv_usec;
990 
991 	printf("%lf %s/s\n", (double)n / (e - s) * 1000000, name);
992 }
993 
994 #define	TEST2_GANG_LOOKUP_NODES	16
995 
996 static bool
997 test2_should_tag(unsigned int i, radix_tree_tagid_t tagid)
998 {
999 
1000 	if (tagid == 0) {
1001 		return (i & 0x3) == 0;
1002 	} else {
1003 		return (i % 7) == 0;
1004 	}
1005 }
1006 
1007 static void
1008 test2(bool dense)
1009 {
1010 	struct radix_tree s;
1011 	struct radix_tree *t = &s;
1012 	struct testnode *n;
1013 	unsigned int i;
1014 	unsigned int nnodes = 100000;
1015 	unsigned int removed;
1016 	radix_tree_tagid_t tag;
1017 	unsigned int ntagged[RADIX_TREE_TAG_ID_MAX];
1018 	struct testnode *nodes;
1019 	struct timeval stv;
1020 	struct timeval etv;
1021 
1022 	nodes = malloc(nnodes * sizeof(*nodes));
1023 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1024 		ntagged[tag] = 0;
1025 	}
1026 	radix_tree_init_tree(t);
1027 	for (i = 0; i < nnodes; i++) {
1028 		n = &nodes[i];
1029 		n->idx = random();
1030 		if (sizeof(long) == 4) {
1031 			n->idx <<= 32;
1032 			n->idx |= (uint32_t)random();
1033 		}
1034 		if (dense) {
1035 			n->idx %= nnodes * 2;
1036 		}
1037 		while (radix_tree_lookup_node(t, n->idx) != NULL) {
1038 			n->idx++;
1039 		}
1040 		radix_tree_insert_node(t, n->idx, n);
1041 		for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1042 			if (test2_should_tag(i, tag)) {
1043 				radix_tree_set_tag(t, n->idx, tag);
1044 				ntagged[tag]++;
1045 			}
1046 			assert(test2_should_tag(i, tag) ==
1047 			    radix_tree_get_tag(t, n->idx, tag));
1048 		}
1049 	}
1050 
1051 	gettimeofday(&stv, NULL);
1052 	for (i = 0; i < nnodes; i++) {
1053 		n = &nodes[i];
1054 		assert(radix_tree_lookup_node(t, n->idx) == n);
1055 	}
1056 	gettimeofday(&etv, NULL);
1057 	printops("lookup", nnodes, &stv, &etv);
1058 
1059 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1060 		gettimeofday(&stv, NULL);
1061 		for (i = 0; i < nnodes; i++) {
1062 			n = &nodes[i];
1063 			assert(test2_should_tag(i, tag) ==
1064 			    radix_tree_get_tag(t, n->idx, tag));
1065 		}
1066 		gettimeofday(&etv, NULL);
1067 		printops("get_tag", ntagged[tag], &stv, &etv);
1068 	}
1069 
1070 	gettimeofday(&stv, NULL);
1071 	for (i = 0; i < nnodes; i++) {
1072 		n = &nodes[i];
1073 		radix_tree_remove_node(t, n->idx);
1074 	}
1075 	gettimeofday(&etv, NULL);
1076 	printops("remove", nnodes, &stv, &etv);
1077 
1078 	gettimeofday(&stv, NULL);
1079 	for (i = 0; i < nnodes; i++) {
1080 		n = &nodes[i];
1081 		radix_tree_insert_node(t, n->idx, n);
1082 	}
1083 	gettimeofday(&etv, NULL);
1084 	printops("insert", nnodes, &stv, &etv);
1085 
1086 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1087 		ntagged[tag] = 0;
1088 		gettimeofday(&stv, NULL);
1089 		for (i = 0; i < nnodes; i++) {
1090 			n = &nodes[i];
1091 			if (test2_should_tag(i, tag)) {
1092 				radix_tree_set_tag(t, n->idx, tag);
1093 				ntagged[tag]++;
1094 			}
1095 		}
1096 		gettimeofday(&etv, NULL);
1097 		printops("set_tag", ntagged[tag], &stv, &etv);
1098 	}
1099 
1100 	gettimeofday(&stv, NULL);
1101 	{
1102 		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1103 		uint64_t nextidx;
1104 		unsigned int nfound;
1105 		unsigned int total;
1106 
1107 		nextidx = 0;
1108 		total = 0;
1109 		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1110 		    (void *)results, __arraycount(results))) > 0) {
1111 			nextidx = results[nfound - 1]->idx + 1;
1112 			total += nfound;
1113 		}
1114 		assert(total == nnodes);
1115 	}
1116 	gettimeofday(&etv, NULL);
1117 	printops("ganglookup", nnodes, &stv, &etv);
1118 
1119 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1120 		gettimeofday(&stv, NULL);
1121 		{
1122 			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1123 			uint64_t nextidx;
1124 			unsigned int nfound;
1125 			unsigned int total;
1126 
1127 			nextidx = 0;
1128 			total = 0;
1129 			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1130 			    nextidx, (void *)results, __arraycount(results),
1131 			    tag)) > 0) {
1132 				nextidx = results[nfound - 1]->idx + 1;
1133 				total += nfound;
1134 			}
1135 			assert(total == ntagged[tag]);
1136 		}
1137 		gettimeofday(&etv, NULL);
1138 		printops("ganglookup_tag", ntagged[tag], &stv, &etv);
1139 	}
1140 
1141 	removed = 0;
1142 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1143 		unsigned int total;
1144 
1145 		total = 0;
1146 		gettimeofday(&stv, NULL);
1147 		{
1148 			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1149 			uint64_t nextidx;
1150 			unsigned int nfound;
1151 
1152 			nextidx = 0;
1153 			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1154 			    nextidx, (void *)results, __arraycount(results),
1155 			    tag)) > 0) {
1156 				for (i = 0; i < nfound; i++) {
1157 					radix_tree_remove_node(t,
1158 					    results[i]->idx);
1159 				}
1160 				nextidx = results[nfound - 1]->idx + 1;
1161 				total += nfound;
1162 			}
1163 			assert(tag != 0 || total == ntagged[tag]);
1164 			assert(total <= ntagged[tag]);
1165 		}
1166 		gettimeofday(&etv, NULL);
1167 		printops("ganglookup_tag+remove", total, &stv, &etv);
1168 		removed += total;
1169 	}
1170 
1171 	gettimeofday(&stv, NULL);
1172 	{
1173 		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1174 		uint64_t nextidx;
1175 		unsigned int nfound;
1176 		unsigned int total;
1177 
1178 		nextidx = 0;
1179 		total = 0;
1180 		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1181 		    (void *)results, __arraycount(results))) > 0) {
1182 			for (i = 0; i < nfound; i++) {
1183 				assert(results[i] == radix_tree_remove_node(t,
1184 				    results[i]->idx));
1185 			}
1186 			nextidx = results[nfound - 1]->idx + 1;
1187 			total += nfound;
1188 		}
1189 		assert(total == nnodes - removed);
1190 	}
1191 	gettimeofday(&etv, NULL);
1192 	printops("ganglookup+remove", nnodes - removed, &stv, &etv);
1193 
1194 	radix_tree_fini_tree(t);
1195 	free(nodes);
1196 }
1197 
1198 int
1199 main(int argc, char *argv[])
1200 {
1201 
1202 	test1();
1203 	printf("dense distribution:\n");
1204 	test2(true);
1205 	printf("sparse distribution:\n");
1206 	test2(false);
1207 	return 0;
1208 }
1209 
1210 #endif /* defined(UNITTEST) */
1211