xref: /netbsd-src/common/lib/libc/gen/radixtree.c (revision 779666e6a046a896eec03d9f90a6ee45b68c472f)
1 /*	$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $	*/
2 
3 /*-
4  * Copyright (c)2011,2012,2013 YAMAMOTO Takashi,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * radixtree.c
31  *
32  * Overview:
33  *
34  * This is an implementation of a radix tree, whose keys are uint64_t and
35  * whose leaves are user provided pointers.
36  *
37  * Leaf nodes are just void * and this implementation doesn't care about
38  * what they actually point to.  However, this implementation has an assumption
39  * about their alignment.  Specifically, this implementation assumes that their
40  * 2 LSBs are always zero and uses them for internal accounting.
41  *
42  * Intermediate nodes and memory allocation:
43  *
44  * Intermediate nodes are automatically allocated and freed internally and
45  * basically users don't need to care about them.  The allocation is done via
46  * kmem_zalloc(9) for _KERNEL, malloc(3) for userland, and alloc() for
47  * _STANDALONE environment.  Only radix_tree_insert_node function can allocate
48  * memory for intermediate nodes and thus can fail for ENOMEM.
49  *
50  * Memory Efficiency:
51  *
52  * It's designed to work efficiently with dense index distribution.
53  * The memory consumption (number of necessary intermediate nodes) heavily
54  * depends on the index distribution.  Basically, more dense index distribution
55  * consumes less nodes per item.  Approximately,
56  *
57  *  - the best case: about RADIX_TREE_PTR_PER_NODE items per intermediate node.
58  *    it would look like the following.
59  *
60  *     root (t_height=1)
61  *      |
62  *      v
63  *      [ | | | ]   (intermediate node.  RADIX_TREE_PTR_PER_NODE=4 in this fig)
64  *       | | | |
65  *       v v v v
66  *       p p p p    (items)
67  *
68  *  - the worst case: RADIX_TREE_MAX_HEIGHT intermediate nodes per item.
69  *    it would look like the following if RADIX_TREE_MAX_HEIGHT=3.
70  *
71  *     root (t_height=3)
72  *      |
73  *      v
74  *      [ | | | ]
75  *           |
76  *           v
77  *           [ | | | ]
78  *                |
79  *                v
80  *                [ | | | ]
81  *                   |
82  *                   v
83  *                   p
84  *
85  * The height of tree (t_height) is dynamic.  It's smaller if only small
86  * index values are used.  As an extreme case, if only index 0 is used,
87  * the corresponding value is directly stored in the root of the tree
88  * (struct radix_tree) without allocating any intermediate nodes.  In that
89  * case, t_height=0.
90  *
91  * Gang lookup:
92  *
93  * This implementation provides a way to scan many nodes quickly via
94  * radix_tree_gang_lookup_node function and its variants.
95  *
96  * Tags:
97  *
98  * This implementation provides tagging functionality, which allows quick
99  * scanning of a subset of leaf nodes.  Leaf nodes are untagged when inserted
100  * into the tree and can be tagged by radix_tree_set_tag function.
101  * radix_tree_gang_lookup_tagged_node function and its variants return only
102  * leaf nodes with the given tag.  To reduce amount of nodes to visit for
103  * these functions, this implementation keeps tagging information in internal
104  * intermediate nodes and quickly skips uninterested parts of a tree.
105  *
106  * A tree has RADIX_TREE_TAG_ID_MAX independent tag spaces, each of which is
107  * identified by a zero-origin number, tagid.  For the current implementation,
108  * RADIX_TREE_TAG_ID_MAX is 2.  A set of tags is described as a bitmask tagmask,
109  * which is a bitwise OR of (1 << tagid).
110  */
111 
112 #include <sys/cdefs.h>
113 
114 #if defined(_KERNEL) || defined(_STANDALONE)
115 __KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $");
116 #include <sys/param.h>
117 #include <sys/errno.h>
118 #include <sys/kmem.h>
119 #include <sys/radixtree.h>
120 #include <lib/libkern/libkern.h>
121 #if defined(_STANDALONE)
122 #include <lib/libsa/stand.h>
123 #endif /* defined(_STANDALONE) */
124 #else /* defined(_KERNEL) || defined(_STANDALONE) */
125 __RCSID("$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $");
126 #include <assert.h>
127 #include <errno.h>
128 #include <stdbool.h>
129 #include <stdlib.h>
130 #include <string.h>
131 #if 1
132 #define KASSERT assert
133 #else
134 #define KASSERT(a)	/* nothing */
135 #endif
136 #endif /* defined(_KERNEL) || defined(_STANDALONE) */
137 
138 #include <sys/radixtree.h>
139 
/*
 * Tree geometry.
 *
 * Each level of the tree consumes RADIX_TREE_BITS_PER_HEIGHT bits of the
 * 64-bit index, so an intermediate node has RADIX_TREE_PTR_PER_NODE slots
 * and a tree is at most RADIX_TREE_MAX_HEIGHT levels deep.
 * RADIX_TREE_INVALID_HEIGHT is a sentinel which is larger than any valid
 * height.
 */
#define	RADIX_TREE_BITS_PER_HEIGHT	4	/* XXX tune */
#define	RADIX_TREE_PTR_PER_NODE		(1 << RADIX_TREE_BITS_PER_HEIGHT)
#define	RADIX_TREE_MAX_HEIGHT		(64 / RADIX_TREE_BITS_PER_HEIGHT)
#define	RADIX_TREE_INVALID_HEIGHT	(RADIX_TREE_MAX_HEIGHT + 1)
/* the 64 index bits must split evenly into per-level digits */
__CTASSERT((64 % RADIX_TREE_BITS_PER_HEIGHT) == 0);

/*
 * RADIX_TREE_TAG_MASK covers the low bits of an entry which hold its tags.
 *
 * NOTE(review): the assertion below is presumably related to the alignment
 * assumption which frees those low bits -- confirm its exact intent.
 */
__CTASSERT(((1 << RADIX_TREE_TAG_ID_MAX) & (sizeof(int) - 1)) == 0);
#define	RADIX_TREE_TAG_MASK	((1 << RADIX_TREE_TAG_ID_MAX) - 1)
148 
149 static inline void *
entry_ptr(void * p)150 entry_ptr(void *p)
151 {
152 
153 	return (void *)((uintptr_t)p & ~RADIX_TREE_TAG_MASK);
154 }
155 
156 static inline unsigned int
entry_tagmask(void * p)157 entry_tagmask(void *p)
158 {
159 
160 	return (uintptr_t)p & RADIX_TREE_TAG_MASK;
161 }
162 
/*
 * entry_compose:
 *
 * build a tree entry by OR-ing the given tagmask into the pointer p.
 */

static inline void *
entry_compose(void *p, unsigned int tagmask)
{
	const uintptr_t bits = (uintptr_t)p | tagmask;

	return (void *)bits;
}
169 
/*
 * entry_match_p:
 *
 * return true if the entry p is non-NULL and either tagmask is 0 (any
 * entry matches) or p has at least one of the tags in tagmask set.
 */

static inline bool
entry_match_p(void *p, unsigned int tagmask)
{

	/* an entry without a pointer must not carry any tag bits. */
	KASSERT(entry_ptr(p) != NULL || entry_tagmask(p) == 0);
	if (p == NULL) {
		return false;
	}
	return tagmask == 0 || (entry_tagmask(p) & tagmask) != 0;
}
183 
/*
 * radix_tree_node: an intermediate node
 *
 * we don't care about the type of leaf nodes.  they are just void *.
 *
 * we used to maintain a count of non-NULL nodes in this structure, but it
 * prevented it from being aligned to a cache line boundary; the performance
 * benefit from being cache friendly is greater than the benefit of having
 * a dedicated count value, especially in multi-processor situations where
 * we need to avoid intra-pool-page false sharing.
 */

struct radix_tree_node {
	/*
	 * one slot per index digit.  each slot is either NULL or a pointer
	 * (to a child node or to a leaf) with tag bits in its low bits.
	 */
	void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
};
199 
200 /*
201  * p_refs[0].pptr == &t->t_root
202  *	:
203  * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x]
204  *	:
205  *	:
206  * p_refs[t->t_height].pptr == &leaf_pointer
207  */
208 
209 struct radix_tree_path {
210 	struct radix_tree_node_ref {
211 		void **pptr;
212 	} p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
213 	/*
214 	 * p_lastidx is either the index of the last valid element of p_refs[]
215 	 * or RADIX_TREE_INVALID_HEIGHT.
216 	 * RADIX_TREE_INVALID_HEIGHT means that radix_tree_lookup_ptr found
217 	 * that the height of the tree is not enough to cover the given index.
218 	 */
219 	unsigned int p_lastidx;
220 };
221 
222 static inline void **
path_pptr(const struct radix_tree * t,const struct radix_tree_path * p,unsigned int height)223 path_pptr(const struct radix_tree *t, const struct radix_tree_path *p,
224     unsigned int height)
225 {
226 
227 	KASSERT(height <= t->t_height);
228 	return p->p_refs[height].pptr;
229 }
230 
231 static inline struct radix_tree_node *
path_node(const struct radix_tree * t,const struct radix_tree_path * p,unsigned int height)232 path_node(const struct radix_tree * t, const struct radix_tree_path *p,
233     unsigned int height)
234 {
235 
236 	KASSERT(height <= t->t_height);
237 	return entry_ptr(*path_pptr(t, p, height));
238 }
239 
240 /*
241  * radix_tree_init_tree:
242  *
243  * Initialize a tree.
244  */
245 
246 void
radix_tree_init_tree(struct radix_tree * t)247 radix_tree_init_tree(struct radix_tree *t)
248 {
249 
250 	t->t_height = 0;
251 	t->t_root = NULL;
252 }
253 
254 /*
255  * radix_tree_fini_tree:
256  *
257  * Finish using a tree.
258  */
259 
260 void
radix_tree_fini_tree(struct radix_tree * t)261 radix_tree_fini_tree(struct radix_tree *t)
262 {
263 
264 	KASSERT(t->t_root == NULL);
265 	KASSERT(t->t_height == 0);
266 }
267 
268 /*
269  * radix_tree_empty_tree_p:
270  *
271  * Return if the tree is empty.
272  */
273 
274 bool
radix_tree_empty_tree_p(struct radix_tree * t)275 radix_tree_empty_tree_p(struct radix_tree *t)
276 {
277 
278 	return t->t_root == NULL;
279 }
280 
281 /*
282  * radix_tree_empty_tree_p:
283  *
284  * Return true if the tree has any nodes with the given tag.  Otherwise
285  * return false.
286  *
287  * It's illegal to call this function with tagmask 0.
288  */
289 
290 bool
radix_tree_empty_tagged_tree_p(struct radix_tree * t,unsigned int tagmask)291 radix_tree_empty_tagged_tree_p(struct radix_tree *t, unsigned int tagmask)
292 {
293 
294 	KASSERT(tagmask != 0);
295 	return (entry_tagmask(t->t_root) & tagmask) == 0;
296 }
297 
298 static void
radix_tree_node_init(struct radix_tree_node * n)299 radix_tree_node_init(struct radix_tree_node *n)
300 {
301 
302 	memset(n, 0, sizeof(*n));
303 }
304 
305 #if defined(_KERNEL)
/*
 * radix_tree_init:
 *
 * initialize the subsystem.
 */

void
radix_tree_init(void)
{

	/* there is no global state to set up at the moment. */
}
318 
319 /*
320  * radix_tree_await_memory:
321  *
322  * after an insert has failed with ENOMEM, wait for memory to become
323  * available, so the caller can retry.  this needs to ensure that the
324  * maximum possible required number of nodes is available.
325  */
326 
327 void
radix_tree_await_memory(void)328 radix_tree_await_memory(void)
329 {
330 	struct radix_tree_node *nodes[RADIX_TREE_MAX_HEIGHT];
331 	int i;
332 
333 	for (i = 0; i < __arraycount(nodes); i++) {
334 		nodes[i] = kmem_intr_alloc(sizeof(struct radix_tree_node),
335 		    KM_SLEEP);
336 	}
337 	while (--i >= 0) {
338 		kmem_intr_free(nodes[i], sizeof(struct radix_tree_node));
339 	}
340 }
341 
342 #endif /* defined(_KERNEL) */
343 
344 /*
345  * radix_tree_sum_node:
346  *
347  * return the logical sum of all entries in the given node.  used to quickly
348  * check for tag masks or empty nodes.
349  */
350 
351 static uintptr_t
radix_tree_sum_node(const struct radix_tree_node * n)352 radix_tree_sum_node(const struct radix_tree_node *n)
353 {
354 #if RADIX_TREE_PTR_PER_NODE > 16
355 	unsigned int i;
356 	uintptr_t sum;
357 
358 	for (i = 0, sum = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
359 		sum |= (uintptr_t)n->n_ptrs[i];
360 	}
361 	return sum;
362 #else /* RADIX_TREE_PTR_PER_NODE > 16 */
363 	uintptr_t sum;
364 
365 	/*
366 	 * Unrolling the above is much better than a tight loop with two
367 	 * test+branch pairs.  On x86 with gcc 5.5.0 this compiles into 19
368 	 * deterministic instructions including the "return" and prologue &
369 	 * epilogue.
370 	 */
371 	sum = (uintptr_t)n->n_ptrs[0];
372 	sum |= (uintptr_t)n->n_ptrs[1];
373 	sum |= (uintptr_t)n->n_ptrs[2];
374 	sum |= (uintptr_t)n->n_ptrs[3];
375 #if RADIX_TREE_PTR_PER_NODE > 4
376 	sum |= (uintptr_t)n->n_ptrs[4];
377 	sum |= (uintptr_t)n->n_ptrs[5];
378 	sum |= (uintptr_t)n->n_ptrs[6];
379 	sum |= (uintptr_t)n->n_ptrs[7];
380 #endif
381 #if RADIX_TREE_PTR_PER_NODE > 8
382 	sum |= (uintptr_t)n->n_ptrs[8];
383 	sum |= (uintptr_t)n->n_ptrs[9];
384 	sum |= (uintptr_t)n->n_ptrs[10];
385 	sum |= (uintptr_t)n->n_ptrs[11];
386 	sum |= (uintptr_t)n->n_ptrs[12];
387 	sum |= (uintptr_t)n->n_ptrs[13];
388 	sum |= (uintptr_t)n->n_ptrs[14];
389 	sum |= (uintptr_t)n->n_ptrs[15];
390 #endif
391 	return sum;
392 #endif /* RADIX_TREE_PTR_PER_NODE > 16 */
393 }
394 
395 static int __unused
radix_tree_node_count_ptrs(const struct radix_tree_node * n)396 radix_tree_node_count_ptrs(const struct radix_tree_node *n)
397 {
398 	unsigned int i, c;
399 
400 	for (i = c = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
401 		c += (n->n_ptrs[i] != NULL);
402 	}
403 	return c;
404 }
405 
406 static struct radix_tree_node *
radix_tree_alloc_node(void)407 radix_tree_alloc_node(void)
408 {
409 	struct radix_tree_node *n;
410 
411 #if defined(_KERNEL)
412 	/*
413 	 * We must not block waiting for memory because this function
414 	 * can be called in contexts where waiting for memory is illegal.
415 	 */
416 	n = kmem_intr_alloc(sizeof(struct radix_tree_node), KM_NOSLEEP);
417 #elif defined(_STANDALONE)
418 	n = alloc(sizeof(*n));
419 #else /* defined(_STANDALONE) */
420 	n = malloc(sizeof(*n));
421 #endif /* defined(_STANDALONE) */
422 	if (n != NULL) {
423 		radix_tree_node_init(n);
424 	}
425 	KASSERT(n == NULL || radix_tree_sum_node(n) == 0);
426 	return n;
427 }
428 
/*
 * radix_tree_free_node:
 *
 * release an intermediate node.  the node must be empty (all slots NULL).
 */

static void
radix_tree_free_node(struct radix_tree_node *n)
{

	/* freeing a node which still has entries would lose them. */
	KASSERT(radix_tree_sum_node(n) == 0);
#if defined(_KERNEL)
	kmem_intr_free(n, sizeof(*n));
#elif defined(_STANDALONE)
	dealloc(n, sizeof(struct radix_tree_node));
#else
	free(n);
#endif
}
442 
443 /*
444  * radix_tree_grow:
445  *
446  * increase the height of the tree.
447  */
448 
449 static __noinline int
radix_tree_grow(struct radix_tree * t,unsigned int newheight)450 radix_tree_grow(struct radix_tree *t, unsigned int newheight)
451 {
452 	const unsigned int tagmask = entry_tagmask(t->t_root);
453 	struct radix_tree_node *newnodes[RADIX_TREE_MAX_HEIGHT];
454 	void *root;
455 	int h;
456 
457 	KASSERT(newheight <= RADIX_TREE_MAX_HEIGHT);
458 	if ((root = t->t_root) == NULL) {
459 		t->t_height = newheight;
460 		return 0;
461 	}
462 	for (h = t->t_height; h < newheight; h++) {
463 		newnodes[h] = radix_tree_alloc_node();
464 		if (__predict_false(newnodes[h] == NULL)) {
465 			while (--h >= (int)t->t_height) {
466 				newnodes[h]->n_ptrs[0] = NULL;
467 				radix_tree_free_node(newnodes[h]);
468 			}
469 			return ENOMEM;
470 		}
471 		newnodes[h]->n_ptrs[0] = root;
472 		root = entry_compose(newnodes[h], tagmask);
473 	}
474 	t->t_root = root;
475 	t->t_height = h;
476 	return 0;
477 }
478 
479 /*
480  * radix_tree_lookup_ptr:
481  *
482  * an internal helper function used for various exported functions.
483  *
484  * return the pointer to store the node for the given index.
485  *
486  * if alloc is true, try to allocate the storage.  (note for _KERNEL:
487  * in that case, this function can block.)  if the allocation failed or
488  * alloc is false, return NULL.
489  *
490  * if path is not NULL, fill it for the caller's investigation.
491  *
492  * if tagmask is not zero, search only for nodes with the tag set.
493  * note that, however, this function doesn't check the tagmask for the leaf
494  * pointer.  it's a caller's responsibility to investigate the value which
495  * is pointed by the returned pointer if necessary.
496  *
497  * while this function is a bit large, as it's called with some constant
498  * arguments, inlining might have benefits.  anyway, a compiler will decide.
499  */
500 
501 static inline void **
radix_tree_lookup_ptr(struct radix_tree * t,uint64_t idx,struct radix_tree_path * path,bool alloc,const unsigned int tagmask)502 radix_tree_lookup_ptr(struct radix_tree *t, uint64_t idx,
503     struct radix_tree_path *path, bool alloc, const unsigned int tagmask)
504 {
505 	struct radix_tree_node *n;
506 	int hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
507 	int shift;
508 	void **vpp;
509 	const uint64_t mask = (UINT64_C(1) << RADIX_TREE_BITS_PER_HEIGHT) - 1;
510 	struct radix_tree_node_ref *refs = NULL;
511 
512 	/*
513 	 * check unsupported combinations
514 	 */
515 	KASSERT(tagmask == 0 || !alloc);
516 	KASSERT(path == NULL || !alloc);
517 	vpp = &t->t_root;
518 	if (path != NULL) {
519 		refs = path->p_refs;
520 		refs->pptr = vpp;
521 	}
522 	n = NULL;
523 	for (shift = 64 - RADIX_TREE_BITS_PER_HEIGHT; shift >= 0;) {
524 		struct radix_tree_node *c;
525 		void *entry;
526 		const uint64_t i = (idx >> shift) & mask;
527 
528 		if (shift >= hshift) {
529 			unsigned int newheight;
530 
531 			KASSERT(vpp == &t->t_root);
532 			if (i == 0) {
533 				shift -= RADIX_TREE_BITS_PER_HEIGHT;
534 				continue;
535 			}
536 			if (!alloc) {
537 				if (path != NULL) {
538 					KASSERT((refs - path->p_refs) == 0);
539 					path->p_lastidx =
540 					    RADIX_TREE_INVALID_HEIGHT;
541 				}
542 				return NULL;
543 			}
544 			newheight = shift / RADIX_TREE_BITS_PER_HEIGHT + 1;
545 			if (radix_tree_grow(t, newheight)) {
546 				return NULL;
547 			}
548 			hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
549 		}
550 		entry = *vpp;
551 		c = entry_ptr(entry);
552 		if (c == NULL ||
553 		    (tagmask != 0 &&
554 		    (entry_tagmask(entry) & tagmask) == 0)) {
555 			if (!alloc) {
556 				if (path != NULL) {
557 					path->p_lastidx = refs - path->p_refs;
558 				}
559 				return NULL;
560 			}
561 			c = radix_tree_alloc_node();
562 			if (c == NULL) {
563 				return NULL;
564 			}
565 			*vpp = c;
566 		}
567 		n = c;
568 		vpp = &n->n_ptrs[i];
569 		if (path != NULL) {
570 			refs++;
571 			refs->pptr = vpp;
572 		}
573 		shift -= RADIX_TREE_BITS_PER_HEIGHT;
574 	}
575 	if (alloc) {
576 		KASSERT(*vpp == NULL);
577 	}
578 	if (path != NULL) {
579 		path->p_lastidx = refs - path->p_refs;
580 	}
581 	return vpp;
582 }
583 
584 /*
585  * radix_tree_undo_insert_node:
586  *
587  * Undo the effects of a failed insert.  The conditions that led to the
588  * insert may change and it may not be retried.  If the insert is not
589  * retried, there will be no corresponding radix_tree_remove_node() for
590  * this index in the future.  Therefore any adjustments made to the tree
591  * before memory was exhausted must be reverted.
592  */
593 
594 static __noinline void
radix_tree_undo_insert_node(struct radix_tree * t,uint64_t idx)595 radix_tree_undo_insert_node(struct radix_tree *t, uint64_t idx)
596 {
597 	struct radix_tree_path path;
598 	int i;
599 
600 	(void)radix_tree_lookup_ptr(t, idx, &path, false, 0);
601 	if (path.p_lastidx == RADIX_TREE_INVALID_HEIGHT) {
602 		/*
603 		 * no nodes were inserted.
604 		 */
605 		return;
606 	}
607 	for (i = path.p_lastidx - 1; i >= 0; i--) {
608 		struct radix_tree_node ** const pptr =
609 		    (struct radix_tree_node **)path_pptr(t, &path, i);
610 		struct radix_tree_node *n;
611 
612 		KASSERT(pptr != NULL);
613 		n = entry_ptr(*pptr);
614 		KASSERT(n != NULL);
615 		if (radix_tree_sum_node(n) != 0) {
616 			break;
617 		}
618 		radix_tree_free_node(n);
619 		*pptr = NULL;
620 	}
621 	/*
622 	 * fix up height
623 	 */
624 	if (i < 0) {
625 		KASSERT(t->t_root == NULL);
626 		t->t_height = 0;
627 	}
628 }
629 
/*
 * radix_tree_insert_node:
 *
 * Insert the node p at the given index.
 *
 * It's illegal to insert NULL.  It's illegal to insert a non-aligned
 * pointer, i.e. one whose tag bits are not zero.  The slot for the index
 * must not be occupied already.
 *
 * This function returns ENOMEM if a necessary memory allocation failed, in
 * which case the tree is restored to its previous shape.  Otherwise, this
 * function returns 0.
 *
 * Note that inserting a node can involve memory allocation for
 * intermediate nodes.  If _KERNEL, it's done without sleeping.
 *
 * For the newly inserted node, all tags are cleared.
 */

int
radix_tree_insert_node(struct radix_tree *t, uint64_t idx, void *p)
{
	void **slot;

	KASSERT(p != NULL);
	KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
	slot = radix_tree_lookup_ptr(t, idx, NULL, true, 0);
	if (__predict_false(slot == NULL)) {
		/* drop whatever intermediate nodes were added so far. */
		radix_tree_undo_insert_node(t, idx);
		return ENOMEM;
	}
	KASSERT(*slot == NULL);
	*slot = p;
	return 0;
}
662 
/*
 * radix_tree_replace_node:
 *
 * Replace the node at the given index with p and return the node which
 * was there before.
 *
 * It's illegal to try to replace a node which has not been inserted.
 * As with insertion, p must be a non-NULL, aligned pointer.
 *
 * The tags of the slot are kept intact.
 */

void *
radix_tree_replace_node(struct radix_tree *t, uint64_t idx, void *p)
{
	void **slot;
	void *prev;

	KASSERT(p != NULL);
	KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
	slot = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
	KASSERT(slot != NULL);
	prev = *slot;
	KASSERT(prev != NULL);
	/* carry the old entry's tag bits over to the new one. */
	*slot = entry_compose(p, entry_tagmask(prev));
	return entry_ptr(prev);
}
689 
690 /*
691  * radix_tree_remove_node:
692  *
693  * Remove the node at the given index.
694  *
695  * It's illegal to try to remove a node which has not been inserted.
696  */
697 
698 void *
radix_tree_remove_node(struct radix_tree * t,uint64_t idx)699 radix_tree_remove_node(struct radix_tree *t, uint64_t idx)
700 {
701 	struct radix_tree_path path;
702 	void **vpp;
703 	void *oldp;
704 	int i;
705 
706 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
707 	KASSERT(vpp != NULL);
708 	oldp = *vpp;
709 	KASSERT(oldp != NULL);
710 	KASSERT(path.p_lastidx == t->t_height);
711 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
712 	*vpp = NULL;
713 	for (i = t->t_height - 1; i >= 0; i--) {
714 		void *entry;
715 		struct radix_tree_node ** const pptr =
716 		    (struct radix_tree_node **)path_pptr(t, &path, i);
717 		struct radix_tree_node *n;
718 
719 		KASSERT(pptr != NULL);
720 		entry = *pptr;
721 		n = entry_ptr(entry);
722 		KASSERT(n != NULL);
723 		if (radix_tree_sum_node(n) != 0) {
724 			break;
725 		}
726 		radix_tree_free_node(n);
727 		*pptr = NULL;
728 	}
729 	/*
730 	 * fix up height
731 	 */
732 	if (i < 0) {
733 		KASSERT(t->t_root == NULL);
734 		t->t_height = 0;
735 	}
736 	/*
737 	 * update tags
738 	 */
739 	for (; i >= 0; i--) {
740 		void *entry;
741 		struct radix_tree_node ** const pptr =
742 		    (struct radix_tree_node **)path_pptr(t, &path, i);
743 		struct radix_tree_node *n;
744 		unsigned int newmask;
745 
746 		KASSERT(pptr != NULL);
747 		entry = *pptr;
748 		n = entry_ptr(entry);
749 		KASSERT(n != NULL);
750 		KASSERT(radix_tree_sum_node(n) != 0);
751 		newmask = radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK;
752 		if (newmask == entry_tagmask(entry)) {
753 			break;
754 		}
755 		*pptr = entry_compose(n, newmask);
756 	}
757 	/*
758 	 * XXX is it worth to try to reduce height?
759 	 * if we do that, make radix_tree_grow rollback its change as well.
760 	 */
761 	return entry_ptr(oldp);
762 }
763 
/*
 * radix_tree_lookup_node:
 *
 * Returns the node at the given index, without its tag bits.
 * Returns NULL if nothing is found at the given index.
 */

void *
radix_tree_lookup_node(struct radix_tree *t, uint64_t idx)
{
	void ** const slot = radix_tree_lookup_ptr(t, idx, NULL, false, 0);

	return (slot != NULL) ? entry_ptr(*slot) : NULL;
}
782 
783 static inline void
gang_lookup_init(struct radix_tree * t,uint64_t idx,struct radix_tree_path * path,const unsigned int tagmask)784 gang_lookup_init(struct radix_tree *t, uint64_t idx,
785     struct radix_tree_path *path, const unsigned int tagmask)
786 {
787 	void **vpp __unused;
788 
789 	vpp = radix_tree_lookup_ptr(t, idx, path, false, tagmask);
790 	KASSERT(vpp == NULL ||
791 	    vpp == path_pptr(t, path, path->p_lastidx));
792 	KASSERT(&t->t_root == path_pptr(t, path, 0));
793 	KASSERT(path->p_lastidx == RADIX_TREE_INVALID_HEIGHT ||
794 	   path->p_lastidx == t->t_height ||
795 	   !entry_match_p(*path_pptr(t, path, path->p_lastidx), tagmask));
796 }
797 
798 /*
799  * gang_lookup_scan:
800  *
801  * a helper routine for radix_tree_gang_lookup_node and its variants.
802  */
803 
804 static inline unsigned int
805 __attribute__((__always_inline__))
gang_lookup_scan(struct radix_tree * t,struct radix_tree_path * path,void ** results,const unsigned int maxresults,const unsigned int tagmask,const bool reverse,const bool dense)806 gang_lookup_scan(struct radix_tree *t, struct radix_tree_path *path,
807     void **results, const unsigned int maxresults, const unsigned int tagmask,
808     const bool reverse, const bool dense)
809 {
810 
811 	/*
812 	 * we keep the path updated only for lastidx-1.
813 	 * vpp is what path_pptr(t, path, lastidx) would be.
814 	 */
815 	void **vpp;
816 	unsigned int nfound;
817 	unsigned int lastidx;
818 	/*
819 	 * set up scan direction dependant constants so that we can iterate
820 	 * n_ptrs as the following.
821 	 *
822 	 *	for (i = first; i != guard; i += step)
823 	 *		visit n->n_ptrs[i];
824 	 */
825 	const int step = reverse ? -1 : 1;
826 	const unsigned int first = reverse ? RADIX_TREE_PTR_PER_NODE - 1 : 0;
827 	const unsigned int last = reverse ? 0 : RADIX_TREE_PTR_PER_NODE - 1;
828 	const unsigned int guard = last + step;
829 
830 	KASSERT(maxresults > 0);
831 	KASSERT(&t->t_root == path_pptr(t, path, 0));
832 	lastidx = path->p_lastidx;
833 	KASSERT(lastidx == RADIX_TREE_INVALID_HEIGHT ||
834 	   lastidx == t->t_height ||
835 	   !entry_match_p(*path_pptr(t, path, lastidx), tagmask));
836 	nfound = 0;
837 	if (lastidx == RADIX_TREE_INVALID_HEIGHT) {
838 		/*
839 		 * requested idx is beyond the right-most node.
840 		 */
841 		if (reverse && !dense) {
842 			lastidx = 0;
843 			vpp = path_pptr(t, path, lastidx);
844 			goto descend;
845 		}
846 		return 0;
847 	}
848 	vpp = path_pptr(t, path, lastidx);
849 	while (/*CONSTCOND*/true) {
850 		struct radix_tree_node *n;
851 		unsigned int i;
852 
853 		if (entry_match_p(*vpp, tagmask)) {
854 			KASSERT(lastidx == t->t_height);
855 			/*
856 			 * record the matching non-NULL leaf.
857 			 */
858 			results[nfound] = entry_ptr(*vpp);
859 			nfound++;
860 			if (nfound == maxresults) {
861 				return nfound;
862 			}
863 		} else if (dense) {
864 			return nfound;
865 		}
866 scan_siblings:
867 		/*
868 		 * try to find the next matching non-NULL sibling.
869 		 */
870 		if (lastidx == 0) {
871 			/*
872 			 * the root has no siblings.
873 			 * we've done.
874 			 */
875 			KASSERT(vpp == &t->t_root);
876 			break;
877 		}
878 		n = path_node(t, path, lastidx - 1);
879 		for (i = vpp - n->n_ptrs + step; i != guard; i += step) {
880 			KASSERT(i < RADIX_TREE_PTR_PER_NODE);
881 			if (entry_match_p(n->n_ptrs[i], tagmask)) {
882 				vpp = &n->n_ptrs[i];
883 				break;
884 			} else if (dense) {
885 				return nfound;
886 			}
887 		}
888 		if (i == guard) {
889 			/*
890 			 * not found.  go to parent.
891 			 */
892 			lastidx--;
893 			vpp = path_pptr(t, path, lastidx);
894 			goto scan_siblings;
895 		}
896 descend:
897 		/*
898 		 * following the left-most (or right-most in the case of
899 		 * reverse scan) child node, descend until reaching the leaf or
900 		 * a non-matching entry.
901 		 */
902 		while (entry_match_p(*vpp, tagmask) && lastidx < t->t_height) {
903 			/*
904 			 * save vpp in the path so that we can come back to this
905 			 * node after finishing visiting children.
906 			 */
907 			path->p_refs[lastidx].pptr = vpp;
908 			n = entry_ptr(*vpp);
909 			vpp = &n->n_ptrs[first];
910 			lastidx++;
911 		}
912 	}
913 	return nfound;
914 }
915 
916 /*
917  * radix_tree_gang_lookup_node:
918  *
919  * Scan the tree starting from the given index in the ascending order and
920  * return found nodes.
921  *
922  * results should be an array large enough to hold maxresults pointers.
923  * This function returns the number of nodes found, up to maxresults.
924  * Returning less than maxresults means there are no more nodes in the tree.
925  *
926  * If dense == true, this function stops scanning when it founds a hole of
927  * indexes.  I.e. an index for which radix_tree_lookup_node would returns NULL.
928  * If dense == false, this function skips holes and continue scanning until
929  * maxresults nodes are found or it reaches the limit of the index range.
930  *
931  * The result of this function is semantically equivalent to what could be
932  * obtained by repeated calls of radix_tree_lookup_node with increasing index.
933  * but this function is expected to be computationally cheaper when looking up
934  * multiple nodes at once.  Especially, it's expected to be much cheaper when
935  * node indexes are distributed sparsely.
936  *
937  * Note that this function doesn't return index values of found nodes.
938  * Thus, in the case of dense == false, if index values are important for
939  * a caller, it's the caller's responsibility to check them, typically
940  * by examining the returned nodes using some caller-specific knowledge
941  * about them.
942  * In the case of dense == true, a node returned via results[N] is always for
943  * the index (idx + N).
944  */
945 
946 unsigned int
radix_tree_gang_lookup_node(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense)947 radix_tree_gang_lookup_node(struct radix_tree *t, uint64_t idx,
948     void **results, unsigned int maxresults, bool dense)
949 {
950 	struct radix_tree_path path;
951 
952 	gang_lookup_init(t, idx, &path, 0);
953 	return gang_lookup_scan(t, &path, results, maxresults, 0, false, dense);
954 }
955 
956 /*
957  * radix_tree_gang_lookup_node_reverse:
958  *
959  * Same as radix_tree_gang_lookup_node except that this one scans the
960  * tree in the reverse order.  I.e. descending index values.
961  */
962 
963 unsigned int
radix_tree_gang_lookup_node_reverse(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense)964 radix_tree_gang_lookup_node_reverse(struct radix_tree *t, uint64_t idx,
965     void **results, unsigned int maxresults, bool dense)
966 {
967 	struct radix_tree_path path;
968 
969 	gang_lookup_init(t, idx, &path, 0);
970 	return gang_lookup_scan(t, &path, results, maxresults, 0, true, dense);
971 }
972 
973 /*
974  * radix_tree_gang_lookup_tagged_node:
975  *
976  * Same as radix_tree_gang_lookup_node except that this one only returns
977  * nodes tagged with tagid.
978  *
979  * It's illegal to call this function with tagmask 0.
980  */
981 
982 unsigned int
radix_tree_gang_lookup_tagged_node(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense,unsigned int tagmask)983 radix_tree_gang_lookup_tagged_node(struct radix_tree *t, uint64_t idx,
984     void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
985 {
986 	struct radix_tree_path path;
987 
988 	KASSERT(tagmask != 0);
989 	gang_lookup_init(t, idx, &path, tagmask);
990 	return gang_lookup_scan(t, &path, results, maxresults, tagmask, false,
991 	    dense);
992 }
993 
994 /*
995  * radix_tree_gang_lookup_tagged_node_reverse:
996  *
997  * Same as radix_tree_gang_lookup_tagged_node except that this one scans the
998  * tree in the reverse order.  I.e. descending index values.
999  */
1000 
1001 unsigned int
radix_tree_gang_lookup_tagged_node_reverse(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense,unsigned int tagmask)1002 radix_tree_gang_lookup_tagged_node_reverse(struct radix_tree *t, uint64_t idx,
1003     void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
1004 {
1005 	struct radix_tree_path path;
1006 
1007 	KASSERT(tagmask != 0);
1008 	gang_lookup_init(t, idx, &path, tagmask);
1009 	return gang_lookup_scan(t, &path, results, maxresults, tagmask, true,
1010 	    dense);
1011 }
1012 
/*
 * radix_tree_get_tag:
 *
 * Return the tags of the node at the given index, restricted to tagmask.
 * The result is 0 if none of the tags in tagmask is set.
 *
 * It's illegal to call this function for a node which has not been inserted.
 */

unsigned int
radix_tree_get_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
{
	/*
	 * the following two implementations should behave same.
	 * the former one was chosen because it seems faster.
	 */
#if 1
	void **vpp;

	/*
	 * the lookup itself is filtered by tagmask; when it yields nothing,
	 * report "no tags" as a mask value (0) rather than as a boolean.
	 */
	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, tagmask);
	if (vpp == NULL) {
		return 0;
	}
	KASSERT(*vpp != NULL);
	return (entry_tagmask(*vpp) & tagmask);
#else
	void **vpp;

	/* unfiltered lookup; the node is required to exist. */
	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
	KASSERT(vpp != NULL);
	return (entry_tagmask(*vpp) & tagmask);
#endif
}
1045 
1046 /*
1047  * radix_tree_set_tag:
1048  *
1049  * Set the tag for the node at the given index.
1050  *
1051  * It's illegal to call this function for a node which has not been inserted.
1052  * It's illegal to call this function with tagmask 0.
1053  */
1054 
1055 void
radix_tree_set_tag(struct radix_tree * t,uint64_t idx,unsigned int tagmask)1056 radix_tree_set_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1057 {
1058 	struct radix_tree_path path;
1059 	void **vpp __unused;
1060 	int i;
1061 
1062 	KASSERT(tagmask != 0);
1063 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1064 	KASSERT(vpp != NULL);
1065 	KASSERT(*vpp != NULL);
1066 	KASSERT(path.p_lastidx == t->t_height);
1067 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1068 	for (i = t->t_height; i >= 0; i--) {
1069 		void ** const pptr = (void **)path_pptr(t, &path, i);
1070 		void *entry;
1071 
1072 		KASSERT(pptr != NULL);
1073 		entry = *pptr;
1074 		if ((entry_tagmask(entry) & tagmask) != 0) {
1075 			break;
1076 		}
1077 		*pptr = (void *)((uintptr_t)entry | tagmask);
1078 	}
1079 }
1080 
1081 /*
1082  * radix_tree_clear_tag:
1083  *
1084  * Clear the tag for the node at the given index.
1085  *
1086  * It's illegal to call this function for a node which has not been inserted.
1087  * It's illegal to call this function with tagmask 0.
1088  */
1089 
1090 void
radix_tree_clear_tag(struct radix_tree * t,uint64_t idx,unsigned int tagmask)1091 radix_tree_clear_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1092 {
1093 	struct radix_tree_path path;
1094 	void **vpp;
1095 	int i;
1096 
1097 	KASSERT(tagmask != 0);
1098 	vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1099 	KASSERT(vpp != NULL);
1100 	KASSERT(*vpp != NULL);
1101 	KASSERT(path.p_lastidx == t->t_height);
1102 	KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1103 	/*
1104 	 * if already cleared, nothing to do
1105 	 */
1106 	if ((entry_tagmask(*vpp) & tagmask) == 0) {
1107 		return;
1108 	}
1109 	/*
1110 	 * clear the tag only if no children have the tag.
1111 	 */
1112 	for (i = t->t_height; i >= 0; i--) {
1113 		void ** const pptr = (void **)path_pptr(t, &path, i);
1114 		void *entry;
1115 
1116 		KASSERT(pptr != NULL);
1117 		entry = *pptr;
1118 		KASSERT((entry_tagmask(entry) & tagmask) != 0);
1119 		*pptr = entry_compose(entry_ptr(entry),
1120 		    entry_tagmask(entry) & ~tagmask);
1121 		/*
1122 		 * check if we should proceed to process the next level.
1123 		 */
1124 		if (0 < i) {
1125 			struct radix_tree_node *n = path_node(t, &path, i - 1);
1126 
1127 			if ((radix_tree_sum_node(n) & tagmask) != 0) {
1128 				break;
1129 			}
1130 		}
1131 	}
1132 }
1133 
1134 #if defined(UNITTEST)
1135 
1136 #include <inttypes.h>
1137 #include <stdio.h>
1138 
1139 static void
radix_tree_dump_node(const struct radix_tree * t,void * vp,uint64_t offset,unsigned int height)1140 radix_tree_dump_node(const struct radix_tree *t, void *vp,
1141     uint64_t offset, unsigned int height)
1142 {
1143 	struct radix_tree_node *n;
1144 	unsigned int i;
1145 
1146 	for (i = 0; i < t->t_height - height; i++) {
1147 		printf(" ");
1148 	}
1149 	if (entry_tagmask(vp) == 0) {
1150 		printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
1151 	} else {
1152 		printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
1153 		    entry_tagmask(vp));
1154 	}
1155 	if (height == 0) {
1156 		printf(" (leaf)\n");
1157 		return;
1158 	}
1159 	n = entry_ptr(vp);
1160 	assert((radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK) ==
1161 	    entry_tagmask(vp));
1162 	printf(" (%u children)\n", radix_tree_node_count_ptrs(n));
1163 	for (i = 0; i < __arraycount(n->n_ptrs); i++) {
1164 		void *c;
1165 
1166 		c = n->n_ptrs[i];
1167 		if (c == NULL) {
1168 			continue;
1169 		}
1170 		radix_tree_dump_node(t, c,
1171 		    offset + i * (UINT64_C(1) <<
1172 		    (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
1173 	}
1174 }
1175 
1176 void radix_tree_dump(const struct radix_tree *);
1177 
1178 void
radix_tree_dump(const struct radix_tree * t)1179 radix_tree_dump(const struct radix_tree *t)
1180 {
1181 
1182 	printf("tree %p height=%u\n", t, t->t_height);
1183 	radix_tree_dump_node(t, t->t_root, 0, t->t_height);
1184 }
1185 
1186 static void
test1(void)1187 test1(void)
1188 {
1189 	struct radix_tree s;
1190 	struct radix_tree *t = &s;
1191 	void *results[3];
1192 
1193 	radix_tree_init_tree(t);
1194 	radix_tree_dump(t);
1195 	assert(radix_tree_lookup_node(t, 0) == NULL);
1196 	assert(radix_tree_lookup_node(t, 1000) == NULL);
1197 	assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 0);
1198 	assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1199 	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1200 	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1201 	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1202 	    0);
1203 	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1204 	    0);
1205 	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1206 	    == 0);
1207 	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1208 	    == 0);
1209 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1210 	    == 0);
1211 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1212 	    == 0);
1213 	assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, false, 1)
1214 	    == 0);
1215 	assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, true, 1)
1216 	    == 0);
1217 	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1218 	    false, 1) == 0);
1219 	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1220 	    true, 1) == 0);
1221 	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1222 	    false, 1) == 0);
1223 	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1224 	    true, 1) == 0);
1225 	assert(radix_tree_empty_tree_p(t));
1226 	assert(radix_tree_empty_tagged_tree_p(t, 1));
1227 	assert(radix_tree_empty_tagged_tree_p(t, 2));
1228 	assert(radix_tree_insert_node(t, 0, (void *)0xdeadbea0) == 0);
1229 	assert(!radix_tree_empty_tree_p(t));
1230 	assert(radix_tree_empty_tagged_tree_p(t, 1));
1231 	assert(radix_tree_empty_tagged_tree_p(t, 2));
1232 	assert(radix_tree_lookup_node(t, 0) == (void *)0xdeadbea0);
1233 	assert(radix_tree_lookup_node(t, 1000) == NULL);
1234 	memset(results, 0, sizeof(results));
1235 	assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1236 	assert(results[0] == (void *)0xdeadbea0);
1237 	memset(results, 0, sizeof(results));
1238 	assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1239 	assert(results[0] == (void *)0xdeadbea0);
1240 	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1241 	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1242 	memset(results, 0, sizeof(results));
1243 	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1244 	    1);
1245 	assert(results[0] == (void *)0xdeadbea0);
1246 	memset(results, 0, sizeof(results));
1247 	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1248 	    1);
1249 	assert(results[0] == (void *)0xdeadbea0);
1250 	memset(results, 0, sizeof(results));
1251 	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1252 	    == 1);
1253 	assert(results[0] == (void *)0xdeadbea0);
1254 	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1255 	    == 0);
1256 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1257 	    == 0);
1258 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1259 	    == 0);
1260 	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1261 	    false, 1) == 0);
1262 	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1263 	    true, 1) == 0);
1264 	assert(radix_tree_insert_node(t, 1000, (void *)0xdeadbea0) == 0);
1265 	assert(radix_tree_remove_node(t, 0) == (void *)0xdeadbea0);
1266 	assert(!radix_tree_empty_tree_p(t));
1267 	radix_tree_dump(t);
1268 	assert(radix_tree_lookup_node(t, 0) == NULL);
1269 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1270 	memset(results, 0, sizeof(results));
1271 	assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1272 	assert(results[0] == (void *)0xdeadbea0);
1273 	assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1274 	memset(results, 0, sizeof(results));
1275 	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 1);
1276 	assert(results[0] == (void *)0xdeadbea0);
1277 	memset(results, 0, sizeof(results));
1278 	assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 1);
1279 	assert(results[0] == (void *)0xdeadbea0);
1280 	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false)
1281 	    == 0);
1282 	assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true)
1283 	    == 0);
1284 	memset(results, 0, sizeof(results));
1285 	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1286 	    == 1);
1287 	memset(results, 0, sizeof(results));
1288 	assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1289 	    == 1);
1290 	assert(results[0] == (void *)0xdeadbea0);
1291 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1292 	    == 0);
1293 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1294 	    == 0);
1295 	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1296 	    false, 1) == 0);
1297 	assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1298 	    true, 1) == 0);
1299 	assert(!radix_tree_get_tag(t, 1000, 1));
1300 	assert(!radix_tree_get_tag(t, 1000, 2));
1301 	assert(radix_tree_get_tag(t, 1000, 2 | 1) == 0);
1302 	assert(radix_tree_empty_tagged_tree_p(t, 1));
1303 	assert(radix_tree_empty_tagged_tree_p(t, 2));
1304 	radix_tree_set_tag(t, 1000, 2);
1305 	assert(!radix_tree_get_tag(t, 1000, 1));
1306 	assert(radix_tree_get_tag(t, 1000, 2));
1307 	assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1308 	assert(radix_tree_empty_tagged_tree_p(t, 1));
1309 	assert(!radix_tree_empty_tagged_tree_p(t, 2));
1310 	radix_tree_dump(t);
1311 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1312 	assert(radix_tree_insert_node(t, 0, (void *)0xbea0) == 0);
1313 	radix_tree_dump(t);
1314 	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1315 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1316 	assert(radix_tree_insert_node(t, UINT64_C(10000000000), (void *)0xdea0)
1317 	    == 0);
1318 	radix_tree_dump(t);
1319 	assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1320 	assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1321 	assert(radix_tree_lookup_node(t, UINT64_C(10000000000)) ==
1322 	    (void *)0xdea0);
1323 	radix_tree_dump(t);
1324 	assert(!radix_tree_get_tag(t, 0, 2));
1325 	assert(radix_tree_get_tag(t, 1000, 2));
1326 	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
1327 	radix_tree_set_tag(t, 0, 2);
1328 	radix_tree_set_tag(t, UINT64_C(10000000000), 2);
1329 	radix_tree_dump(t);
1330 	assert(radix_tree_get_tag(t, 0, 2));
1331 	assert(radix_tree_get_tag(t, 1000, 2));
1332 	assert(radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1333 	radix_tree_clear_tag(t, 0, 2);
1334 	radix_tree_clear_tag(t, UINT64_C(10000000000), 2);
1335 	radix_tree_dump(t);
1336 	assert(!radix_tree_get_tag(t, 0, 2));
1337 	assert(radix_tree_get_tag(t, 1000, 2));
1338 	assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1339 	radix_tree_dump(t);
1340 	assert(radix_tree_replace_node(t, 1000, (void *)0x12345678) ==
1341 	    (void *)0xdeadbea0);
1342 	assert(!radix_tree_get_tag(t, 1000, 1));
1343 	assert(radix_tree_get_tag(t, 1000, 2));
1344 	assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1345 	memset(results, 0, sizeof(results));
1346 	assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 3);
1347 	assert(results[0] == (void *)0xbea0);
1348 	assert(results[1] == (void *)0x12345678);
1349 	assert(results[2] == (void *)0xdea0);
1350 	memset(results, 0, sizeof(results));
1351 	assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1352 	assert(results[0] == (void *)0xbea0);
1353 	memset(results, 0, sizeof(results));
1354 	assert(radix_tree_gang_lookup_node(t, 1, results, 3, false) == 2);
1355 	assert(results[0] == (void *)0x12345678);
1356 	assert(results[1] == (void *)0xdea0);
1357 	assert(radix_tree_gang_lookup_node(t, 1, results, 3, true) == 0);
1358 	memset(results, 0, sizeof(results));
1359 	assert(radix_tree_gang_lookup_node(t, 1001, results, 3, false) == 1);
1360 	assert(results[0] == (void *)0xdea0);
1361 	assert(radix_tree_gang_lookup_node(t, 1001, results, 3, true) == 0);
1362 	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1363 	    false) == 0);
1364 	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1365 	    true) == 0);
1366 	assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1367 	    3, false) == 0);
1368 	assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1369 	    3, true) == 0);
1370 	memset(results, 0, sizeof(results));
1371 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, false, 2)
1372 	    == 1);
1373 	assert(results[0] == (void *)0x12345678);
1374 	assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, true, 2)
1375 	    == 0);
1376 	assert(entry_tagmask(t->t_root) != 0);
1377 	assert(radix_tree_remove_node(t, 1000) == (void *)0x12345678);
1378 	assert(entry_tagmask(t->t_root) == 0);
1379 	radix_tree_dump(t);
1380 	assert(radix_tree_insert_node(t, UINT64_C(10000000001), (void *)0xfff0)
1381 	    == 0);
1382 	memset(results, 0, sizeof(results));
1383 	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1384 	    false) == 2);
1385 	assert(results[0] == (void *)0xdea0);
1386 	assert(results[1] == (void *)0xfff0);
1387 	memset(results, 0, sizeof(results));
1388 	assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1389 	    true) == 2);
1390 	assert(results[0] == (void *)0xdea0);
1391 	assert(results[1] == (void *)0xfff0);
1392 	memset(results, 0, sizeof(results));
1393 	assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1394 	    results, 3, false) == 3);
1395 	assert(results[0] == (void *)0xfff0);
1396 	assert(results[1] == (void *)0xdea0);
1397 	assert(results[2] == (void *)0xbea0);
1398 	memset(results, 0, sizeof(results));
1399 	assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1400 	    results, 3, true) == 2);
1401 	assert(results[0] == (void *)0xfff0);
1402 	assert(results[1] == (void *)0xdea0);
1403 	assert(radix_tree_remove_node(t, UINT64_C(10000000000)) ==
1404 	    (void *)0xdea0);
1405 	assert(radix_tree_remove_node(t, UINT64_C(10000000001)) ==
1406 	    (void *)0xfff0);
1407 	radix_tree_dump(t);
1408 	assert(radix_tree_remove_node(t, 0) == (void *)0xbea0);
1409 	radix_tree_dump(t);
1410 	radix_tree_fini_tree(t);
1411 }
1412 
1413 #include <sys/time.h>
1414 
/*
 * Bookkeeping for one node inserted by test2.
 */
struct testnode {
	/* index under which test2 inserts this node into the tree */
	uint64_t idx;
	/* per tag id: whether test2 sets that tag on this node */
	bool tagged[RADIX_TREE_TAG_ID_MAX];
};
1419 
/*
 * printops:
 *
 * Print one "RESULT" line giving the rate, in operations per second, of n
 * operations performed between the timestamps *stv (start) and *etv (end).
 * title, name and tag are printed as given to label the line.
 */

static void
printops(const char *title, const char *name, int tag, unsigned int n,
    const struct timeval *stv, const struct timeval *etv)
{
	/*
	 * widen tv_sec before scaling it to microseconds, so that the
	 * multiplication can't overflow where time_t is only 32 bits wide.
	 */
	const uint64_t s = (uint64_t)stv->tv_sec * 1000000 +
	    (uint64_t)stv->tv_usec;
	const uint64_t e = (uint64_t)etv->tv_sec * 1000000 +
	    (uint64_t)etv->tv_usec;

	printf("RESULT %s %s %d %lf op/s\n", title, name, tag,
	    (double)n / (e - s) * 1000000);
}
1430 
/* size of the results array used for each gang lookup in test2 */
#define	TEST2_GANG_LOOKUP_NODES	16
1432 
/*
 * test2_should_tag:
 *
 * Decide whether the i-th node of test2 gets the tag tagid: every 4th
 * node for tag id 0, every 7th node for any other tag id.
 */

static bool
test2_should_tag(unsigned int i, unsigned int tagid)
{

	if (tagid == 0) {
		return (i % 4) == 0;	/* 25% */
	}
	return (i % 7) == 0;	/* 14% */
}
1444 
1445 static void
check_tag_count(const unsigned int * ntagged,unsigned int tagmask,unsigned int count)1446 check_tag_count(const unsigned int *ntagged, unsigned int tagmask,
1447     unsigned int count)
1448 {
1449 	unsigned int tag;
1450 
1451 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1452 		if ((tagmask & (1 << tag)) == 0) {
1453 			continue;
1454 		}
1455 		if (((tagmask - 1) & tagmask) == 0) {
1456 			assert(count == ntagged[tag]);
1457 		} else {
1458 			assert(count >= ntagged[tag]);
1459 		}
1460 	}
1461 }
1462 
1463 static void
test2(const char * title,bool dense)1464 test2(const char *title, bool dense)
1465 {
1466 	struct radix_tree s;
1467 	struct radix_tree *t = &s;
1468 	struct testnode *n;
1469 	unsigned int i;
1470 	unsigned int nnodes = 100000;
1471 	unsigned int removed;
1472 	unsigned int tag;
1473 	unsigned int tagmask;
1474 	unsigned int ntagged[RADIX_TREE_TAG_ID_MAX];
1475 	struct testnode *nodes;
1476 	struct timeval stv;
1477 	struct timeval etv;
1478 
1479 	nodes = malloc(nnodes * sizeof(*nodes));
1480 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1481 		ntagged[tag] = 0;
1482 	}
1483 	radix_tree_init_tree(t);
1484 	for (i = 0; i < nnodes; i++) {
1485 		n = &nodes[i];
1486 		n->idx = random();
1487 		if (sizeof(long) == 4) {
1488 			n->idx <<= 32;
1489 			n->idx |= (uint32_t)random();
1490 		}
1491 		if (dense) {
1492 			n->idx %= nnodes * 2;
1493 		}
1494 		while (radix_tree_lookup_node(t, n->idx) != NULL) {
1495 			n->idx++;
1496 		}
1497 		radix_tree_insert_node(t, n->idx, n);
1498 		for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1499 			tagmask = 1 << tag;
1500 
1501 			n->tagged[tag] = test2_should_tag(i, tag);
1502 			if (n->tagged[tag]) {
1503 				radix_tree_set_tag(t, n->idx, tagmask);
1504 				ntagged[tag]++;
1505 			}
1506 			assert((n->tagged[tag] ? tagmask : 0) ==
1507 			    radix_tree_get_tag(t, n->idx, tagmask));
1508 		}
1509 	}
1510 
1511 	gettimeofday(&stv, NULL);
1512 	for (i = 0; i < nnodes; i++) {
1513 		n = &nodes[i];
1514 		assert(radix_tree_lookup_node(t, n->idx) == n);
1515 	}
1516 	gettimeofday(&etv, NULL);
1517 	printops(title, "lookup", 0, nnodes, &stv, &etv);
1518 
1519 	for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1520 		unsigned int count = 0;
1521 
1522 		gettimeofday(&stv, NULL);
1523 		for (i = 0; i < nnodes; i++) {
1524 			unsigned int tagged;
1525 
1526 			n = &nodes[i];
1527 			tagged = radix_tree_get_tag(t, n->idx, tagmask);
1528 			assert((tagged & ~tagmask) == 0);
1529 			for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1530 				assert((tagmask & (1 << tag)) == 0 ||
1531 				    n->tagged[tag] == !!(tagged & (1 << tag)));
1532 			}
1533 			if (tagged) {
1534 				count++;
1535 			}
1536 		}
1537 		gettimeofday(&etv, NULL);
1538 		check_tag_count(ntagged, tagmask, count);
1539 		printops(title, "get_tag", tagmask, nnodes, &stv, &etv);
1540 	}
1541 
1542 	gettimeofday(&stv, NULL);
1543 	for (i = 0; i < nnodes; i++) {
1544 		n = &nodes[i];
1545 		radix_tree_remove_node(t, n->idx);
1546 	}
1547 	gettimeofday(&etv, NULL);
1548 	printops(title, "remove", 0, nnodes, &stv, &etv);
1549 
1550 	gettimeofday(&stv, NULL);
1551 	for (i = 0; i < nnodes; i++) {
1552 		n = &nodes[i];
1553 		radix_tree_insert_node(t, n->idx, n);
1554 	}
1555 	gettimeofday(&etv, NULL);
1556 	printops(title, "insert", 0, nnodes, &stv, &etv);
1557 
1558 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1559 		tagmask = 1 << tag;
1560 
1561 		ntagged[tag] = 0;
1562 		gettimeofday(&stv, NULL);
1563 		for (i = 0; i < nnodes; i++) {
1564 			n = &nodes[i];
1565 			if (n->tagged[tag]) {
1566 				radix_tree_set_tag(t, n->idx, tagmask);
1567 				ntagged[tag]++;
1568 			}
1569 		}
1570 		gettimeofday(&etv, NULL);
1571 		printops(title, "set_tag", tag, ntagged[tag], &stv, &etv);
1572 	}
1573 
1574 	gettimeofday(&stv, NULL);
1575 	{
1576 		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1577 		uint64_t nextidx;
1578 		unsigned int nfound;
1579 		unsigned int total;
1580 
1581 		nextidx = 0;
1582 		total = 0;
1583 		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1584 		    (void *)results, __arraycount(results), false)) > 0) {
1585 			nextidx = results[nfound - 1]->idx + 1;
1586 			total += nfound;
1587 			if (nextidx == 0) {
1588 				break;
1589 			}
1590 		}
1591 		assert(total == nnodes);
1592 	}
1593 	gettimeofday(&etv, NULL);
1594 	printops(title, "ganglookup", 0, nnodes, &stv, &etv);
1595 
1596 	gettimeofday(&stv, NULL);
1597 	{
1598 		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1599 		uint64_t nextidx;
1600 		unsigned int nfound;
1601 		unsigned int total;
1602 
1603 		nextidx = UINT64_MAX;
1604 		total = 0;
1605 		while ((nfound = radix_tree_gang_lookup_node_reverse(t, nextidx,
1606 		    (void *)results, __arraycount(results), false)) > 0) {
1607 			nextidx = results[nfound - 1]->idx - 1;
1608 			total += nfound;
1609 			if (nextidx == UINT64_MAX) {
1610 				break;
1611 			}
1612 		}
1613 		assert(total == nnodes);
1614 	}
1615 	gettimeofday(&etv, NULL);
1616 	printops(title, "ganglookup_reverse", 0, nnodes, &stv, &etv);
1617 
1618 	for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1619 		unsigned int total = 0;
1620 
1621 		gettimeofday(&stv, NULL);
1622 		{
1623 			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1624 			uint64_t nextidx;
1625 			unsigned int nfound;
1626 
1627 			nextidx = 0;
1628 			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1629 			    nextidx, (void *)results, __arraycount(results),
1630 			    false, tagmask)) > 0) {
1631 				nextidx = results[nfound - 1]->idx + 1;
1632 				total += nfound;
1633 			}
1634 		}
1635 		gettimeofday(&etv, NULL);
1636 		check_tag_count(ntagged, tagmask, total);
1637 		assert(tagmask != 0 || total == 0);
1638 		printops(title, "ganglookup_tag", tagmask, total, &stv, &etv);
1639 	}
1640 
1641 	for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1642 		unsigned int total = 0;
1643 
1644 		gettimeofday(&stv, NULL);
1645 		{
1646 			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1647 			uint64_t nextidx;
1648 			unsigned int nfound;
1649 
1650 			nextidx = UINT64_MAX;
1651 			while ((nfound =
1652 			    radix_tree_gang_lookup_tagged_node_reverse(t,
1653 			    nextidx, (void *)results, __arraycount(results),
1654 			    false, tagmask)) > 0) {
1655 				nextidx = results[nfound - 1]->idx - 1;
1656 				total += nfound;
1657 				if (nextidx == UINT64_MAX) {
1658 					break;
1659 				}
1660 			}
1661 		}
1662 		gettimeofday(&etv, NULL);
1663 		check_tag_count(ntagged, tagmask, total);
1664 		assert(tagmask != 0 || total == 0);
1665 		printops(title, "ganglookup_tag_reverse", tagmask, total,
1666 		    &stv, &etv);
1667 	}
1668 
1669 	removed = 0;
1670 	for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1671 		unsigned int total;
1672 
1673 		total = 0;
1674 		tagmask = 1 << tag;
1675 		gettimeofday(&stv, NULL);
1676 		{
1677 			struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1678 			uint64_t nextidx;
1679 			unsigned int nfound;
1680 
1681 			nextidx = 0;
1682 			while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1683 			    nextidx, (void *)results, __arraycount(results),
1684 			    false, tagmask)) > 0) {
1685 				for (i = 0; i < nfound; i++) {
1686 					radix_tree_remove_node(t,
1687 					    results[i]->idx);
1688 				}
1689 				nextidx = results[nfound - 1]->idx + 1;
1690 				total += nfound;
1691 				if (nextidx == 0) {
1692 					break;
1693 				}
1694 			}
1695 		}
1696 		gettimeofday(&etv, NULL);
1697 		if (tag == 0) {
1698 			check_tag_count(ntagged, tagmask, total);
1699 		} else {
1700 			assert(total <= ntagged[tag]);
1701 		}
1702 		printops(title, "ganglookup_tag+remove", tagmask, total, &stv,
1703 		    &etv);
1704 		removed += total;
1705 	}
1706 
1707 	gettimeofday(&stv, NULL);
1708 	{
1709 		struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1710 		uint64_t nextidx;
1711 		unsigned int nfound;
1712 		unsigned int total;
1713 
1714 		nextidx = 0;
1715 		total = 0;
1716 		while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1717 		    (void *)results, __arraycount(results), false)) > 0) {
1718 			for (i = 0; i < nfound; i++) {
1719 				assert(results[i] == radix_tree_remove_node(t,
1720 				    results[i]->idx));
1721 			}
1722 			nextidx = results[nfound - 1]->idx + 1;
1723 			total += nfound;
1724 			if (nextidx == 0) {
1725 				break;
1726 			}
1727 		}
1728 		assert(total == nnodes - removed);
1729 	}
1730 	gettimeofday(&etv, NULL);
1731 	printops(title, "ganglookup+remove", 0, nnodes - removed, &stv, &etv);
1732 
1733 	assert(radix_tree_empty_tree_p(t));
1734 	for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1735 		assert(radix_tree_empty_tagged_tree_p(t, tagmask));
1736 	}
1737 	radix_tree_fini_tree(t);
1738 	free(nodes);
1739 }
1740 
/*
 * Unit test driver: run the scripted test once, then the stress test in
 * its dense and sparse configurations.
 */

int
main(int argc, char *argv[])
{
	static const struct {
		const char *title;
		bool dense;
	} runs[] = {
		{ "dense", true },
		{ "sparse", false },
	};
	unsigned int k;

	test1();
	for (k = 0; k < __arraycount(runs); k++) {
		test2(runs[k].title, runs[k].dense);
	}
	return 0;
}
1750 
1751 #endif /* defined(UNITTEST) */
1752