1 /* $NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $ */
2
3 /*-
4 * Copyright (c)2011,2012,2013 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * radixtree.c
31 *
32 * Overview:
33 *
 * This is an implementation of a radix tree whose keys are uint64_t and
 * whose leaves are user-provided pointers.
36 *
37 * Leaf nodes are just void * and this implementation doesn't care about
38 * what they actually point to. However, this implementation has an assumption
39 * about their alignment. Specifically, this implementation assumes that their
40 * 2 LSBs are always zero and uses them for internal accounting.
41 *
42 * Intermediate nodes and memory allocation:
43 *
 * Intermediate nodes are automatically allocated and freed internally and
 * basically users don't need to care about them. The allocation is done via
 * kmem_intr_alloc(9) for _KERNEL, malloc(3) for userland, and alloc() for
 * _STANDALONE environment. Only the radix_tree_insert_node function can
 * allocate memory for intermediate nodes and thus can fail with ENOMEM.
49 *
50 * Memory Efficiency:
51 *
 * It's designed to work efficiently with a dense index distribution.
 * The memory consumption (number of necessary intermediate nodes) heavily
 * depends on the index distribution. Basically, a denser index distribution
 * consumes fewer nodes per item. Approximately,
56 *
57 * - the best case: about RADIX_TREE_PTR_PER_NODE items per intermediate node.
58 * it would look like the following.
59 *
60 * root (t_height=1)
61 * |
62 * v
63 * [ | | | ] (intermediate node. RADIX_TREE_PTR_PER_NODE=4 in this fig)
64 * | | | |
65 * v v v v
66 * p p p p (items)
67 *
68 * - the worst case: RADIX_TREE_MAX_HEIGHT intermediate nodes per item.
69 * it would look like the following if RADIX_TREE_MAX_HEIGHT=3.
70 *
71 * root (t_height=3)
72 * |
73 * v
74 * [ | | | ]
75 * |
76 * v
77 * [ | | | ]
78 * |
79 * v
80 * [ | | | ]
81 * |
82 * v
83 * p
84 *
85 * The height of tree (t_height) is dynamic. It's smaller if only small
86 * index values are used. As an extreme case, if only index 0 is used,
87 * the corresponding value is directly stored in the root of the tree
88 * (struct radix_tree) without allocating any intermediate nodes. In that
89 * case, t_height=0.
90 *
91 * Gang lookup:
92 *
 * This implementation provides a way to scan many nodes quickly via
 * radix_tree_gang_lookup_node function and its variants.
95 *
96 * Tags:
97 *
 * This implementation provides tagging functionality, which allows quick
 * scanning of a subset of leaf nodes. Leaf nodes are untagged when inserted
 * into the tree and can be tagged by the radix_tree_set_tag function.
 * The radix_tree_gang_lookup_tagged_node function and its variants return
 * only leaf nodes with the given tag. To reduce the number of nodes to visit
 * for these functions, this implementation keeps tagging information in
 * internal intermediate nodes and quickly skips uninteresting parts of a tree.
105 *
 * A tree has RADIX_TREE_TAG_ID_MAX independent tag spaces, each of which is
 * identified by a zero-origin number, tagid. For the current implementation,
 * RADIX_TREE_TAG_ID_MAX is 2. A set of tags is described as a bitmask tagmask,
 * which is a bitwise OR of (1 << tagid).
110 */
111
112 #include <sys/cdefs.h>
113
114 #if defined(_KERNEL) || defined(_STANDALONE)
115 __KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $");
116 #include <sys/param.h>
117 #include <sys/errno.h>
118 #include <sys/kmem.h>
119 #include <sys/radixtree.h>
120 #include <lib/libkern/libkern.h>
121 #if defined(_STANDALONE)
122 #include <lib/libsa/stand.h>
123 #endif /* defined(_STANDALONE) */
124 #else /* defined(_KERNEL) || defined(_STANDALONE) */
125 __RCSID("$NetBSD: radixtree.c,v 1.34 2024/05/04 17:58:24 chs Exp $");
126 #include <assert.h>
127 #include <errno.h>
128 #include <stdbool.h>
129 #include <stdlib.h>
130 #include <string.h>
131 #if 1
132 #define KASSERT assert
133 #else
134 #define KASSERT(a) /* nothing */
135 #endif
136 #endif /* defined(_KERNEL) || defined(_STANDALONE) */
137
138 #include <sys/radixtree.h>
139
140 #define RADIX_TREE_BITS_PER_HEIGHT 4 /* XXX tune */
141 #define RADIX_TREE_PTR_PER_NODE (1 << RADIX_TREE_BITS_PER_HEIGHT)
142 #define RADIX_TREE_MAX_HEIGHT (64 / RADIX_TREE_BITS_PER_HEIGHT)
143 #define RADIX_TREE_INVALID_HEIGHT (RADIX_TREE_MAX_HEIGHT + 1)
144 __CTASSERT((64 % RADIX_TREE_BITS_PER_HEIGHT) == 0);
145
146 __CTASSERT(((1 << RADIX_TREE_TAG_ID_MAX) & (sizeof(int) - 1)) == 0);
147 #define RADIX_TREE_TAG_MASK ((1 << RADIX_TREE_TAG_ID_MAX) - 1)
148
149 static inline void *
entry_ptr(void * p)150 entry_ptr(void *p)
151 {
152
153 return (void *)((uintptr_t)p & ~RADIX_TREE_TAG_MASK);
154 }
155
156 static inline unsigned int
entry_tagmask(void * p)157 entry_tagmask(void *p)
158 {
159
160 return (uintptr_t)p & RADIX_TREE_TAG_MASK;
161 }
162
/*
 * entry_compose:
 *
 *	build an entry by OR-ing the given tag bits into the pointer p.
 */

static inline void *
entry_compose(void *p, unsigned int tagmask)
{
	uintptr_t bits = (uintptr_t)p;

	bits |= tagmask;
	return (void *)bits;
}
169
/*
 * entry_match_p:
 *
 *	return true if p is a non-NULL entry that matches tagmask.
 *	with tagmask 0 every non-NULL entry matches; otherwise the entry
 *	must carry at least one of the tags in tagmask.
 */

static inline bool
entry_match_p(void *p, unsigned int tagmask)
{

	/* an entry without a pointer part must not carry any tag */
	KASSERT(entry_ptr(p) != NULL || entry_tagmask(p) == 0);
	return p != NULL &&
	    (tagmask == 0 || (entry_tagmask(p) & tagmask) != 0);
}
183
184 /*
185 * radix_tree_node: an intermediate node
186 *
187 * we don't care the type of leaf nodes. they are just void *.
188 *
189 * we used to maintain a count of non-NULL nodes in this structure, but it
190 * prevented it from being aligned to a cache line boundary; the performance
191 * benefit from being cache friendly is greater than the benefit of having
192 * a dedicated count value, especially in multi-processor situations where
193 * we need to avoid intra-pool-page false sharing.
194 */
195
196 struct radix_tree_node {
197 void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
198 };
199
200 /*
201 * p_refs[0].pptr == &t->t_root
202 * :
203 * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x]
204 * :
205 * :
206 * p_refs[t->t_height].pptr == &leaf_pointer
207 */
208
209 struct radix_tree_path {
210 struct radix_tree_node_ref {
211 void **pptr;
212 } p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
213 /*
214 * p_lastidx is either the index of the last valid element of p_refs[]
215 * or RADIX_TREE_INVALID_HEIGHT.
216 * RADIX_TREE_INVALID_HEIGHT means that radix_tree_lookup_ptr found
217 * that the height of the tree is not enough to cover the given index.
218 */
219 unsigned int p_lastidx;
220 };
221
222 static inline void **
path_pptr(const struct radix_tree * t,const struct radix_tree_path * p,unsigned int height)223 path_pptr(const struct radix_tree *t, const struct radix_tree_path *p,
224 unsigned int height)
225 {
226
227 KASSERT(height <= t->t_height);
228 return p->p_refs[height].pptr;
229 }
230
231 static inline struct radix_tree_node *
path_node(const struct radix_tree * t,const struct radix_tree_path * p,unsigned int height)232 path_node(const struct radix_tree * t, const struct radix_tree_path *p,
233 unsigned int height)
234 {
235
236 KASSERT(height <= t->t_height);
237 return entry_ptr(*path_pptr(t, p, height));
238 }
239
240 /*
241 * radix_tree_init_tree:
242 *
243 * Initialize a tree.
244 */
245
246 void
radix_tree_init_tree(struct radix_tree * t)247 radix_tree_init_tree(struct radix_tree *t)
248 {
249
250 t->t_height = 0;
251 t->t_root = NULL;
252 }
253
254 /*
255 * radix_tree_fini_tree:
256 *
257 * Finish using a tree.
258 */
259
260 void
radix_tree_fini_tree(struct radix_tree * t)261 radix_tree_fini_tree(struct radix_tree *t)
262 {
263
264 KASSERT(t->t_root == NULL);
265 KASSERT(t->t_height == 0);
266 }
267
268 /*
269 * radix_tree_empty_tree_p:
270 *
271 * Return if the tree is empty.
272 */
273
274 bool
radix_tree_empty_tree_p(struct radix_tree * t)275 radix_tree_empty_tree_p(struct radix_tree *t)
276 {
277
278 return t->t_root == NULL;
279 }
280
281 /*
282 * radix_tree_empty_tree_p:
283 *
284 * Return true if the tree has any nodes with the given tag. Otherwise
285 * return false.
286 *
287 * It's illegal to call this function with tagmask 0.
288 */
289
290 bool
radix_tree_empty_tagged_tree_p(struct radix_tree * t,unsigned int tagmask)291 radix_tree_empty_tagged_tree_p(struct radix_tree *t, unsigned int tagmask)
292 {
293
294 KASSERT(tagmask != 0);
295 return (entry_tagmask(t->t_root) & tagmask) == 0;
296 }
297
298 static void
radix_tree_node_init(struct radix_tree_node * n)299 radix_tree_node_init(struct radix_tree_node *n)
300 {
301
302 memset(n, 0, sizeof(*n));
303 }
304
305 #if defined(_KERNEL)
/*
 * radix_tree_init:
 *
 *	initialize the subsystem.  currently a no-op.
 */

void
radix_tree_init(void)
{

	/* no global state to set up at present */
}
318
319 /*
320 * radix_tree_await_memory:
321 *
322 * after an insert has failed with ENOMEM, wait for memory to become
323 * available, so the caller can retry. this needs to ensure that the
324 * maximum possible required number of nodes is available.
325 */
326
327 void
radix_tree_await_memory(void)328 radix_tree_await_memory(void)
329 {
330 struct radix_tree_node *nodes[RADIX_TREE_MAX_HEIGHT];
331 int i;
332
333 for (i = 0; i < __arraycount(nodes); i++) {
334 nodes[i] = kmem_intr_alloc(sizeof(struct radix_tree_node),
335 KM_SLEEP);
336 }
337 while (--i >= 0) {
338 kmem_intr_free(nodes[i], sizeof(struct radix_tree_node));
339 }
340 }
341
342 #endif /* defined(_KERNEL) */
343
344 /*
345 * radix_tree_sum_node:
346 *
347 * return the logical sum of all entries in the given node. used to quickly
348 * check for tag masks or empty nodes.
349 */
350
351 static uintptr_t
radix_tree_sum_node(const struct radix_tree_node * n)352 radix_tree_sum_node(const struct radix_tree_node *n)
353 {
354 #if RADIX_TREE_PTR_PER_NODE > 16
355 unsigned int i;
356 uintptr_t sum;
357
358 for (i = 0, sum = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
359 sum |= (uintptr_t)n->n_ptrs[i];
360 }
361 return sum;
362 #else /* RADIX_TREE_PTR_PER_NODE > 16 */
363 uintptr_t sum;
364
365 /*
366 * Unrolling the above is much better than a tight loop with two
367 * test+branch pairs. On x86 with gcc 5.5.0 this compiles into 19
368 * deterministic instructions including the "return" and prologue &
369 * epilogue.
370 */
371 sum = (uintptr_t)n->n_ptrs[0];
372 sum |= (uintptr_t)n->n_ptrs[1];
373 sum |= (uintptr_t)n->n_ptrs[2];
374 sum |= (uintptr_t)n->n_ptrs[3];
375 #if RADIX_TREE_PTR_PER_NODE > 4
376 sum |= (uintptr_t)n->n_ptrs[4];
377 sum |= (uintptr_t)n->n_ptrs[5];
378 sum |= (uintptr_t)n->n_ptrs[6];
379 sum |= (uintptr_t)n->n_ptrs[7];
380 #endif
381 #if RADIX_TREE_PTR_PER_NODE > 8
382 sum |= (uintptr_t)n->n_ptrs[8];
383 sum |= (uintptr_t)n->n_ptrs[9];
384 sum |= (uintptr_t)n->n_ptrs[10];
385 sum |= (uintptr_t)n->n_ptrs[11];
386 sum |= (uintptr_t)n->n_ptrs[12];
387 sum |= (uintptr_t)n->n_ptrs[13];
388 sum |= (uintptr_t)n->n_ptrs[14];
389 sum |= (uintptr_t)n->n_ptrs[15];
390 #endif
391 return sum;
392 #endif /* RADIX_TREE_PTR_PER_NODE > 16 */
393 }
394
395 static int __unused
radix_tree_node_count_ptrs(const struct radix_tree_node * n)396 radix_tree_node_count_ptrs(const struct radix_tree_node *n)
397 {
398 unsigned int i, c;
399
400 for (i = c = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
401 c += (n->n_ptrs[i] != NULL);
402 }
403 return c;
404 }
405
406 static struct radix_tree_node *
radix_tree_alloc_node(void)407 radix_tree_alloc_node(void)
408 {
409 struct radix_tree_node *n;
410
411 #if defined(_KERNEL)
412 /*
413 * We must not block waiting for memory because this function
414 * can be called in contexts where waiting for memory is illegal.
415 */
416 n = kmem_intr_alloc(sizeof(struct radix_tree_node), KM_NOSLEEP);
417 #elif defined(_STANDALONE)
418 n = alloc(sizeof(*n));
419 #else /* defined(_STANDALONE) */
420 n = malloc(sizeof(*n));
421 #endif /* defined(_STANDALONE) */
422 if (n != NULL) {
423 radix_tree_node_init(n);
424 }
425 KASSERT(n == NULL || radix_tree_sum_node(n) == 0);
426 return n;
427 }
428
/*
 * radix_tree_free_node:
 *
 *	release an intermediate node.  the node must be empty, i.e. all of
 *	its slots must be NULL.
 */

static void
radix_tree_free_node(struct radix_tree_node *n)
{

	KASSERT(radix_tree_sum_node(n) == 0);
#if defined(_KERNEL)
	kmem_intr_free(n, sizeof(*n));
#elif defined(_STANDALONE)
	dealloc(n, sizeof(*n));
#else /* userland */
	free(n);
#endif
}
442
443 /*
444 * radix_tree_grow:
445 *
446 * increase the height of the tree.
447 */
448
449 static __noinline int
radix_tree_grow(struct radix_tree * t,unsigned int newheight)450 radix_tree_grow(struct radix_tree *t, unsigned int newheight)
451 {
452 const unsigned int tagmask = entry_tagmask(t->t_root);
453 struct radix_tree_node *newnodes[RADIX_TREE_MAX_HEIGHT];
454 void *root;
455 int h;
456
457 KASSERT(newheight <= RADIX_TREE_MAX_HEIGHT);
458 if ((root = t->t_root) == NULL) {
459 t->t_height = newheight;
460 return 0;
461 }
462 for (h = t->t_height; h < newheight; h++) {
463 newnodes[h] = radix_tree_alloc_node();
464 if (__predict_false(newnodes[h] == NULL)) {
465 while (--h >= (int)t->t_height) {
466 newnodes[h]->n_ptrs[0] = NULL;
467 radix_tree_free_node(newnodes[h]);
468 }
469 return ENOMEM;
470 }
471 newnodes[h]->n_ptrs[0] = root;
472 root = entry_compose(newnodes[h], tagmask);
473 }
474 t->t_root = root;
475 t->t_height = h;
476 return 0;
477 }
478
479 /*
480 * radix_tree_lookup_ptr:
481 *
482 * an internal helper function used for various exported functions.
483 *
484 * return the pointer to store the node for the given index.
485 *
486 * if alloc is true, try to allocate the storage. (note for _KERNEL:
487 * in that case, this function can block.) if the allocation failed or
488 * alloc is false, return NULL.
489 *
490 * if path is not NULL, fill it for the caller's investigation.
491 *
492 * if tagmask is not zero, search only for nodes with the tag set.
493 * note that, however, this function doesn't check the tagmask for the leaf
494 * pointer. it's a caller's responsibility to investigate the value which
495 * is pointed by the returned pointer if necessary.
496 *
497 * while this function is a bit large, as it's called with some constant
498 * arguments, inlining might have benefits. anyway, a compiler will decide.
499 */
500
501 static inline void **
radix_tree_lookup_ptr(struct radix_tree * t,uint64_t idx,struct radix_tree_path * path,bool alloc,const unsigned int tagmask)502 radix_tree_lookup_ptr(struct radix_tree *t, uint64_t idx,
503 struct radix_tree_path *path, bool alloc, const unsigned int tagmask)
504 {
505 struct radix_tree_node *n;
506 int hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
507 int shift;
508 void **vpp;
509 const uint64_t mask = (UINT64_C(1) << RADIX_TREE_BITS_PER_HEIGHT) - 1;
510 struct radix_tree_node_ref *refs = NULL;
511
512 /*
513 * check unsupported combinations
514 */
515 KASSERT(tagmask == 0 || !alloc);
516 KASSERT(path == NULL || !alloc);
517 vpp = &t->t_root;
518 if (path != NULL) {
519 refs = path->p_refs;
520 refs->pptr = vpp;
521 }
522 n = NULL;
523 for (shift = 64 - RADIX_TREE_BITS_PER_HEIGHT; shift >= 0;) {
524 struct radix_tree_node *c;
525 void *entry;
526 const uint64_t i = (idx >> shift) & mask;
527
528 if (shift >= hshift) {
529 unsigned int newheight;
530
531 KASSERT(vpp == &t->t_root);
532 if (i == 0) {
533 shift -= RADIX_TREE_BITS_PER_HEIGHT;
534 continue;
535 }
536 if (!alloc) {
537 if (path != NULL) {
538 KASSERT((refs - path->p_refs) == 0);
539 path->p_lastidx =
540 RADIX_TREE_INVALID_HEIGHT;
541 }
542 return NULL;
543 }
544 newheight = shift / RADIX_TREE_BITS_PER_HEIGHT + 1;
545 if (radix_tree_grow(t, newheight)) {
546 return NULL;
547 }
548 hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
549 }
550 entry = *vpp;
551 c = entry_ptr(entry);
552 if (c == NULL ||
553 (tagmask != 0 &&
554 (entry_tagmask(entry) & tagmask) == 0)) {
555 if (!alloc) {
556 if (path != NULL) {
557 path->p_lastidx = refs - path->p_refs;
558 }
559 return NULL;
560 }
561 c = radix_tree_alloc_node();
562 if (c == NULL) {
563 return NULL;
564 }
565 *vpp = c;
566 }
567 n = c;
568 vpp = &n->n_ptrs[i];
569 if (path != NULL) {
570 refs++;
571 refs->pptr = vpp;
572 }
573 shift -= RADIX_TREE_BITS_PER_HEIGHT;
574 }
575 if (alloc) {
576 KASSERT(*vpp == NULL);
577 }
578 if (path != NULL) {
579 path->p_lastidx = refs - path->p_refs;
580 }
581 return vpp;
582 }
583
584 /*
585 * radix_tree_undo_insert_node:
586 *
587 * Undo the effects of a failed insert. The conditions that led to the
588 * insert may change and it may not be retried. If the insert is not
589 * retried, there will be no corresponding radix_tree_remove_node() for
590 * this index in the future. Therefore any adjustments made to the tree
591 * before memory was exhausted must be reverted.
592 */
593
594 static __noinline void
radix_tree_undo_insert_node(struct radix_tree * t,uint64_t idx)595 radix_tree_undo_insert_node(struct radix_tree *t, uint64_t idx)
596 {
597 struct radix_tree_path path;
598 int i;
599
600 (void)radix_tree_lookup_ptr(t, idx, &path, false, 0);
601 if (path.p_lastidx == RADIX_TREE_INVALID_HEIGHT) {
602 /*
603 * no nodes were inserted.
604 */
605 return;
606 }
607 for (i = path.p_lastidx - 1; i >= 0; i--) {
608 struct radix_tree_node ** const pptr =
609 (struct radix_tree_node **)path_pptr(t, &path, i);
610 struct radix_tree_node *n;
611
612 KASSERT(pptr != NULL);
613 n = entry_ptr(*pptr);
614 KASSERT(n != NULL);
615 if (radix_tree_sum_node(n) != 0) {
616 break;
617 }
618 radix_tree_free_node(n);
619 *pptr = NULL;
620 }
621 /*
622 * fix up height
623 */
624 if (i < 0) {
625 KASSERT(t->t_root == NULL);
626 t->t_height = 0;
627 }
628 }
629
/*
 * radix_tree_insert_node:
 *
 *	Insert the node p at the given index.
 *
 *	It's illegal to insert NULL.  It's illegal to insert a non-aligned
 *	pointer, i.e. one whose tag bits are not zero.
 *
 *	This function returns ENOMEM if a necessary memory allocation failed;
 *	in that case whatever was added to the tree for this insert is undone.
 *	Otherwise, this function returns 0.
 *
 *	Note that inserting a node can involve memory allocation for
 *	intermediate nodes.  If _KERNEL, it's done with no-sleep memory
 *	allocation.
 *
 *	For the newly inserted node, all tags are cleared.
 */

int
radix_tree_insert_node(struct radix_tree *t, uint64_t idx, void *p)
{
	void **slot;

	KASSERT(p != NULL);
	KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
	slot = radix_tree_lookup_ptr(t, idx, NULL, true, 0);
	if (__predict_false(slot == NULL)) {
		/* out of memory part-way through; revert partial changes */
		radix_tree_undo_insert_node(t, idx);
		return ENOMEM;
	}
	KASSERT(*slot == NULL);
	*slot = p;
	return 0;
}
662
/*
 * radix_tree_replace_node:
 *
 *	Replace the node at the given index with p and return the node
 *	which was there before.
 *
 *	It's illegal to try to replace a node which has not been inserted.
 *
 *	The tags of the index are kept intact: they are carried over from
 *	the old entry to the new one.
 */

void *
radix_tree_replace_node(struct radix_tree *t, uint64_t idx, void *p)
{
	void **slot;
	void *old;

	KASSERT(p != NULL);
	KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
	slot = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
	KASSERT(slot != NULL);
	old = *slot;
	KASSERT(old != NULL);
	*slot = entry_compose(p, entry_tagmask(old));
	return entry_ptr(old);
}
689
690 /*
691 * radix_tree_remove_node:
692 *
693 * Remove the node at the given index.
694 *
695 * It's illegal to try to remove a node which has not been inserted.
696 */
697
698 void *
radix_tree_remove_node(struct radix_tree * t,uint64_t idx)699 radix_tree_remove_node(struct radix_tree *t, uint64_t idx)
700 {
701 struct radix_tree_path path;
702 void **vpp;
703 void *oldp;
704 int i;
705
706 vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
707 KASSERT(vpp != NULL);
708 oldp = *vpp;
709 KASSERT(oldp != NULL);
710 KASSERT(path.p_lastidx == t->t_height);
711 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
712 *vpp = NULL;
713 for (i = t->t_height - 1; i >= 0; i--) {
714 void *entry;
715 struct radix_tree_node ** const pptr =
716 (struct radix_tree_node **)path_pptr(t, &path, i);
717 struct radix_tree_node *n;
718
719 KASSERT(pptr != NULL);
720 entry = *pptr;
721 n = entry_ptr(entry);
722 KASSERT(n != NULL);
723 if (radix_tree_sum_node(n) != 0) {
724 break;
725 }
726 radix_tree_free_node(n);
727 *pptr = NULL;
728 }
729 /*
730 * fix up height
731 */
732 if (i < 0) {
733 KASSERT(t->t_root == NULL);
734 t->t_height = 0;
735 }
736 /*
737 * update tags
738 */
739 for (; i >= 0; i--) {
740 void *entry;
741 struct radix_tree_node ** const pptr =
742 (struct radix_tree_node **)path_pptr(t, &path, i);
743 struct radix_tree_node *n;
744 unsigned int newmask;
745
746 KASSERT(pptr != NULL);
747 entry = *pptr;
748 n = entry_ptr(entry);
749 KASSERT(n != NULL);
750 KASSERT(radix_tree_sum_node(n) != 0);
751 newmask = radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK;
752 if (newmask == entry_tagmask(entry)) {
753 break;
754 }
755 *pptr = entry_compose(n, newmask);
756 }
757 /*
758 * XXX is it worth to try to reduce height?
759 * if we do that, make radix_tree_grow rollback its change as well.
760 */
761 return entry_ptr(oldp);
762 }
763
/*
 * radix_tree_lookup_node:
 *
 *	Return the node at the given index, without its tag bits.
 *	Return NULL if nothing is found at the given index.
 */

void *
radix_tree_lookup_node(struct radix_tree *t, uint64_t idx)
{
	void ** const slot = radix_tree_lookup_ptr(t, idx, NULL, false, 0);

	return (slot != NULL) ? entry_ptr(*slot) : NULL;
}
782
783 static inline void
gang_lookup_init(struct radix_tree * t,uint64_t idx,struct radix_tree_path * path,const unsigned int tagmask)784 gang_lookup_init(struct radix_tree *t, uint64_t idx,
785 struct radix_tree_path *path, const unsigned int tagmask)
786 {
787 void **vpp __unused;
788
789 vpp = radix_tree_lookup_ptr(t, idx, path, false, tagmask);
790 KASSERT(vpp == NULL ||
791 vpp == path_pptr(t, path, path->p_lastidx));
792 KASSERT(&t->t_root == path_pptr(t, path, 0));
793 KASSERT(path->p_lastidx == RADIX_TREE_INVALID_HEIGHT ||
794 path->p_lastidx == t->t_height ||
795 !entry_match_p(*path_pptr(t, path, path->p_lastidx), tagmask));
796 }
797
798 /*
799 * gang_lookup_scan:
800 *
801 * a helper routine for radix_tree_gang_lookup_node and its variants.
802 */
803
804 static inline unsigned int
805 __attribute__((__always_inline__))
gang_lookup_scan(struct radix_tree * t,struct radix_tree_path * path,void ** results,const unsigned int maxresults,const unsigned int tagmask,const bool reverse,const bool dense)806 gang_lookup_scan(struct radix_tree *t, struct radix_tree_path *path,
807 void **results, const unsigned int maxresults, const unsigned int tagmask,
808 const bool reverse, const bool dense)
809 {
810
811 /*
812 * we keep the path updated only for lastidx-1.
813 * vpp is what path_pptr(t, path, lastidx) would be.
814 */
815 void **vpp;
816 unsigned int nfound;
817 unsigned int lastidx;
818 /*
819 * set up scan direction dependant constants so that we can iterate
820 * n_ptrs as the following.
821 *
822 * for (i = first; i != guard; i += step)
823 * visit n->n_ptrs[i];
824 */
825 const int step = reverse ? -1 : 1;
826 const unsigned int first = reverse ? RADIX_TREE_PTR_PER_NODE - 1 : 0;
827 const unsigned int last = reverse ? 0 : RADIX_TREE_PTR_PER_NODE - 1;
828 const unsigned int guard = last + step;
829
830 KASSERT(maxresults > 0);
831 KASSERT(&t->t_root == path_pptr(t, path, 0));
832 lastidx = path->p_lastidx;
833 KASSERT(lastidx == RADIX_TREE_INVALID_HEIGHT ||
834 lastidx == t->t_height ||
835 !entry_match_p(*path_pptr(t, path, lastidx), tagmask));
836 nfound = 0;
837 if (lastidx == RADIX_TREE_INVALID_HEIGHT) {
838 /*
839 * requested idx is beyond the right-most node.
840 */
841 if (reverse && !dense) {
842 lastidx = 0;
843 vpp = path_pptr(t, path, lastidx);
844 goto descend;
845 }
846 return 0;
847 }
848 vpp = path_pptr(t, path, lastidx);
849 while (/*CONSTCOND*/true) {
850 struct radix_tree_node *n;
851 unsigned int i;
852
853 if (entry_match_p(*vpp, tagmask)) {
854 KASSERT(lastidx == t->t_height);
855 /*
856 * record the matching non-NULL leaf.
857 */
858 results[nfound] = entry_ptr(*vpp);
859 nfound++;
860 if (nfound == maxresults) {
861 return nfound;
862 }
863 } else if (dense) {
864 return nfound;
865 }
866 scan_siblings:
867 /*
868 * try to find the next matching non-NULL sibling.
869 */
870 if (lastidx == 0) {
871 /*
872 * the root has no siblings.
873 * we've done.
874 */
875 KASSERT(vpp == &t->t_root);
876 break;
877 }
878 n = path_node(t, path, lastidx - 1);
879 for (i = vpp - n->n_ptrs + step; i != guard; i += step) {
880 KASSERT(i < RADIX_TREE_PTR_PER_NODE);
881 if (entry_match_p(n->n_ptrs[i], tagmask)) {
882 vpp = &n->n_ptrs[i];
883 break;
884 } else if (dense) {
885 return nfound;
886 }
887 }
888 if (i == guard) {
889 /*
890 * not found. go to parent.
891 */
892 lastidx--;
893 vpp = path_pptr(t, path, lastidx);
894 goto scan_siblings;
895 }
896 descend:
897 /*
898 * following the left-most (or right-most in the case of
899 * reverse scan) child node, descend until reaching the leaf or
900 * a non-matching entry.
901 */
902 while (entry_match_p(*vpp, tagmask) && lastidx < t->t_height) {
903 /*
904 * save vpp in the path so that we can come back to this
905 * node after finishing visiting children.
906 */
907 path->p_refs[lastidx].pptr = vpp;
908 n = entry_ptr(*vpp);
909 vpp = &n->n_ptrs[first];
910 lastidx++;
911 }
912 }
913 return nfound;
914 }
915
916 /*
917 * radix_tree_gang_lookup_node:
918 *
919 * Scan the tree starting from the given index in the ascending order and
920 * return found nodes.
921 *
922 * results should be an array large enough to hold maxresults pointers.
923 * This function returns the number of nodes found, up to maxresults.
924 * Returning less than maxresults means there are no more nodes in the tree.
925 *
926 * If dense == true, this function stops scanning when it founds a hole of
927 * indexes. I.e. an index for which radix_tree_lookup_node would returns NULL.
928 * If dense == false, this function skips holes and continue scanning until
929 * maxresults nodes are found or it reaches the limit of the index range.
930 *
931 * The result of this function is semantically equivalent to what could be
932 * obtained by repeated calls of radix_tree_lookup_node with increasing index.
933 * but this function is expected to be computationally cheaper when looking up
934 * multiple nodes at once. Especially, it's expected to be much cheaper when
935 * node indexes are distributed sparsely.
936 *
937 * Note that this function doesn't return index values of found nodes.
938 * Thus, in the case of dense == false, if index values are important for
939 * a caller, it's the caller's responsibility to check them, typically
940 * by examining the returned nodes using some caller-specific knowledge
941 * about them.
942 * In the case of dense == true, a node returned via results[N] is always for
943 * the index (idx + N).
944 */
945
946 unsigned int
radix_tree_gang_lookup_node(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense)947 radix_tree_gang_lookup_node(struct radix_tree *t, uint64_t idx,
948 void **results, unsigned int maxresults, bool dense)
949 {
950 struct radix_tree_path path;
951
952 gang_lookup_init(t, idx, &path, 0);
953 return gang_lookup_scan(t, &path, results, maxresults, 0, false, dense);
954 }
955
956 /*
957 * radix_tree_gang_lookup_node_reverse:
958 *
959 * Same as radix_tree_gang_lookup_node except that this one scans the
960 * tree in the reverse order. I.e. descending index values.
961 */
962
963 unsigned int
radix_tree_gang_lookup_node_reverse(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense)964 radix_tree_gang_lookup_node_reverse(struct radix_tree *t, uint64_t idx,
965 void **results, unsigned int maxresults, bool dense)
966 {
967 struct radix_tree_path path;
968
969 gang_lookup_init(t, idx, &path, 0);
970 return gang_lookup_scan(t, &path, results, maxresults, 0, true, dense);
971 }
972
973 /*
974 * radix_tree_gang_lookup_tagged_node:
975 *
976 * Same as radix_tree_gang_lookup_node except that this one only returns
977 * nodes tagged with tagid.
978 *
979 * It's illegal to call this function with tagmask 0.
980 */
981
982 unsigned int
radix_tree_gang_lookup_tagged_node(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense,unsigned int tagmask)983 radix_tree_gang_lookup_tagged_node(struct radix_tree *t, uint64_t idx,
984 void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
985 {
986 struct radix_tree_path path;
987
988 KASSERT(tagmask != 0);
989 gang_lookup_init(t, idx, &path, tagmask);
990 return gang_lookup_scan(t, &path, results, maxresults, tagmask, false,
991 dense);
992 }
993
994 /*
995 * radix_tree_gang_lookup_tagged_node_reverse:
996 *
997 * Same as radix_tree_gang_lookup_tagged_node except that this one scans the
998 * tree in the reverse order. I.e. descending index values.
999 */
1000
1001 unsigned int
radix_tree_gang_lookup_tagged_node_reverse(struct radix_tree * t,uint64_t idx,void ** results,unsigned int maxresults,bool dense,unsigned int tagmask)1002 radix_tree_gang_lookup_tagged_node_reverse(struct radix_tree *t, uint64_t idx,
1003 void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
1004 {
1005 struct radix_tree_path path;
1006
1007 KASSERT(tagmask != 0);
1008 gang_lookup_init(t, idx, &path, tagmask);
1009 return gang_lookup_scan(t, &path, results, maxresults, tagmask, true,
1010 dense);
1011 }
1012
/*
 * radix_tree_get_tag:
 *
 *	Return the subset of tagmask which is set for the node at the given
 *	index; 0 if none of the given tags is set.
 *
 *	It's illegal to call this function for a node which has not been
 *	inserted.
 */

unsigned int
radix_tree_get_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
{
	void **vpp;

	/*
	 * pass tagmask to the lookup so that it gives up (returning NULL)
	 * as soon as it meets an intermediate entry carrying none of the
	 * requested tags.  a plain lookup with tagmask 0 followed by masking
	 * the leaf's tags should behave the same; this form was chosen
	 * because it seems faster.
	 */
	vpp = radix_tree_lookup_ptr(t, idx, NULL, false, tagmask);
	if (vpp == NULL) {
		return 0;
	}
	KASSERT(*vpp != NULL);
	return entry_tagmask(*vpp) & tagmask;
}
1045
1046 /*
1047 * radix_tree_set_tag:
1048 *
1049 * Set the tag for the node at the given index.
1050 *
1051 * It's illegal to call this function for a node which has not been inserted.
1052 * It's illegal to call this function with tagmask 0.
1053 */
1054
1055 void
radix_tree_set_tag(struct radix_tree * t,uint64_t idx,unsigned int tagmask)1056 radix_tree_set_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1057 {
1058 struct radix_tree_path path;
1059 void **vpp __unused;
1060 int i;
1061
1062 KASSERT(tagmask != 0);
1063 vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1064 KASSERT(vpp != NULL);
1065 KASSERT(*vpp != NULL);
1066 KASSERT(path.p_lastidx == t->t_height);
1067 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1068 for (i = t->t_height; i >= 0; i--) {
1069 void ** const pptr = (void **)path_pptr(t, &path, i);
1070 void *entry;
1071
1072 KASSERT(pptr != NULL);
1073 entry = *pptr;
1074 if ((entry_tagmask(entry) & tagmask) != 0) {
1075 break;
1076 }
1077 *pptr = (void *)((uintptr_t)entry | tagmask);
1078 }
1079 }
1080
1081 /*
1082 * radix_tree_clear_tag:
1083 *
1084 * Clear the tag for the node at the given index.
1085 *
1086 * It's illegal to call this function for a node which has not been inserted.
1087 * It's illegal to call this function with tagmask 0.
1088 */
1089
1090 void
radix_tree_clear_tag(struct radix_tree * t,uint64_t idx,unsigned int tagmask)1091 radix_tree_clear_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1092 {
1093 struct radix_tree_path path;
1094 void **vpp;
1095 int i;
1096
1097 KASSERT(tagmask != 0);
1098 vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1099 KASSERT(vpp != NULL);
1100 KASSERT(*vpp != NULL);
1101 KASSERT(path.p_lastidx == t->t_height);
1102 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1103 /*
1104 * if already cleared, nothing to do
1105 */
1106 if ((entry_tagmask(*vpp) & tagmask) == 0) {
1107 return;
1108 }
1109 /*
1110 * clear the tag only if no children have the tag.
1111 */
1112 for (i = t->t_height; i >= 0; i--) {
1113 void ** const pptr = (void **)path_pptr(t, &path, i);
1114 void *entry;
1115
1116 KASSERT(pptr != NULL);
1117 entry = *pptr;
1118 KASSERT((entry_tagmask(entry) & tagmask) != 0);
1119 *pptr = entry_compose(entry_ptr(entry),
1120 entry_tagmask(entry) & ~tagmask);
1121 /*
1122 * check if we should proceed to process the next level.
1123 */
1124 if (0 < i) {
1125 struct radix_tree_node *n = path_node(t, &path, i - 1);
1126
1127 if ((radix_tree_sum_node(n) & tagmask) != 0) {
1128 break;
1129 }
1130 }
1131 }
1132 }
1133
1134 #if defined(UNITTEST)
1135
1136 #include <inttypes.h>
1137 #include <stdio.h>
1138
1139 static void
radix_tree_dump_node(const struct radix_tree * t,void * vp,uint64_t offset,unsigned int height)1140 radix_tree_dump_node(const struct radix_tree *t, void *vp,
1141 uint64_t offset, unsigned int height)
1142 {
1143 struct radix_tree_node *n;
1144 unsigned int i;
1145
1146 for (i = 0; i < t->t_height - height; i++) {
1147 printf(" ");
1148 }
1149 if (entry_tagmask(vp) == 0) {
1150 printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
1151 } else {
1152 printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
1153 entry_tagmask(vp));
1154 }
1155 if (height == 0) {
1156 printf(" (leaf)\n");
1157 return;
1158 }
1159 n = entry_ptr(vp);
1160 assert((radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK) ==
1161 entry_tagmask(vp));
1162 printf(" (%u children)\n", radix_tree_node_count_ptrs(n));
1163 for (i = 0; i < __arraycount(n->n_ptrs); i++) {
1164 void *c;
1165
1166 c = n->n_ptrs[i];
1167 if (c == NULL) {
1168 continue;
1169 }
1170 radix_tree_dump_node(t, c,
1171 offset + i * (UINT64_C(1) <<
1172 (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
1173 }
1174 }
1175
1176 void radix_tree_dump(const struct radix_tree *);
1177
1178 void
radix_tree_dump(const struct radix_tree * t)1179 radix_tree_dump(const struct radix_tree *t)
1180 {
1181
1182 printf("tree %p height=%u\n", t, t->t_height);
1183 radix_tree_dump_node(t, t->t_root, 0, t->t_height);
1184 }
1185
1186 static void
test1(void)1187 test1(void)
1188 {
1189 struct radix_tree s;
1190 struct radix_tree *t = &s;
1191 void *results[3];
1192
1193 radix_tree_init_tree(t);
1194 radix_tree_dump(t);
1195 assert(radix_tree_lookup_node(t, 0) == NULL);
1196 assert(radix_tree_lookup_node(t, 1000) == NULL);
1197 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 0);
1198 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1199 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1200 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1201 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1202 0);
1203 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1204 0);
1205 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1206 == 0);
1207 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1208 == 0);
1209 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1210 == 0);
1211 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1212 == 0);
1213 assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, false, 1)
1214 == 0);
1215 assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, true, 1)
1216 == 0);
1217 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1218 false, 1) == 0);
1219 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1220 true, 1) == 0);
1221 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1222 false, 1) == 0);
1223 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1224 true, 1) == 0);
1225 assert(radix_tree_empty_tree_p(t));
1226 assert(radix_tree_empty_tagged_tree_p(t, 1));
1227 assert(radix_tree_empty_tagged_tree_p(t, 2));
1228 assert(radix_tree_insert_node(t, 0, (void *)0xdeadbea0) == 0);
1229 assert(!radix_tree_empty_tree_p(t));
1230 assert(radix_tree_empty_tagged_tree_p(t, 1));
1231 assert(radix_tree_empty_tagged_tree_p(t, 2));
1232 assert(radix_tree_lookup_node(t, 0) == (void *)0xdeadbea0);
1233 assert(radix_tree_lookup_node(t, 1000) == NULL);
1234 memset(results, 0, sizeof(results));
1235 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1236 assert(results[0] == (void *)0xdeadbea0);
1237 memset(results, 0, sizeof(results));
1238 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1239 assert(results[0] == (void *)0xdeadbea0);
1240 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1241 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1242 memset(results, 0, sizeof(results));
1243 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1244 1);
1245 assert(results[0] == (void *)0xdeadbea0);
1246 memset(results, 0, sizeof(results));
1247 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1248 1);
1249 assert(results[0] == (void *)0xdeadbea0);
1250 memset(results, 0, sizeof(results));
1251 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1252 == 1);
1253 assert(results[0] == (void *)0xdeadbea0);
1254 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1255 == 0);
1256 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1257 == 0);
1258 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1259 == 0);
1260 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1261 false, 1) == 0);
1262 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1263 true, 1) == 0);
1264 assert(radix_tree_insert_node(t, 1000, (void *)0xdeadbea0) == 0);
1265 assert(radix_tree_remove_node(t, 0) == (void *)0xdeadbea0);
1266 assert(!radix_tree_empty_tree_p(t));
1267 radix_tree_dump(t);
1268 assert(radix_tree_lookup_node(t, 0) == NULL);
1269 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1270 memset(results, 0, sizeof(results));
1271 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1272 assert(results[0] == (void *)0xdeadbea0);
1273 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1274 memset(results, 0, sizeof(results));
1275 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 1);
1276 assert(results[0] == (void *)0xdeadbea0);
1277 memset(results, 0, sizeof(results));
1278 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 1);
1279 assert(results[0] == (void *)0xdeadbea0);
1280 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false)
1281 == 0);
1282 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true)
1283 == 0);
1284 memset(results, 0, sizeof(results));
1285 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1286 == 1);
1287 memset(results, 0, sizeof(results));
1288 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1289 == 1);
1290 assert(results[0] == (void *)0xdeadbea0);
1291 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1292 == 0);
1293 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1294 == 0);
1295 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1296 false, 1) == 0);
1297 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1298 true, 1) == 0);
1299 assert(!radix_tree_get_tag(t, 1000, 1));
1300 assert(!radix_tree_get_tag(t, 1000, 2));
1301 assert(radix_tree_get_tag(t, 1000, 2 | 1) == 0);
1302 assert(radix_tree_empty_tagged_tree_p(t, 1));
1303 assert(radix_tree_empty_tagged_tree_p(t, 2));
1304 radix_tree_set_tag(t, 1000, 2);
1305 assert(!radix_tree_get_tag(t, 1000, 1));
1306 assert(radix_tree_get_tag(t, 1000, 2));
1307 assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1308 assert(radix_tree_empty_tagged_tree_p(t, 1));
1309 assert(!radix_tree_empty_tagged_tree_p(t, 2));
1310 radix_tree_dump(t);
1311 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1312 assert(radix_tree_insert_node(t, 0, (void *)0xbea0) == 0);
1313 radix_tree_dump(t);
1314 assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1315 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1316 assert(radix_tree_insert_node(t, UINT64_C(10000000000), (void *)0xdea0)
1317 == 0);
1318 radix_tree_dump(t);
1319 assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1320 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1321 assert(radix_tree_lookup_node(t, UINT64_C(10000000000)) ==
1322 (void *)0xdea0);
1323 radix_tree_dump(t);
1324 assert(!radix_tree_get_tag(t, 0, 2));
1325 assert(radix_tree_get_tag(t, 1000, 2));
1326 assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
1327 radix_tree_set_tag(t, 0, 2);
1328 radix_tree_set_tag(t, UINT64_C(10000000000), 2);
1329 radix_tree_dump(t);
1330 assert(radix_tree_get_tag(t, 0, 2));
1331 assert(radix_tree_get_tag(t, 1000, 2));
1332 assert(radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1333 radix_tree_clear_tag(t, 0, 2);
1334 radix_tree_clear_tag(t, UINT64_C(10000000000), 2);
1335 radix_tree_dump(t);
1336 assert(!radix_tree_get_tag(t, 0, 2));
1337 assert(radix_tree_get_tag(t, 1000, 2));
1338 assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1339 radix_tree_dump(t);
1340 assert(radix_tree_replace_node(t, 1000, (void *)0x12345678) ==
1341 (void *)0xdeadbea0);
1342 assert(!radix_tree_get_tag(t, 1000, 1));
1343 assert(radix_tree_get_tag(t, 1000, 2));
1344 assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1345 memset(results, 0, sizeof(results));
1346 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 3);
1347 assert(results[0] == (void *)0xbea0);
1348 assert(results[1] == (void *)0x12345678);
1349 assert(results[2] == (void *)0xdea0);
1350 memset(results, 0, sizeof(results));
1351 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1352 assert(results[0] == (void *)0xbea0);
1353 memset(results, 0, sizeof(results));
1354 assert(radix_tree_gang_lookup_node(t, 1, results, 3, false) == 2);
1355 assert(results[0] == (void *)0x12345678);
1356 assert(results[1] == (void *)0xdea0);
1357 assert(radix_tree_gang_lookup_node(t, 1, results, 3, true) == 0);
1358 memset(results, 0, sizeof(results));
1359 assert(radix_tree_gang_lookup_node(t, 1001, results, 3, false) == 1);
1360 assert(results[0] == (void *)0xdea0);
1361 assert(radix_tree_gang_lookup_node(t, 1001, results, 3, true) == 0);
1362 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1363 false) == 0);
1364 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1365 true) == 0);
1366 assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1367 3, false) == 0);
1368 assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1369 3, true) == 0);
1370 memset(results, 0, sizeof(results));
1371 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, false, 2)
1372 == 1);
1373 assert(results[0] == (void *)0x12345678);
1374 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, true, 2)
1375 == 0);
1376 assert(entry_tagmask(t->t_root) != 0);
1377 assert(radix_tree_remove_node(t, 1000) == (void *)0x12345678);
1378 assert(entry_tagmask(t->t_root) == 0);
1379 radix_tree_dump(t);
1380 assert(radix_tree_insert_node(t, UINT64_C(10000000001), (void *)0xfff0)
1381 == 0);
1382 memset(results, 0, sizeof(results));
1383 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1384 false) == 2);
1385 assert(results[0] == (void *)0xdea0);
1386 assert(results[1] == (void *)0xfff0);
1387 memset(results, 0, sizeof(results));
1388 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1389 true) == 2);
1390 assert(results[0] == (void *)0xdea0);
1391 assert(results[1] == (void *)0xfff0);
1392 memset(results, 0, sizeof(results));
1393 assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1394 results, 3, false) == 3);
1395 assert(results[0] == (void *)0xfff0);
1396 assert(results[1] == (void *)0xdea0);
1397 assert(results[2] == (void *)0xbea0);
1398 memset(results, 0, sizeof(results));
1399 assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1400 results, 3, true) == 2);
1401 assert(results[0] == (void *)0xfff0);
1402 assert(results[1] == (void *)0xdea0);
1403 assert(radix_tree_remove_node(t, UINT64_C(10000000000)) ==
1404 (void *)0xdea0);
1405 assert(radix_tree_remove_node(t, UINT64_C(10000000001)) ==
1406 (void *)0xfff0);
1407 radix_tree_dump(t);
1408 assert(radix_tree_remove_node(t, 0) == (void *)0xbea0);
1409 radix_tree_dump(t);
1410 radix_tree_fini_tree(t);
1411 }
1412
1413 #include <sys/time.h>
1414
/*
 * testnode: a leaf object used by test2.
 */
struct testnode {
	/* the index at which test2 inserts this node into the tree. */
	uint64_t idx;
	/* tagged[tag] is true if test2 sets tag bit (1 << tag) on this node. */
	bool tagged[RADIX_TREE_TAG_ID_MAX];
};
1419
/*
 * printops:
 *
 * Print a "RESULT" line reporting the rate, in operations per second, at
 * which n operations were performed between the timestamps stv and etv.
 * title, name and tag are printed verbatim to identify the measurement.
 */
static void
printops(const char *title, const char *name, int tag, unsigned int n,
    const struct timeval *stv, const struct timeval *etv)
{
	/*
	 * widen before multiplying so that the conversion to microseconds
	 * is done in 64-bit arithmetic even if time_t is a 32-bit type.
	 */
	const uint64_t s =
	    (uint64_t)stv->tv_sec * 1000000 + (uint64_t)stv->tv_usec;
	const uint64_t e =
	    (uint64_t)etv->tv_sec * 1000000 + (uint64_t)etv->tv_usec;

	printf("RESULT %s %s %d %lf op/s\n", title, name, tag,
	    (double)n / (e - s) * 1000000);
}
1430
1431 #define TEST2_GANG_LOOKUP_NODES 16
1432
/*
 * test2_should_tag:
 *
 * Decide whether the i-th test node gets the tag with the given id:
 * every 4th node for tag id 0, every 7th node for any other tag id.
 */
static bool
test2_should_tag(unsigned int i, unsigned int tagid)
{

	if (tagid == 0) {
		return (i % 4) == 0;	/* 25% */
	} else {
		return (i % 7) == 0;	/* 14% */
	}
}
1444
1445 static void
check_tag_count(const unsigned int * ntagged,unsigned int tagmask,unsigned int count)1446 check_tag_count(const unsigned int *ntagged, unsigned int tagmask,
1447 unsigned int count)
1448 {
1449 unsigned int tag;
1450
1451 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1452 if ((tagmask & (1 << tag)) == 0) {
1453 continue;
1454 }
1455 if (((tagmask - 1) & tagmask) == 0) {
1456 assert(count == ntagged[tag]);
1457 } else {
1458 assert(count >= ntagged[tag]);
1459 }
1460 }
1461 }
1462
1463 static void
test2(const char * title,bool dense)1464 test2(const char *title, bool dense)
1465 {
1466 struct radix_tree s;
1467 struct radix_tree *t = &s;
1468 struct testnode *n;
1469 unsigned int i;
1470 unsigned int nnodes = 100000;
1471 unsigned int removed;
1472 unsigned int tag;
1473 unsigned int tagmask;
1474 unsigned int ntagged[RADIX_TREE_TAG_ID_MAX];
1475 struct testnode *nodes;
1476 struct timeval stv;
1477 struct timeval etv;
1478
1479 nodes = malloc(nnodes * sizeof(*nodes));
1480 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1481 ntagged[tag] = 0;
1482 }
1483 radix_tree_init_tree(t);
1484 for (i = 0; i < nnodes; i++) {
1485 n = &nodes[i];
1486 n->idx = random();
1487 if (sizeof(long) == 4) {
1488 n->idx <<= 32;
1489 n->idx |= (uint32_t)random();
1490 }
1491 if (dense) {
1492 n->idx %= nnodes * 2;
1493 }
1494 while (radix_tree_lookup_node(t, n->idx) != NULL) {
1495 n->idx++;
1496 }
1497 radix_tree_insert_node(t, n->idx, n);
1498 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1499 tagmask = 1 << tag;
1500
1501 n->tagged[tag] = test2_should_tag(i, tag);
1502 if (n->tagged[tag]) {
1503 radix_tree_set_tag(t, n->idx, tagmask);
1504 ntagged[tag]++;
1505 }
1506 assert((n->tagged[tag] ? tagmask : 0) ==
1507 radix_tree_get_tag(t, n->idx, tagmask));
1508 }
1509 }
1510
1511 gettimeofday(&stv, NULL);
1512 for (i = 0; i < nnodes; i++) {
1513 n = &nodes[i];
1514 assert(radix_tree_lookup_node(t, n->idx) == n);
1515 }
1516 gettimeofday(&etv, NULL);
1517 printops(title, "lookup", 0, nnodes, &stv, &etv);
1518
1519 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1520 unsigned int count = 0;
1521
1522 gettimeofday(&stv, NULL);
1523 for (i = 0; i < nnodes; i++) {
1524 unsigned int tagged;
1525
1526 n = &nodes[i];
1527 tagged = radix_tree_get_tag(t, n->idx, tagmask);
1528 assert((tagged & ~tagmask) == 0);
1529 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1530 assert((tagmask & (1 << tag)) == 0 ||
1531 n->tagged[tag] == !!(tagged & (1 << tag)));
1532 }
1533 if (tagged) {
1534 count++;
1535 }
1536 }
1537 gettimeofday(&etv, NULL);
1538 check_tag_count(ntagged, tagmask, count);
1539 printops(title, "get_tag", tagmask, nnodes, &stv, &etv);
1540 }
1541
1542 gettimeofday(&stv, NULL);
1543 for (i = 0; i < nnodes; i++) {
1544 n = &nodes[i];
1545 radix_tree_remove_node(t, n->idx);
1546 }
1547 gettimeofday(&etv, NULL);
1548 printops(title, "remove", 0, nnodes, &stv, &etv);
1549
1550 gettimeofday(&stv, NULL);
1551 for (i = 0; i < nnodes; i++) {
1552 n = &nodes[i];
1553 radix_tree_insert_node(t, n->idx, n);
1554 }
1555 gettimeofday(&etv, NULL);
1556 printops(title, "insert", 0, nnodes, &stv, &etv);
1557
1558 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1559 tagmask = 1 << tag;
1560
1561 ntagged[tag] = 0;
1562 gettimeofday(&stv, NULL);
1563 for (i = 0; i < nnodes; i++) {
1564 n = &nodes[i];
1565 if (n->tagged[tag]) {
1566 radix_tree_set_tag(t, n->idx, tagmask);
1567 ntagged[tag]++;
1568 }
1569 }
1570 gettimeofday(&etv, NULL);
1571 printops(title, "set_tag", tag, ntagged[tag], &stv, &etv);
1572 }
1573
1574 gettimeofday(&stv, NULL);
1575 {
1576 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1577 uint64_t nextidx;
1578 unsigned int nfound;
1579 unsigned int total;
1580
1581 nextidx = 0;
1582 total = 0;
1583 while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1584 (void *)results, __arraycount(results), false)) > 0) {
1585 nextidx = results[nfound - 1]->idx + 1;
1586 total += nfound;
1587 if (nextidx == 0) {
1588 break;
1589 }
1590 }
1591 assert(total == nnodes);
1592 }
1593 gettimeofday(&etv, NULL);
1594 printops(title, "ganglookup", 0, nnodes, &stv, &etv);
1595
1596 gettimeofday(&stv, NULL);
1597 {
1598 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1599 uint64_t nextidx;
1600 unsigned int nfound;
1601 unsigned int total;
1602
1603 nextidx = UINT64_MAX;
1604 total = 0;
1605 while ((nfound = radix_tree_gang_lookup_node_reverse(t, nextidx,
1606 (void *)results, __arraycount(results), false)) > 0) {
1607 nextidx = results[nfound - 1]->idx - 1;
1608 total += nfound;
1609 if (nextidx == UINT64_MAX) {
1610 break;
1611 }
1612 }
1613 assert(total == nnodes);
1614 }
1615 gettimeofday(&etv, NULL);
1616 printops(title, "ganglookup_reverse", 0, nnodes, &stv, &etv);
1617
1618 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1619 unsigned int total = 0;
1620
1621 gettimeofday(&stv, NULL);
1622 {
1623 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1624 uint64_t nextidx;
1625 unsigned int nfound;
1626
1627 nextidx = 0;
1628 while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1629 nextidx, (void *)results, __arraycount(results),
1630 false, tagmask)) > 0) {
1631 nextidx = results[nfound - 1]->idx + 1;
1632 total += nfound;
1633 }
1634 }
1635 gettimeofday(&etv, NULL);
1636 check_tag_count(ntagged, tagmask, total);
1637 assert(tagmask != 0 || total == 0);
1638 printops(title, "ganglookup_tag", tagmask, total, &stv, &etv);
1639 }
1640
1641 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1642 unsigned int total = 0;
1643
1644 gettimeofday(&stv, NULL);
1645 {
1646 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1647 uint64_t nextidx;
1648 unsigned int nfound;
1649
1650 nextidx = UINT64_MAX;
1651 while ((nfound =
1652 radix_tree_gang_lookup_tagged_node_reverse(t,
1653 nextidx, (void *)results, __arraycount(results),
1654 false, tagmask)) > 0) {
1655 nextidx = results[nfound - 1]->idx - 1;
1656 total += nfound;
1657 if (nextidx == UINT64_MAX) {
1658 break;
1659 }
1660 }
1661 }
1662 gettimeofday(&etv, NULL);
1663 check_tag_count(ntagged, tagmask, total);
1664 assert(tagmask != 0 || total == 0);
1665 printops(title, "ganglookup_tag_reverse", tagmask, total,
1666 &stv, &etv);
1667 }
1668
1669 removed = 0;
1670 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1671 unsigned int total;
1672
1673 total = 0;
1674 tagmask = 1 << tag;
1675 gettimeofday(&stv, NULL);
1676 {
1677 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1678 uint64_t nextidx;
1679 unsigned int nfound;
1680
1681 nextidx = 0;
1682 while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1683 nextidx, (void *)results, __arraycount(results),
1684 false, tagmask)) > 0) {
1685 for (i = 0; i < nfound; i++) {
1686 radix_tree_remove_node(t,
1687 results[i]->idx);
1688 }
1689 nextidx = results[nfound - 1]->idx + 1;
1690 total += nfound;
1691 if (nextidx == 0) {
1692 break;
1693 }
1694 }
1695 }
1696 gettimeofday(&etv, NULL);
1697 if (tag == 0) {
1698 check_tag_count(ntagged, tagmask, total);
1699 } else {
1700 assert(total <= ntagged[tag]);
1701 }
1702 printops(title, "ganglookup_tag+remove", tagmask, total, &stv,
1703 &etv);
1704 removed += total;
1705 }
1706
1707 gettimeofday(&stv, NULL);
1708 {
1709 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1710 uint64_t nextidx;
1711 unsigned int nfound;
1712 unsigned int total;
1713
1714 nextidx = 0;
1715 total = 0;
1716 while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1717 (void *)results, __arraycount(results), false)) > 0) {
1718 for (i = 0; i < nfound; i++) {
1719 assert(results[i] == radix_tree_remove_node(t,
1720 results[i]->idx));
1721 }
1722 nextidx = results[nfound - 1]->idx + 1;
1723 total += nfound;
1724 if (nextidx == 0) {
1725 break;
1726 }
1727 }
1728 assert(total == nnodes - removed);
1729 }
1730 gettimeofday(&etv, NULL);
1731 printops(title, "ganglookup+remove", 0, nnodes - removed, &stv, &etv);
1732
1733 assert(radix_tree_empty_tree_p(t));
1734 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1735 assert(radix_tree_empty_tagged_tree_p(t, tagmask));
1736 }
1737 radix_tree_fini_tree(t);
1738 free(nodes);
1739 }
1740
/*
 * main:
 *
 * Unit test entry point: run the scripted test, then the randomized test
 * once with dense and once with sparse indices.
 */
int
main(int argc, char *argv[])
{
	static const struct {
		const char *title;
		bool dense;
	} runs[] = {
		{ "dense", true },
		{ "sparse", false },
	};
	unsigned int k;

	test1();
	for (k = 0; k < __arraycount(runs); k++) {
		test2(runs[k].title, runs[k].dense);
	}
	return 0;
}
1750
1751 #endif /* defined(UNITTEST) */
1752