xref: /freebsd-src/sys/kern/subr_pctrie.c (revision a905c589be67db98bbb9c99932511be70e5027fb)
18a36da99SPedro F. Giffuni /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
4f2cc1285SJeff Roberson  * Copyright (c) 2013 EMC Corp.
5f2cc1285SJeff Roberson  * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
6f2cc1285SJeff Roberson  * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
7f2cc1285SJeff Roberson  * All rights reserved.
8f2cc1285SJeff Roberson  *
9f2cc1285SJeff Roberson  * Redistribution and use in source and binary forms, with or without
10f2cc1285SJeff Roberson  * modification, are permitted provided that the following conditions
11f2cc1285SJeff Roberson  * are met:
12f2cc1285SJeff Roberson  * 1. Redistributions of source code must retain the above copyright
13f2cc1285SJeff Roberson  *    notice, this list of conditions and the following disclaimer.
14f2cc1285SJeff Roberson  * 2. Redistributions in binary form must reproduce the above copyright
15f2cc1285SJeff Roberson  *    notice, this list of conditions and the following disclaimer in the
16f2cc1285SJeff Roberson  *    documentation and/or other materials provided with the distribution.
17f2cc1285SJeff Roberson  *
18f2cc1285SJeff Roberson  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19f2cc1285SJeff Roberson  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20f2cc1285SJeff Roberson  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21f2cc1285SJeff Roberson  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22f2cc1285SJeff Roberson  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23f2cc1285SJeff Roberson  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24f2cc1285SJeff Roberson  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25f2cc1285SJeff Roberson  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26f2cc1285SJeff Roberson  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27f2cc1285SJeff Roberson  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28f2cc1285SJeff Roberson  * SUCH DAMAGE.
29f2cc1285SJeff Roberson  *
30f2cc1285SJeff Roberson  */
31f2cc1285SJeff Roberson 
32f2cc1285SJeff Roberson /*
33f2cc1285SJeff Roberson  * Path-compressed radix trie implementation.
34f2cc1285SJeff Roberson  *
35f2cc1285SJeff Roberson  * The implementation takes into account the following rationale:
36f2cc1285SJeff Roberson  * - Size of the nodes should be as small as possible but still big enough
37f2cc1285SJeff Roberson  *   to avoid a large maximum depth for the trie.  This is a balance
38f2cc1285SJeff Roberson  *   between the necessity to not wire too much physical memory for the nodes
39f2cc1285SJeff Roberson  *   and the necessity to avoid too much cache pollution during the trie
40f2cc1285SJeff Roberson  *   operations.
41f2cc1285SJeff Roberson  * - There is not a huge bias toward the number of lookup operations over
42f2cc1285SJeff Roberson  *   the number of insert and remove operations.  This basically implies
43f2cc1285SJeff Roberson  *   that optimizations supposedly helping one operation but hurting the
44f2cc1285SJeff Roberson  *   other might be carefully evaluated.
45f2cc1285SJeff Roberson  * - On average not many nodes are expected to be fully populated, hence
46f2cc1285SJeff Roberson  *   level compression may just complicate things.
47f2cc1285SJeff Roberson  */
48f2cc1285SJeff Roberson 
49f2cc1285SJeff Roberson #include <sys/cdefs.h>
50f2cc1285SJeff Roberson #include "opt_ddb.h"
51f2cc1285SJeff Roberson 
52f2cc1285SJeff Roberson #include <sys/param.h>
53f2cc1285SJeff Roberson #include <sys/systm.h>
54f2cc1285SJeff Roberson #include <sys/kernel.h>
5505963ea4SDoug Moore #include <sys/libkern.h>
56f2cc1285SJeff Roberson #include <sys/pctrie.h>
573c30b235SConrad Meyer #include <sys/proc.h>	/* smr.h depends on struct thread. */
583c30b235SConrad Meyer #include <sys/smr.h>
593c30b235SConrad Meyer #include <sys/smr_types.h>
60f2cc1285SJeff Roberson 
61f2cc1285SJeff Roberson #ifdef DDB
62f2cc1285SJeff Roberson #include <ddb/ddb.h>
63f2cc1285SJeff Roberson #endif
64f2cc1285SJeff Roberson 
658df38859SDoug Moore #if PCTRIE_WIDTH == 3
668df38859SDoug Moore typedef uint8_t pn_popmap_t;
678df38859SDoug Moore #elif PCTRIE_WIDTH == 4
688df38859SDoug Moore typedef uint16_t pn_popmap_t;
698df38859SDoug Moore #elif PCTRIE_WIDTH == 5
708df38859SDoug Moore typedef uint32_t pn_popmap_t;
718df38859SDoug Moore #else
728df38859SDoug Moore #error Unsupported width
738df38859SDoug Moore #endif
748df38859SDoug Moore _Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
758df38859SDoug Moore     "pn_popmap_t too wide");
768df38859SDoug Moore 
773c30b235SConrad Meyer struct pctrie_node;
783c30b235SConrad Meyer typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
793c30b235SConrad Meyer 
80f2cc1285SJeff Roberson struct pctrie_node {
81f2cc1285SJeff Roberson 	uint64_t	pn_owner;			/* Owner of record. */
828df38859SDoug Moore 	pn_popmap_t	pn_popmap;			/* Valid children. */
8338f5cb1bSDoug Moore 	uint8_t		pn_clev;			/* Level * WIDTH. */
843c30b235SConrad Meyer 	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
85f2cc1285SJeff Roberson };
86f2cc1285SJeff Roberson 
87f2cc1285SJeff Roberson /*
88ac0572e6SDoug Moore  * Map index to an array position for the children of node,
89da72505fSDoug Moore  */
90da72505fSDoug Moore static __inline int
91ac0572e6SDoug Moore pctrie_slot(struct pctrie_node *node, uint64_t index)
92da72505fSDoug Moore {
93fd1d6662SDoug Moore 	return ((index >> node->pn_clev) & (PCTRIE_COUNT - 1));
94da72505fSDoug Moore }
95da72505fSDoug Moore 
96ac0572e6SDoug Moore /*
97ac0572e6SDoug Moore  * Returns true if index does not belong to the specified node.  Otherwise,
98ac0572e6SDoug Moore  * sets slot value, and returns false.
99ac0572e6SDoug Moore  */
100ac0572e6SDoug Moore static __inline bool
101ac0572e6SDoug Moore pctrie_keybarr(struct pctrie_node *node, uint64_t index, int *slot)
102da72505fSDoug Moore {
103ac0572e6SDoug Moore 	index = (index - node->pn_owner) >> node->pn_clev;
104ac0572e6SDoug Moore 	if (index >= PCTRIE_COUNT)
105ac0572e6SDoug Moore 		return (true);
106ac0572e6SDoug Moore 	*slot = index;
107ac0572e6SDoug Moore 	return (false);
108da72505fSDoug Moore }
109da72505fSDoug Moore 
110da72505fSDoug Moore /*
1113b7ffacdSDoug Moore  * Check radix node.
112f2cc1285SJeff Roberson  */
113f2cc1285SJeff Roberson static __inline void
1143b7ffacdSDoug Moore pctrie_node_put(struct pctrie_node *node)
115f2cc1285SJeff Roberson {
116f2cc1285SJeff Roberson #ifdef INVARIANTS
117f2cc1285SJeff Roberson 	int slot;
118f2cc1285SJeff Roberson 
1198df38859SDoug Moore 	KASSERT(powerof2(node->pn_popmap),
1208df38859SDoug Moore 	    ("pctrie_node_put: node %p has too many children %04x", node,
1218df38859SDoug Moore 	    node->pn_popmap));
1223c30b235SConrad Meyer 	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
1238df38859SDoug Moore 		if ((node->pn_popmap & (1 << slot)) != 0)
1243c30b235SConrad Meyer 			continue;
1253c30b235SConrad Meyer 		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
1262d2bcba7SDoug Moore 		    PCTRIE_NULL,
1272d2bcba7SDoug Moore 		    ("pctrie_node_put: node %p has a child", node));
1283c30b235SConrad Meyer 	}
129f2cc1285SJeff Roberson #endif
130f2cc1285SJeff Roberson }
131f2cc1285SJeff Roberson 
132fd1d6662SDoug Moore enum pctrie_access { PCTRIE_SMR, PCTRIE_LOCKED, PCTRIE_UNSERIALIZED };
133fd1d6662SDoug Moore 
134f2cc1285SJeff Roberson /*
1353c30b235SConrad Meyer  * Fetch a node pointer from a slot.
1363c30b235SConrad Meyer  */
1373c30b235SConrad Meyer static __inline struct pctrie_node *
1383c30b235SConrad Meyer pctrie_node_load(smr_pctnode_t *p, smr_t smr, enum pctrie_access access)
1393c30b235SConrad Meyer {
1403c30b235SConrad Meyer 	switch (access) {
1413c30b235SConrad Meyer 	case PCTRIE_UNSERIALIZED:
1423c30b235SConrad Meyer 		return (smr_unserialized_load(p, true));
1433c30b235SConrad Meyer 	case PCTRIE_LOCKED:
1443c30b235SConrad Meyer 		return (smr_serialized_load(p, true));
1453c30b235SConrad Meyer 	case PCTRIE_SMR:
1463c30b235SConrad Meyer 		return (smr_entered_load(p, smr));
1473c30b235SConrad Meyer 	}
1483c30b235SConrad Meyer 	__assert_unreachable();
1493c30b235SConrad Meyer }
1503c30b235SConrad Meyer 
1513c30b235SConrad Meyer static __inline void
1523c30b235SConrad Meyer pctrie_node_store(smr_pctnode_t *p, void *v, enum pctrie_access access)
1533c30b235SConrad Meyer {
1543c30b235SConrad Meyer 	switch (access) {
1553c30b235SConrad Meyer 	case PCTRIE_UNSERIALIZED:
1563c30b235SConrad Meyer 		smr_unserialized_store(p, v, true);
1573c30b235SConrad Meyer 		break;
1583c30b235SConrad Meyer 	case PCTRIE_LOCKED:
1593c30b235SConrad Meyer 		smr_serialized_store(p, v, true);
1603c30b235SConrad Meyer 		break;
1613c30b235SConrad Meyer 	case PCTRIE_SMR:
1623c30b235SConrad Meyer 		panic("%s: Not supported in SMR section.", __func__);
1633c30b235SConrad Meyer 		break;
1643c30b235SConrad Meyer 	default:
1653c30b235SConrad Meyer 		__assert_unreachable();
1663c30b235SConrad Meyer 		break;
1673c30b235SConrad Meyer 	}
1683c30b235SConrad Meyer }
1693c30b235SConrad Meyer 
1703c30b235SConrad Meyer /*
171*a905c589SDoug Moore  * Get the root address, cast to proper type for load/store.
172*a905c589SDoug Moore  */
173*a905c589SDoug Moore static __inline smr_pctnode_t *
174*a905c589SDoug Moore pctrie_root(struct pctrie *ptree)
175*a905c589SDoug Moore {
176*a905c589SDoug Moore 	return ((smr_pctnode_t *)&ptree->pt_root);
177*a905c589SDoug Moore }
178*a905c589SDoug Moore 
179*a905c589SDoug Moore /*
180f2cc1285SJeff Roberson  * Get the root node for a tree.
181f2cc1285SJeff Roberson  */
182f2cc1285SJeff Roberson static __inline struct pctrie_node *
1833c30b235SConrad Meyer pctrie_root_load(struct pctrie *ptree, smr_t smr, enum pctrie_access access)
184f2cc1285SJeff Roberson {
185*a905c589SDoug Moore 	return (pctrie_node_load(pctrie_root(ptree), smr, access));
186f2cc1285SJeff Roberson }
187f2cc1285SJeff Roberson 
188f2cc1285SJeff Roberson /*
189f2cc1285SJeff Roberson  * Returns TRUE if the specified node is a leaf and FALSE otherwise.
190f2cc1285SJeff Roberson  */
19104f9afaeSConrad Meyer static __inline bool
192f2cc1285SJeff Roberson pctrie_isleaf(struct pctrie_node *node)
193f2cc1285SJeff Roberson {
194f2cc1285SJeff Roberson 	return (((uintptr_t)node & PCTRIE_ISLEAF) != 0);
195f2cc1285SJeff Roberson }
196f2cc1285SJeff Roberson 
197f2cc1285SJeff Roberson /*
1989cfed089SDoug Moore  * Returns val with leaf bit set.
1999cfed089SDoug Moore  */
2009cfed089SDoug Moore static __inline void *
2019cfed089SDoug Moore pctrie_toleaf(uint64_t *val)
2029cfed089SDoug Moore {
2039cfed089SDoug Moore 	return ((void *)((uintptr_t)val | PCTRIE_ISLEAF));
2049cfed089SDoug Moore }
2059cfed089SDoug Moore 
2069cfed089SDoug Moore /*
207f2cc1285SJeff Roberson  * Returns the associated val extracted from node.
208f2cc1285SJeff Roberson  */
209f2cc1285SJeff Roberson static __inline uint64_t *
210f2cc1285SJeff Roberson pctrie_toval(struct pctrie_node *node)
211f2cc1285SJeff Roberson {
212f2cc1285SJeff Roberson 	return ((uint64_t *)((uintptr_t)node & ~PCTRIE_FLAGS));
213f2cc1285SJeff Roberson }
214f2cc1285SJeff Roberson 
215f2cc1285SJeff Roberson /*
216c0d0bc2bSDoug Moore  * Returns the associated pointer extracted from node and field offset.
217c0d0bc2bSDoug Moore  */
218c0d0bc2bSDoug Moore static __inline void *
219c0d0bc2bSDoug Moore pctrie_toptr(struct pctrie_node *node, int keyoff)
220c0d0bc2bSDoug Moore {
221c0d0bc2bSDoug Moore 	return ((void *)(((uintptr_t)node & ~PCTRIE_FLAGS) - keyoff));
222c0d0bc2bSDoug Moore }
223c0d0bc2bSDoug Moore 
224c0d0bc2bSDoug Moore /*
22516e01c05SDoug Moore  * Make 'child' a child of 'node'.
226f2cc1285SJeff Roberson  */
227f2cc1285SJeff Roberson static __inline void
22838f5cb1bSDoug Moore pctrie_addnode(struct pctrie_node *node, uint64_t index,
22916e01c05SDoug Moore     struct pctrie_node *child, enum pctrie_access access)
230f2cc1285SJeff Roberson {
231f2cc1285SJeff Roberson 	int slot;
232f2cc1285SJeff Roberson 
233ac0572e6SDoug Moore 	slot = pctrie_slot(node, index);
23416e01c05SDoug Moore 	pctrie_node_store(&node->pn_child[slot], child, access);
2358df38859SDoug Moore 	node->pn_popmap ^= 1 << slot;
2368df38859SDoug Moore 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
2378df38859SDoug Moore 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
238f2cc1285SJeff Roberson }
239f2cc1285SJeff Roberson 
240f2cc1285SJeff Roberson /*
241f2cc1285SJeff Roberson  * pctrie node zone initializer.
242f2cc1285SJeff Roberson  */
243f2cc1285SJeff Roberson int
244f2cc1285SJeff Roberson pctrie_zone_init(void *mem, int size __unused, int flags __unused)
245f2cc1285SJeff Roberson {
246f2cc1285SJeff Roberson 	struct pctrie_node *node;
247f2cc1285SJeff Roberson 
248f2cc1285SJeff Roberson 	node = mem;
2498df38859SDoug Moore 	node->pn_popmap = 0;
2502d2bcba7SDoug Moore 	for (int i = 0; i < nitems(node->pn_child); i++)
2512d2bcba7SDoug Moore 		pctrie_node_store(&node->pn_child[i], PCTRIE_NULL,
2522d2bcba7SDoug Moore 		    PCTRIE_UNSERIALIZED);
253f2cc1285SJeff Roberson 	return (0);
254f2cc1285SJeff Roberson }
255f2cc1285SJeff Roberson 
256f2cc1285SJeff Roberson size_t
257f2cc1285SJeff Roberson pctrie_node_size(void)
258f2cc1285SJeff Roberson {
259f2cc1285SJeff Roberson 
260f2cc1285SJeff Roberson 	return (sizeof(struct pctrie_node));
261f2cc1285SJeff Roberson }
262f2cc1285SJeff Roberson 
263bbf81f46SRyan Libby enum pctrie_insert_neighbor_mode {
264bbf81f46SRyan Libby 	PCTRIE_INSERT_NEIGHBOR_NONE,
265bbf81f46SRyan Libby 	PCTRIE_INSERT_NEIGHBOR_LT,
266bbf81f46SRyan Libby 	PCTRIE_INSERT_NEIGHBOR_GT,
267bbf81f46SRyan Libby };
268bbf81f46SRyan Libby 
269f2cc1285SJeff Roberson /*
270bbf81f46SRyan Libby  * Look for where to insert the key-value pair into the trie.  Complete the
271bbf81f46SRyan Libby  * insertion if it replaces a null leaf.  Return the insertion location if the
272bbf81f46SRyan Libby  * insertion needs to be completed by the caller; otherwise return NULL.
273bbf81f46SRyan Libby  *
274bbf81f46SRyan Libby  * If the key is already present in the trie, populate *found_out as if by
275bbf81f46SRyan Libby  * pctrie_lookup().
276bbf81f46SRyan Libby  *
277bbf81f46SRyan Libby  * With mode PCTRIE_INSERT_NEIGHBOR_GT or PCTRIE_INSERT_NEIGHBOR_LT, set
278bbf81f46SRyan Libby  * *neighbor_out to the lowest level node we encounter during the insert lookup
279bbf81f46SRyan Libby  * that is a parent of the next greater or lesser entry.  The value is not
280bbf81f46SRyan Libby  * defined if the key was already present in the trie.
281bbf81f46SRyan Libby  *
282bbf81f46SRyan Libby  * Note that mode is expected to be a compile-time constant, and this procedure
283bbf81f46SRyan Libby  * is expected to be inlined into callers with extraneous code optimized out.
284f2cc1285SJeff Roberson  */
285bbf81f46SRyan Libby static __always_inline void *
286bbf81f46SRyan Libby pctrie_insert_lookup_compound(struct pctrie *ptree, uint64_t *val,
287bbf81f46SRyan Libby     uint64_t **found_out, struct pctrie_node **neighbor_out,
288bbf81f46SRyan Libby     enum pctrie_insert_neighbor_mode mode)
289f2cc1285SJeff Roberson {
2903b7ffacdSDoug Moore 	uint64_t index;
2913b7ffacdSDoug Moore 	struct pctrie_node *node, *parent;
292f2cc1285SJeff Roberson 	int slot;
293f2cc1285SJeff Roberson 
294f2cc1285SJeff Roberson 	index = *val;
295f2cc1285SJeff Roberson 
296f2cc1285SJeff Roberson 	/*
297f2cc1285SJeff Roberson 	 * The owner of record for root is not really important because it
298f2cc1285SJeff Roberson 	 * will never be used.
299f2cc1285SJeff Roberson 	 */
3003c30b235SConrad Meyer 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
3012d2bcba7SDoug Moore 	parent = NULL;
3022d2bcba7SDoug Moore 	for (;;) {
3032d2bcba7SDoug Moore 		if (pctrie_isleaf(node)) {
3042d2bcba7SDoug Moore 			if (node == PCTRIE_NULL) {
3052d2bcba7SDoug Moore 				if (parent == NULL)
306*a905c589SDoug Moore 					pctrie_node_store(pctrie_root(ptree),
3079147a0c9SDoug Moore 					    pctrie_toleaf(val), PCTRIE_LOCKED);
3082d2bcba7SDoug Moore 				else
3093b7ffacdSDoug Moore 					pctrie_addnode(parent, index,
3103b7ffacdSDoug Moore 					    pctrie_toleaf(val), PCTRIE_LOCKED);
3113b7ffacdSDoug Moore 				return (NULL);
312f2cc1285SJeff Roberson 			}
313bbf81f46SRyan Libby 			if (*pctrie_toval(node) == index) {
314bbf81f46SRyan Libby 				*found_out = pctrie_toval(node);
315bbf81f46SRyan Libby 				return (NULL);
316bbf81f46SRyan Libby 			}
317f2cc1285SJeff Roberson 			break;
3182d2bcba7SDoug Moore 		}
3193b7ffacdSDoug Moore 		if (pctrie_keybarr(node, index, &slot))
32016e01c05SDoug Moore 			break;
321bbf81f46SRyan Libby 		/*
322bbf81f46SRyan Libby 		 * Descend.  If we're tracking the next neighbor and this node
323bbf81f46SRyan Libby 		 * contains a neighboring entry in the right direction, record
324bbf81f46SRyan Libby 		 * it.
325bbf81f46SRyan Libby 		 */
326bbf81f46SRyan Libby 		if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
327bbf81f46SRyan Libby 			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
328bbf81f46SRyan Libby 				*neighbor_out = node;
329bbf81f46SRyan Libby 		} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
330bbf81f46SRyan Libby 			if ((node->pn_popmap >> slot) > 1)
331bbf81f46SRyan Libby 				*neighbor_out = node;
332bbf81f46SRyan Libby 		}
3332d2bcba7SDoug Moore 		parent = node;
3342d2bcba7SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
3353c30b235SConrad Meyer 		    PCTRIE_LOCKED);
336f2cc1285SJeff Roberson 	}
337f2cc1285SJeff Roberson 
338f2cc1285SJeff Roberson 	/*
339bbf81f46SRyan Libby 	 * The caller will split this node.  If we're tracking the next
340bbf81f46SRyan Libby 	 * neighbor, record the old node if the old entry is in the right
341bbf81f46SRyan Libby 	 * direction.
342bbf81f46SRyan Libby 	 */
343bbf81f46SRyan Libby 	if (mode == PCTRIE_INSERT_NEIGHBOR_LT) {
344bbf81f46SRyan Libby 		if (*pctrie_toval(node) < index)
345bbf81f46SRyan Libby 			*neighbor_out = node;
346bbf81f46SRyan Libby 	} else if (mode == PCTRIE_INSERT_NEIGHBOR_GT) {
347bbf81f46SRyan Libby 		if (*pctrie_toval(node) > index)
348bbf81f46SRyan Libby 			*neighbor_out = node;
349bbf81f46SRyan Libby 	}
350bbf81f46SRyan Libby 
351bbf81f46SRyan Libby 	/*
3523b7ffacdSDoug Moore 	 * 'node' must be replaced in the tree with a new branch node, with
3533b7ffacdSDoug Moore 	 * children 'node' and 'val'. Return the place that points to 'node'
3543b7ffacdSDoug Moore 	 * now, and will point to to the new branching node later.
355f2cc1285SJeff Roberson 	 */
356*a905c589SDoug Moore 	return ((parent == NULL) ? pctrie_root(ptree): &parent->pn_child[slot]);
3573b7ffacdSDoug Moore }
3583b7ffacdSDoug Moore 
3593b7ffacdSDoug Moore /*
360bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement a strict insertion.  Panic
361bbf81f46SRyan Libby  * if the key already exists, and do not look for neighboring entries.
362bbf81f46SRyan Libby  */
363bbf81f46SRyan Libby void *
364bbf81f46SRyan Libby pctrie_insert_lookup_strict(struct pctrie *ptree, uint64_t *val)
365bbf81f46SRyan Libby {
366bbf81f46SRyan Libby 	void *parentp;
367bbf81f46SRyan Libby 	uint64_t *found;
368bbf81f46SRyan Libby 
369bbf81f46SRyan Libby 	found = NULL;
370bbf81f46SRyan Libby 	parentp = pctrie_insert_lookup_compound(ptree, val, &found, NULL,
371bbf81f46SRyan Libby 	    PCTRIE_INSERT_NEIGHBOR_NONE);
372bbf81f46SRyan Libby 	if (__predict_false(found != NULL))
373bbf81f46SRyan Libby 		panic("%s: key %jx is already present", __func__,
374bbf81f46SRyan Libby 		    (uintmax_t)*val);
375bbf81f46SRyan Libby 	return (parentp);
376bbf81f46SRyan Libby }
377bbf81f46SRyan Libby 
378bbf81f46SRyan Libby /*
379bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement find-or-insert.  Do not look
380bbf81f46SRyan Libby  * for neighboring entries.
381bbf81f46SRyan Libby  */
382bbf81f46SRyan Libby void *
383bbf81f46SRyan Libby pctrie_insert_lookup(struct pctrie *ptree, uint64_t *val,
384bbf81f46SRyan Libby     uint64_t **found_out)
385bbf81f46SRyan Libby {
386bbf81f46SRyan Libby 	*found_out = NULL;
387bbf81f46SRyan Libby 	return (pctrie_insert_lookup_compound(ptree, val, found_out, NULL,
388bbf81f46SRyan Libby 	    PCTRIE_INSERT_NEIGHBOR_NONE));
389bbf81f46SRyan Libby }
390bbf81f46SRyan Libby 
391bbf81f46SRyan Libby /*
392bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
393bbf81f46SRyan Libby  * greater entry.  Find a subtree that contains the next entry greater than the
394bbf81f46SRyan Libby  * newly-inserted or to-be-inserted entry.
395bbf81f46SRyan Libby  */
396bbf81f46SRyan Libby void *
397bbf81f46SRyan Libby pctrie_insert_lookup_gt(struct pctrie *ptree, uint64_t *val,
398bbf81f46SRyan Libby     uint64_t **found_out, struct pctrie_node **neighbor_out)
399bbf81f46SRyan Libby {
400bbf81f46SRyan Libby 	*found_out = NULL;
401bbf81f46SRyan Libby 	*neighbor_out = NULL;
402bbf81f46SRyan Libby 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
403bbf81f46SRyan Libby 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_GT));
404bbf81f46SRyan Libby }
405bbf81f46SRyan Libby 
406bbf81f46SRyan Libby /*
407bbf81f46SRyan Libby  * Wrap pctrie_insert_lookup_compound to implement find or insert and find next
408bbf81f46SRyan Libby  * lesser entry.  Find a subtree that contains the next entry less than the
409bbf81f46SRyan Libby  * newly-inserted or to-be-inserted entry.
410bbf81f46SRyan Libby  */
411bbf81f46SRyan Libby void *
412bbf81f46SRyan Libby pctrie_insert_lookup_lt(struct pctrie *ptree, uint64_t *val,
413bbf81f46SRyan Libby     uint64_t **found_out, struct pctrie_node **neighbor_out)
414bbf81f46SRyan Libby {
415bbf81f46SRyan Libby 	*found_out = NULL;
416bbf81f46SRyan Libby 	*neighbor_out = NULL;
417bbf81f46SRyan Libby 	return (pctrie_insert_lookup_compound(ptree, val, found_out,
418bbf81f46SRyan Libby 	    neighbor_out, PCTRIE_INSERT_NEIGHBOR_LT));
419bbf81f46SRyan Libby }
420bbf81f46SRyan Libby 
421bbf81f46SRyan Libby /*
4223b7ffacdSDoug Moore  * Uses new node to insert key-value pair into the trie at given location.
4233b7ffacdSDoug Moore  */
4243b7ffacdSDoug Moore void
4253b7ffacdSDoug Moore pctrie_insert_node(void *parentp, struct pctrie_node *parent, uint64_t *val)
4263b7ffacdSDoug Moore {
4273b7ffacdSDoug Moore 	struct pctrie_node *node;
4283b7ffacdSDoug Moore 	uint64_t index, newind;
4293b7ffacdSDoug Moore 
4303b7ffacdSDoug Moore 	/*
4313b7ffacdSDoug Moore 	 * Clear the last child pointer of the newly allocated parent.  We want
4323b7ffacdSDoug Moore 	 * to clear it after the final section has exited so lookup can not
4333b7ffacdSDoug Moore 	 * return false negatives.  It is done here because it will be
4343b7ffacdSDoug Moore 	 * cache-cold in the dtor callback.
4353b7ffacdSDoug Moore 	 */
4363b7ffacdSDoug Moore 	if (parent->pn_popmap != 0) {
4373b7ffacdSDoug Moore 		pctrie_node_store(&parent->pn_child[ffs(parent->pn_popmap) - 1],
4383b7ffacdSDoug Moore 		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
4393b7ffacdSDoug Moore 		parent->pn_popmap = 0;
4403b7ffacdSDoug Moore 	}
4413b7ffacdSDoug Moore 
4423b7ffacdSDoug Moore 	/*
4433b7ffacdSDoug Moore 	 * Recover the values of the two children of the new parent node.  If
4443b7ffacdSDoug Moore 	 * 'node' is not a leaf, this stores into 'newind' the 'owner' field,
4453b7ffacdSDoug Moore 	 * which must be first in the node.
4463b7ffacdSDoug Moore 	 */
4473b7ffacdSDoug Moore 	index = *val;
4483b7ffacdSDoug Moore 	node = pctrie_node_load(parentp, NULL, PCTRIE_UNSERIALIZED);
4493b7ffacdSDoug Moore 	newind = *pctrie_toval(node);
4503b7ffacdSDoug Moore 
4513b7ffacdSDoug Moore 	/*
4523b7ffacdSDoug Moore 	 * From the highest-order bit where the indexes differ,
4533b7ffacdSDoug Moore 	 * compute the highest level in the trie where they differ.  Then,
4543b7ffacdSDoug Moore 	 * compute the least index of this subtrie.
4553b7ffacdSDoug Moore 	 */
4563b7ffacdSDoug Moore 	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
4573b7ffacdSDoug Moore 	    "uint64 too wide");
4583b7ffacdSDoug Moore 	_Static_assert(sizeof(uint64_t) * NBBY <=
4593b7ffacdSDoug Moore 	    (1 << (sizeof(parent->pn_clev) * NBBY)), "pn_clev too narrow");
460749c249dSDoug Moore 	parent->pn_clev = rounddown(ilog2(index ^ newind), PCTRIE_WIDTH);
4613b7ffacdSDoug Moore 	parent->pn_owner = PCTRIE_COUNT;
4623b7ffacdSDoug Moore 	parent->pn_owner = index & -(parent->pn_owner << parent->pn_clev);
4633b7ffacdSDoug Moore 
4643b7ffacdSDoug Moore 
4653c30b235SConrad Meyer 	/* These writes are not yet visible due to ordering. */
4663b7ffacdSDoug Moore 	pctrie_addnode(parent, index, pctrie_toleaf(val), PCTRIE_UNSERIALIZED);
46738f5cb1bSDoug Moore 	pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED);
4683c30b235SConrad Meyer 	/* Synchronize to make the above visible. */
4692d2bcba7SDoug Moore 	pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
470f2cc1285SJeff Roberson }
471f2cc1285SJeff Roberson 
472f2cc1285SJeff Roberson /*
473fd1d6662SDoug Moore  * Return the value associated with the node, if the node is a leaf that matches
474fd1d6662SDoug Moore  * the index; otherwise NULL.
475fd1d6662SDoug Moore  */
476fd1d6662SDoug Moore static __always_inline uint64_t *
477fd1d6662SDoug Moore pctrie_match_value(struct pctrie_node *node, uint64_t index)
478fd1d6662SDoug Moore {
479fd1d6662SDoug Moore 	uint64_t *m;
480fd1d6662SDoug Moore 
481fd1d6662SDoug Moore 	if (!pctrie_isleaf(node) || (m = pctrie_toval(node)) == NULL ||
482fd1d6662SDoug Moore 	    *m != index)
483fd1d6662SDoug Moore 		m = NULL;
484fd1d6662SDoug Moore 	return (m);
485fd1d6662SDoug Moore }
486fd1d6662SDoug Moore 
487fd1d6662SDoug Moore /*
488f2cc1285SJeff Roberson  * Returns the value stored at the index.  If the index is not present,
489f2cc1285SJeff Roberson  * NULL is returned.
490f2cc1285SJeff Roberson  */
4913c30b235SConrad Meyer static __always_inline uint64_t *
4923c30b235SConrad Meyer _pctrie_lookup(struct pctrie *ptree, uint64_t index, smr_t smr,
4933c30b235SConrad Meyer     enum pctrie_access access)
494f2cc1285SJeff Roberson {
495f2cc1285SJeff Roberson 	struct pctrie_node *node;
496f2cc1285SJeff Roberson 	int slot;
497f2cc1285SJeff Roberson 
4983c30b235SConrad Meyer 	node = pctrie_root_load(ptree, smr, access);
499fd1d6662SDoug Moore 	/* Seek a node that matches index. */
500fd1d6662SDoug Moore 	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot))
5013c30b235SConrad Meyer 		node = pctrie_node_load(&node->pn_child[slot], smr, access);
502fd1d6662SDoug Moore 	return (pctrie_match_value(node, index));
503f2cc1285SJeff Roberson }
504f2cc1285SJeff Roberson 
505f2cc1285SJeff Roberson /*
5063c30b235SConrad Meyer  * Returns the value stored at the index, assuming access is externally
5073c30b235SConrad Meyer  * synchronized by a lock.
5083c30b235SConrad Meyer  *
5093c30b235SConrad Meyer  * If the index is not present, NULL is returned.
5103c30b235SConrad Meyer  */
5113c30b235SConrad Meyer uint64_t *
5123c30b235SConrad Meyer pctrie_lookup(struct pctrie *ptree, uint64_t index)
5133c30b235SConrad Meyer {
5143c30b235SConrad Meyer 	return (_pctrie_lookup(ptree, index, NULL, PCTRIE_LOCKED));
5153c30b235SConrad Meyer }
5163c30b235SConrad Meyer 
5173c30b235SConrad Meyer /*
5183c30b235SConrad Meyer  * Returns the value stored at the index without requiring an external lock.
5193c30b235SConrad Meyer  *
5203c30b235SConrad Meyer  * If the index is not present, NULL is returned.
5213c30b235SConrad Meyer  */
5223c30b235SConrad Meyer uint64_t *
5233c30b235SConrad Meyer pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
5243c30b235SConrad Meyer {
5253c30b235SConrad Meyer 	uint64_t *res;
5263c30b235SConrad Meyer 
5273c30b235SConrad Meyer 	smr_enter(smr);
5283c30b235SConrad Meyer 	res = _pctrie_lookup(ptree, index, smr, PCTRIE_SMR);
5293c30b235SConrad Meyer 	smr_exit(smr);
5303c30b235SConrad Meyer 	return (res);
5313c30b235SConrad Meyer }
5323c30b235SConrad Meyer 
5333c30b235SConrad Meyer /*
534fd1d6662SDoug Moore  * Returns the last node examined in the search for the index, and updates the
535fd1d6662SDoug Moore  * search path to that node.
536fd1d6662SDoug Moore  */
537fd1d6662SDoug Moore static __always_inline struct pctrie_node *
538fd1d6662SDoug Moore _pctrie_iter_lookup_node(struct pctrie_iter *it, uint64_t index, smr_t smr,
539fd1d6662SDoug Moore     enum pctrie_access access)
540fd1d6662SDoug Moore {
541fd1d6662SDoug Moore 	struct pctrie_node *node;
542fd1d6662SDoug Moore 	int slot;
543fd1d6662SDoug Moore 
544fd1d6662SDoug Moore 	/*
545fd1d6662SDoug Moore 	 * Climb the search path to find the lowest node from which to start the
546fd1d6662SDoug Moore 	 * search for a value matching 'index'.
547fd1d6662SDoug Moore 	 */
548fd1d6662SDoug Moore 	while (it->top != 0) {
549fd1d6662SDoug Moore 		node = it->path[it->top - 1];
550fd1d6662SDoug Moore 		KASSERT(!powerof2(node->pn_popmap),
551fd1d6662SDoug Moore 		    ("%s: freed node in iter path", __func__));
552fd1d6662SDoug Moore 		if (!pctrie_keybarr(node, index, &slot)) {
553fd1d6662SDoug Moore 			node = pctrie_node_load(
554fd1d6662SDoug Moore 			    &node->pn_child[slot], smr, access);
555fd1d6662SDoug Moore 			break;
556fd1d6662SDoug Moore 		}
557fd1d6662SDoug Moore 		--it->top;
558fd1d6662SDoug Moore 	}
559fd1d6662SDoug Moore 	if (it->top == 0)
560fd1d6662SDoug Moore 		node = pctrie_root_load(it->ptree, smr, access);
561fd1d6662SDoug Moore 
562fd1d6662SDoug Moore 	/* Seek a node that matches index. */
563fd1d6662SDoug Moore 	while (!pctrie_isleaf(node) && !pctrie_keybarr(node, index, &slot)) {
564fd1d6662SDoug Moore 		KASSERT(it->top < nitems(it->path),
565fd1d6662SDoug Moore 		    ("%s: path overflow in trie %p", __func__, it->ptree));
566fd1d6662SDoug Moore 		it->path[it->top++] = node;
567fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], smr, access);
568fd1d6662SDoug Moore 	}
569fd1d6662SDoug Moore 	return (node);
570fd1d6662SDoug Moore }
571fd1d6662SDoug Moore 
572fd1d6662SDoug Moore /*
573fd1d6662SDoug Moore  * Returns the value stored at a given index value, possibly NULL.
574fd1d6662SDoug Moore  */
575fd1d6662SDoug Moore static __always_inline uint64_t *
576fd1d6662SDoug Moore _pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr,
577fd1d6662SDoug Moore     enum pctrie_access access)
578fd1d6662SDoug Moore {
579fd1d6662SDoug Moore 	struct pctrie_node *node;
580fd1d6662SDoug Moore 
581fd1d6662SDoug Moore 	it->index = index;
582fd1d6662SDoug Moore 	node = _pctrie_iter_lookup_node(it, index, smr, access);
583fd1d6662SDoug Moore 	return (pctrie_match_value(node, index));
584fd1d6662SDoug Moore }
585fd1d6662SDoug Moore 
586fd1d6662SDoug Moore /*
587fd1d6662SDoug Moore  * Returns the value stored at a given index value, possibly NULL.
588fd1d6662SDoug Moore  */
589fd1d6662SDoug Moore uint64_t *
590fd1d6662SDoug Moore pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index)
591fd1d6662SDoug Moore {
592fd1d6662SDoug Moore 	return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED));
593fd1d6662SDoug Moore }
594fd1d6662SDoug Moore 
595fd1d6662SDoug Moore /*
596d0b225d1SDoug Moore  * Insert the val in the trie, starting search with iterator.  Return a pointer
597d0b225d1SDoug Moore  * to indicate where a new node must be allocated to complete insertion.
598d0b225d1SDoug Moore  * Assumes access is externally synchronized by a lock.
599d0b225d1SDoug Moore  */
600d0b225d1SDoug Moore void *
601d0b225d1SDoug Moore pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val)
602d0b225d1SDoug Moore {
603d0b225d1SDoug Moore 	struct pctrie_node *node;
604d0b225d1SDoug Moore 
605d0b225d1SDoug Moore 	it->index = *val;
606d0b225d1SDoug Moore 	node = _pctrie_iter_lookup_node(it, *val, NULL, PCTRIE_LOCKED);
607d0b225d1SDoug Moore 	if (node == PCTRIE_NULL) {
608d0b225d1SDoug Moore 		if (it->top == 0)
609*a905c589SDoug Moore 			pctrie_node_store(pctrie_root(it->ptree),
610d0b225d1SDoug Moore 			    pctrie_toleaf(val), PCTRIE_LOCKED);
611d0b225d1SDoug Moore 		else
612d0b225d1SDoug Moore 			pctrie_addnode(it->path[it->top - 1], it->index,
613d0b225d1SDoug Moore 			    pctrie_toleaf(val), PCTRIE_LOCKED);
614d0b225d1SDoug Moore 		return (NULL);
615d0b225d1SDoug Moore 	}
616d0b225d1SDoug Moore 	if (__predict_false(pctrie_match_value(node, it->index) != NULL))
617d0b225d1SDoug Moore 		panic("%s: key %jx is already present", __func__,
618d0b225d1SDoug Moore 		    (uintmax_t)it->index);
619d0b225d1SDoug Moore 
620d0b225d1SDoug Moore 	/*
621d0b225d1SDoug Moore 	 * 'node' must be replaced in the tree with a new branch node, with
622d0b225d1SDoug Moore 	 * children 'node' and 'val'. Return the place that points to 'node'
623d0b225d1SDoug Moore 	 * now, and will point to to the new branching node later.
624d0b225d1SDoug Moore 	 */
625d0b225d1SDoug Moore 	if (it->top == 0)
626*a905c589SDoug Moore 		return (pctrie_root(it->ptree));
627d0b225d1SDoug Moore 	node = it->path[it->top - 1];
628d0b225d1SDoug Moore 	return (&node->pn_child[pctrie_slot(node, it->index)]);
629d0b225d1SDoug Moore }
630d0b225d1SDoug Moore 
631d0b225d1SDoug Moore /*
632fd1d6662SDoug Moore  * Returns the value stored at a fixed offset from the current index value,
633fd1d6662SDoug Moore  * possibly NULL.
634fd1d6662SDoug Moore  */
635fd1d6662SDoug Moore static __always_inline uint64_t *
636fd1d6662SDoug Moore _pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr,
637fd1d6662SDoug Moore     enum pctrie_access access)
638fd1d6662SDoug Moore {
639fd1d6662SDoug Moore 	uint64_t index = it->index + stride;
640fd1d6662SDoug Moore 
641fd1d6662SDoug Moore 	/* Detect stride overflow. */
642fd1d6662SDoug Moore 	if ((stride > 0) != (index > it->index))
643fd1d6662SDoug Moore 		return (NULL);
644fd1d6662SDoug Moore 	/* Detect crossing limit */
645fd1d6662SDoug Moore 	if ((index < it->limit) != (it->index < it->limit))
646fd1d6662SDoug Moore 		return (NULL);
647fd1d6662SDoug Moore 
648fd1d6662SDoug Moore 	return (_pctrie_iter_lookup(it, index, smr, access));
649fd1d6662SDoug Moore }
650fd1d6662SDoug Moore 
651fd1d6662SDoug Moore /*
652fd1d6662SDoug Moore  * Returns the value stored at a fixed offset from the current index value,
653fd1d6662SDoug Moore  * possibly NULL.
654fd1d6662SDoug Moore  */
655fd1d6662SDoug Moore uint64_t *
656fd1d6662SDoug Moore pctrie_iter_stride(struct pctrie_iter *it, int stride)
657fd1d6662SDoug Moore {
658fd1d6662SDoug Moore 	return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED));
659fd1d6662SDoug Moore }
660fd1d6662SDoug Moore 
661fd1d6662SDoug Moore /*
662fd1d6662SDoug Moore  * Returns the value stored at one more than the current index value, possibly
663fd1d6662SDoug Moore  * NULL, assuming access is externally synchronized by a lock.
664fd1d6662SDoug Moore  */
665fd1d6662SDoug Moore uint64_t *
666fd1d6662SDoug Moore pctrie_iter_next(struct pctrie_iter *it)
667fd1d6662SDoug Moore {
668fd1d6662SDoug Moore 	return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED));
669fd1d6662SDoug Moore }
670fd1d6662SDoug Moore 
671fd1d6662SDoug Moore /*
672fd1d6662SDoug Moore  * Returns the value stored at one less than the current index value, possibly
673fd1d6662SDoug Moore  * NULL, assuming access is externally synchronized by a lock.
674fd1d6662SDoug Moore  */
675fd1d6662SDoug Moore uint64_t *
676fd1d6662SDoug Moore pctrie_iter_prev(struct pctrie_iter *it)
677fd1d6662SDoug Moore {
678fd1d6662SDoug Moore 	return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED));
679fd1d6662SDoug Moore }
680fd1d6662SDoug Moore 
681fd1d6662SDoug Moore /*
6826f251ef2SDoug Moore  * Returns the value with the least index that is greater than or equal to the
6836f251ef2SDoug Moore  * specified index, or NULL if there are no such values.
6846f251ef2SDoug Moore  *
6856f251ef2SDoug Moore  * Requires that access be externally synchronized by a lock.
686f2cc1285SJeff Roberson  */
687bbf81f46SRyan Libby static __inline uint64_t *
688bbf81f46SRyan Libby pctrie_lookup_ge_node(struct pctrie_node *node, uint64_t index)
689f2cc1285SJeff Roberson {
690bbf81f46SRyan Libby 	struct pctrie_node *succ;
691f2cc1285SJeff Roberson 	uint64_t *m;
692d1139b52SConrad Meyer 	int slot;
693f2cc1285SJeff Roberson 
6946f251ef2SDoug Moore 	/*
6956f251ef2SDoug Moore 	 * Descend the trie as if performing an ordinary lookup for the
6966f251ef2SDoug Moore 	 * specified value.  However, unlike an ordinary lookup, as we descend
6976f251ef2SDoug Moore 	 * the trie, we use "succ" to remember the last branching-off point,
6986f251ef2SDoug Moore 	 * that is, the interior node under which the least value that is both
6996f251ef2SDoug Moore 	 * outside our current path down the trie and greater than the specified
7006f251ef2SDoug Moore 	 * index resides.  (The node's popmap makes it fast and easy to
7016f251ef2SDoug Moore 	 * recognize a branching-off point.)  If our ordinary lookup fails to
7026f251ef2SDoug Moore 	 * yield a value that is greater than or equal to the specified index,
7036f251ef2SDoug Moore 	 * then we will exit this loop and perform a lookup starting from
7046f251ef2SDoug Moore 	 * "succ".  If "succ" is not NULL, then that lookup is guaranteed to
7056f251ef2SDoug Moore 	 * succeed.
7066f251ef2SDoug Moore 	 */
7076f251ef2SDoug Moore 	succ = NULL;
7082d2bcba7SDoug Moore 	for (;;) {
7096f251ef2SDoug Moore 		if (pctrie_isleaf(node)) {
7102d2bcba7SDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m >= index)
711f2cc1285SJeff Roberson 				return (m);
7126f251ef2SDoug Moore 			break;
713f2cc1285SJeff Roberson 		}
714ac0572e6SDoug Moore 		if (pctrie_keybarr(node, index, &slot)) {
715f2cc1285SJeff Roberson 			/*
7166f251ef2SDoug Moore 			 * If all values in this subtree are > index, then the
7176f251ef2SDoug Moore 			 * least value in this subtree is the answer.
718f2cc1285SJeff Roberson 			 */
7196f251ef2SDoug Moore 			if (node->pn_owner > index)
7206f251ef2SDoug Moore 				succ = node;
7216f251ef2SDoug Moore 			break;
722f2cc1285SJeff Roberson 		}
723f2cc1285SJeff Roberson 
724f2cc1285SJeff Roberson 		/*
7256f251ef2SDoug Moore 		 * Just in case the next search step leads to a subtree of all
7266f251ef2SDoug Moore 		 * values < index, check popmap to see if a next bigger step, to
7276f251ef2SDoug Moore 		 * a subtree of all pages with values > index, is available.  If
7286f251ef2SDoug Moore 		 * so, remember to restart the search here.
729f2cc1285SJeff Roberson 		 */
7306f251ef2SDoug Moore 		if ((node->pn_popmap >> slot) > 1)
7316f251ef2SDoug Moore 			succ = node;
7326f251ef2SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
7336f251ef2SDoug Moore 		    PCTRIE_LOCKED);
734f2cc1285SJeff Roberson 	}
735f2cc1285SJeff Roberson 
736f2cc1285SJeff Roberson 	/*
7376f251ef2SDoug Moore 	 * Restart the search from the last place visited in the subtree that
7386f251ef2SDoug Moore 	 * included some values > index, if there was such a place.
7396f251ef2SDoug Moore 	 */
7406f251ef2SDoug Moore 	if (succ == NULL)
7416f251ef2SDoug Moore 		return (NULL);
7426f251ef2SDoug Moore 	if (succ != node) {
7436f251ef2SDoug Moore 		/*
7446f251ef2SDoug Moore 		 * Take a step to the next bigger sibling of the node chosen
7456f251ef2SDoug Moore 		 * last time.  In that subtree, all values > index.
7466f251ef2SDoug Moore 		 */
747ac0572e6SDoug Moore 		slot = pctrie_slot(succ, index) + 1;
7486f251ef2SDoug Moore 		KASSERT((succ->pn_popmap >> slot) != 0,
7496f251ef2SDoug Moore 		    ("%s: no popmap siblings past slot %d in node %p",
7506f251ef2SDoug Moore 		    __func__, slot, succ));
7516f251ef2SDoug Moore 		slot += ffs(succ->pn_popmap >> slot) - 1;
7526f251ef2SDoug Moore 		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
7536f251ef2SDoug Moore 		    PCTRIE_LOCKED);
7546f251ef2SDoug Moore 	}
7556f251ef2SDoug Moore 
7566f251ef2SDoug Moore 	/*
7576f251ef2SDoug Moore 	 * Find the value in the subtree rooted at "succ" with the least index.
7586f251ef2SDoug Moore 	 */
7596f251ef2SDoug Moore 	while (!pctrie_isleaf(succ)) {
7606f251ef2SDoug Moore 		KASSERT(succ->pn_popmap != 0,
7616f251ef2SDoug Moore 		    ("%s: no popmap children in node %p",  __func__, succ));
7626f251ef2SDoug Moore 		slot = ffs(succ->pn_popmap) - 1;
7636f251ef2SDoug Moore 		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
7646f251ef2SDoug Moore 		    PCTRIE_LOCKED);
7656f251ef2SDoug Moore 	}
7666f251ef2SDoug Moore 	return (pctrie_toval(succ));
7676f251ef2SDoug Moore }
7686f251ef2SDoug Moore 
769bbf81f46SRyan Libby uint64_t *
770bbf81f46SRyan Libby pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
771bbf81f46SRyan Libby {
772bbf81f46SRyan Libby 	return (pctrie_lookup_ge_node(
773bbf81f46SRyan Libby 	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
774bbf81f46SRyan Libby }
775bbf81f46SRyan Libby 
776bbf81f46SRyan Libby uint64_t *
777bbf81f46SRyan Libby pctrie_subtree_lookup_gt(struct pctrie_node *node, uint64_t index)
778bbf81f46SRyan Libby {
779bbf81f46SRyan Libby 	if (node == NULL || index + 1 == 0)
780bbf81f46SRyan Libby 		return (NULL);
781bbf81f46SRyan Libby 	return (pctrie_lookup_ge_node(node, index + 1));
782bbf81f46SRyan Libby }
783bbf81f46SRyan Libby 
784fd1d6662SDoug Moore /*
785fd1d6662SDoug Moore  * Find first leaf >= index, and fill iter with the path to the parent of that
786fd1d6662SDoug Moore  * leaf.  Return NULL if there is no such leaf less than limit.
787fd1d6662SDoug Moore  */
788fd1d6662SDoug Moore uint64_t *
789fd1d6662SDoug Moore pctrie_iter_lookup_ge(struct pctrie_iter *it, uint64_t index)
790fd1d6662SDoug Moore {
791fd1d6662SDoug Moore 	struct pctrie_node *node;
792fd1d6662SDoug Moore 	uint64_t *m;
793fd1d6662SDoug Moore 	int slot;
794fd1d6662SDoug Moore 
795fd1d6662SDoug Moore 	/* Seek a node that matches index. */
796fd1d6662SDoug Moore 	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);
797fd1d6662SDoug Moore 
798fd1d6662SDoug Moore 	/*
799fd1d6662SDoug Moore 	 * If no such node was found, and instead this path leads only to nodes
800fd1d6662SDoug Moore 	 * < index, back up to find a subtrie with the least value > index.
801fd1d6662SDoug Moore 	 */
8020d965bc0SDoug Moore 	if (node == PCTRIE_NULL || *pctrie_toval(node) < index) {
803fd1d6662SDoug Moore 		/* Climb the path to find a node with a descendant > index. */
804fd1d6662SDoug Moore 		while (it->top != 0) {
805fd1d6662SDoug Moore 			node = it->path[it->top - 1];
806fd1d6662SDoug Moore 			slot = pctrie_slot(node, index) + 1;
807fd1d6662SDoug Moore 			if ((node->pn_popmap >> slot) != 0)
808fd1d6662SDoug Moore 				break;
809fd1d6662SDoug Moore 			--it->top;
810fd1d6662SDoug Moore 		}
811fd1d6662SDoug Moore 		if (it->top == 0)
812fd1d6662SDoug Moore 			return (NULL);
813fd1d6662SDoug Moore 
814fd1d6662SDoug Moore 		/* Step to the least child with a descendant > index. */
815fd1d6662SDoug Moore 		slot += ffs(node->pn_popmap >> slot) - 1;
816fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
817fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
818fd1d6662SDoug Moore 	}
819fd1d6662SDoug Moore 	/* Descend to the least leaf of the subtrie. */
820fd1d6662SDoug Moore 	while (!pctrie_isleaf(node)) {
821fd1d6662SDoug Moore 		if (it->limit != 0 && node->pn_owner >= it->limit)
822fd1d6662SDoug Moore 			return (NULL);
823fd1d6662SDoug Moore 		slot = ffs(node->pn_popmap) - 1;
824fd1d6662SDoug Moore 		KASSERT(it->top < nitems(it->path),
825fd1d6662SDoug Moore 		    ("%s: path overflow in trie %p", __func__, it->ptree));
826fd1d6662SDoug Moore 		it->path[it->top++] = node;
827fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
828fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
829fd1d6662SDoug Moore 	}
830fd1d6662SDoug Moore 	m = pctrie_toval(node);
831fd1d6662SDoug Moore 	if (it->limit != 0 && *m >= it->limit)
832fd1d6662SDoug Moore 		return (NULL);
833fd1d6662SDoug Moore 	it->index = *m;
834fd1d6662SDoug Moore 	return (m);
835fd1d6662SDoug Moore }
836fd1d6662SDoug Moore 
837fd1d6662SDoug Moore /*
838fd1d6662SDoug Moore  * Find the first leaf with value at least 'jump' greater than the previous
839fd1d6662SDoug Moore  * leaf.  Return NULL if that value is >= limit.
840fd1d6662SDoug Moore  */
841fd1d6662SDoug Moore uint64_t *
842fd1d6662SDoug Moore pctrie_iter_jump_ge(struct pctrie_iter *it, int64_t jump)
843fd1d6662SDoug Moore {
844fd1d6662SDoug Moore 	uint64_t index = it->index + jump;
845fd1d6662SDoug Moore 
846fd1d6662SDoug Moore 	/* Detect jump overflow. */
847fd1d6662SDoug Moore 	if ((jump > 0) != (index > it->index))
848fd1d6662SDoug Moore 		return (NULL);
849fd1d6662SDoug Moore 	if (it->limit != 0 && index >= it->limit)
850fd1d6662SDoug Moore 		return (NULL);
851fd1d6662SDoug Moore 	return (pctrie_iter_lookup_ge(it, index));
852fd1d6662SDoug Moore }
853fd1d6662SDoug Moore 
854bbf81f46SRyan Libby #ifdef INVARIANTS
855bbf81f46SRyan Libby void
856bbf81f46SRyan Libby pctrie_subtree_lookup_gt_assert(struct pctrie_node *node, uint64_t index,
857bbf81f46SRyan Libby     struct pctrie *ptree, uint64_t *res)
858bbf81f46SRyan Libby {
859bbf81f46SRyan Libby 	uint64_t *expected;
860bbf81f46SRyan Libby 
861bbf81f46SRyan Libby 	if (index + 1 == 0)
862bbf81f46SRyan Libby 		expected = NULL;
863bbf81f46SRyan Libby 	else
864bbf81f46SRyan Libby 		expected = pctrie_lookup_ge(ptree, index + 1);
865bbf81f46SRyan Libby 	KASSERT(res == expected,
866bbf81f46SRyan Libby 	    ("pctrie subtree lookup gt result different from root lookup: "
867bbf81f46SRyan Libby 	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
868bbf81f46SRyan Libby 	    (uintmax_t)index, node, res, expected));
869bbf81f46SRyan Libby }
870bbf81f46SRyan Libby #endif
871bbf81f46SRyan Libby 
8726f251ef2SDoug Moore /*
8736f251ef2SDoug Moore  * Returns the value with the greatest index that is less than or equal to the
8746f251ef2SDoug Moore  * specified index, or NULL if there are no such values.
8756f251ef2SDoug Moore  *
8766f251ef2SDoug Moore  * Requires that access be externally synchronized by a lock.
877f2cc1285SJeff Roberson  */
878bbf81f46SRyan Libby static __inline uint64_t *
879bbf81f46SRyan Libby pctrie_lookup_le_node(struct pctrie_node *node, uint64_t index)
880f2cc1285SJeff Roberson {
881bbf81f46SRyan Libby 	struct pctrie_node *pred;
882f2cc1285SJeff Roberson 	uint64_t *m;
883d1139b52SConrad Meyer 	int slot;
884f2cc1285SJeff Roberson 
8856f251ef2SDoug Moore 	/*
886bbf81f46SRyan Libby 	 * Mirror the implementation of pctrie_lookup_ge_node, described above.
8876f251ef2SDoug Moore 	 */
8886f251ef2SDoug Moore 	pred = NULL;
8892d2bcba7SDoug Moore 	for (;;) {
8906f251ef2SDoug Moore 		if (pctrie_isleaf(node)) {
8912d2bcba7SDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m <= index)
892f2cc1285SJeff Roberson 				return (m);
8936f251ef2SDoug Moore 			break;
894f2cc1285SJeff Roberson 		}
895ac0572e6SDoug Moore 		if (pctrie_keybarr(node, index, &slot)) {
8966f251ef2SDoug Moore 			if (node->pn_owner < index)
8976f251ef2SDoug Moore 				pred = node;
8986f251ef2SDoug Moore 			break;
899f2cc1285SJeff Roberson 		}
9006f251ef2SDoug Moore 		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
9016f251ef2SDoug Moore 			pred = node;
9026f251ef2SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
9033c30b235SConrad Meyer 		    PCTRIE_LOCKED);
9048df38859SDoug Moore 	}
9056f251ef2SDoug Moore 	if (pred == NULL)
9066f251ef2SDoug Moore 		return (NULL);
9076f251ef2SDoug Moore 	if (pred != node) {
908ac0572e6SDoug Moore 		slot = pctrie_slot(pred, index);
9096f251ef2SDoug Moore 		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
9106f251ef2SDoug Moore 		    ("%s: no popmap siblings before slot %d in node %p",
9116f251ef2SDoug Moore 		    __func__, slot, pred));
912749c249dSDoug Moore 		slot = ilog2(pred->pn_popmap & ((1 << slot) - 1));
9136f251ef2SDoug Moore 		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
9146f251ef2SDoug Moore 		    PCTRIE_LOCKED);
915f2cc1285SJeff Roberson 	}
9166f251ef2SDoug Moore 	while (!pctrie_isleaf(pred)) {
9176f251ef2SDoug Moore 		KASSERT(pred->pn_popmap != 0,
9186f251ef2SDoug Moore 		    ("%s: no popmap children in node %p",  __func__, pred));
919749c249dSDoug Moore 		slot = ilog2(pred->pn_popmap);
9206f251ef2SDoug Moore 		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
9216f251ef2SDoug Moore 		    PCTRIE_LOCKED);
9226f251ef2SDoug Moore 	}
9236f251ef2SDoug Moore 	return (pctrie_toval(pred));
924f2cc1285SJeff Roberson }
925f2cc1285SJeff Roberson 
926bbf81f46SRyan Libby uint64_t *
927bbf81f46SRyan Libby pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
928bbf81f46SRyan Libby {
929bbf81f46SRyan Libby 	return (pctrie_lookup_le_node(
930bbf81f46SRyan Libby 	    pctrie_root_load(ptree, NULL, PCTRIE_LOCKED), index));
931bbf81f46SRyan Libby }
932bbf81f46SRyan Libby 
933bbf81f46SRyan Libby uint64_t *
934bbf81f46SRyan Libby pctrie_subtree_lookup_lt(struct pctrie_node *node, uint64_t index)
935bbf81f46SRyan Libby {
936bbf81f46SRyan Libby 	if (node == NULL || index == 0)
937bbf81f46SRyan Libby 		return (NULL);
938bbf81f46SRyan Libby 	return (pctrie_lookup_le_node(node, index - 1));
939bbf81f46SRyan Libby }
940bbf81f46SRyan Libby 
941fd1d6662SDoug Moore /*
942fd1d6662SDoug Moore  * Find first leaf <= index, and fill iter with the path to the parent of that
943fd1d6662SDoug Moore  * leaf.  Return NULL if there is no such leaf greater than limit.
944fd1d6662SDoug Moore  */
945fd1d6662SDoug Moore uint64_t *
946fd1d6662SDoug Moore pctrie_iter_lookup_le(struct pctrie_iter *it, uint64_t index)
947fd1d6662SDoug Moore {
948fd1d6662SDoug Moore 	struct pctrie_node *node;
949fd1d6662SDoug Moore 	uint64_t *m;
950fd1d6662SDoug Moore 	int slot;
951fd1d6662SDoug Moore 
952fd1d6662SDoug Moore 	/* Seek a node that matches index. */
953fd1d6662SDoug Moore 	node = _pctrie_iter_lookup_node(it, index, NULL, PCTRIE_LOCKED);
954fd1d6662SDoug Moore 
955fd1d6662SDoug Moore 	/*
956fd1d6662SDoug Moore 	 * If no such node was found, and instead this path leads only to nodes
957d2d0d6cbSDoug Moore 	 * > index, back up to find a subtrie with the greatest value < index.
958fd1d6662SDoug Moore 	 */
9590d965bc0SDoug Moore 	if (node == PCTRIE_NULL || *pctrie_toval(node) > index) {
960fd1d6662SDoug Moore 		/* Climb the path to find a node with a descendant < index. */
961fd1d6662SDoug Moore 		while (it->top != 0) {
962fd1d6662SDoug Moore 			node = it->path[it->top - 1];
963fd1d6662SDoug Moore 			slot = pctrie_slot(node, index);
964fd1d6662SDoug Moore 			if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
965fd1d6662SDoug Moore 				break;
966fd1d6662SDoug Moore 			--it->top;
967fd1d6662SDoug Moore 		}
968fd1d6662SDoug Moore 		if (it->top == 0)
969fd1d6662SDoug Moore 			return (NULL);
970fd1d6662SDoug Moore 
971fd1d6662SDoug Moore 		/* Step to the greatest child with a descendant < index. */
972fd1d6662SDoug Moore 		slot = ilog2(node->pn_popmap & ((1 << slot) - 1));
973fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
974fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
975fd1d6662SDoug Moore 	}
976fd1d6662SDoug Moore 	/* Descend to the greatest leaf of the subtrie. */
977fd1d6662SDoug Moore 	while (!pctrie_isleaf(node)) {
978fd1d6662SDoug Moore 		if (it->limit != 0 && it->limit >=
979fd1d6662SDoug Moore 		    node->pn_owner + (PCTRIE_COUNT << node->pn_clev) - 1)
980fd1d6662SDoug Moore 			return (NULL);
981fd1d6662SDoug Moore 		slot = ilog2(node->pn_popmap);
982fd1d6662SDoug Moore 		KASSERT(it->top < nitems(it->path),
983fd1d6662SDoug Moore 		    ("%s: path overflow in trie %p", __func__, it->ptree));
984fd1d6662SDoug Moore 		it->path[it->top++] = node;
985fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
986fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
987fd1d6662SDoug Moore 	}
988fd1d6662SDoug Moore 	m = pctrie_toval(node);
989fd1d6662SDoug Moore 	if (it->limit != 0 && *m <= it->limit)
990fd1d6662SDoug Moore 		return (NULL);
991fd1d6662SDoug Moore 	it->index = *m;
992fd1d6662SDoug Moore 	return (m);
993fd1d6662SDoug Moore }
994fd1d6662SDoug Moore 
995fd1d6662SDoug Moore /*
996fd1d6662SDoug Moore  * Find the first leaf with value at most 'jump' less than the previous
997fd1d6662SDoug Moore  * leaf.  Return NULL if that value is <= limit.
998fd1d6662SDoug Moore  */
999fd1d6662SDoug Moore uint64_t *
1000fd1d6662SDoug Moore pctrie_iter_jump_le(struct pctrie_iter *it, int64_t jump)
1001fd1d6662SDoug Moore {
1002fd1d6662SDoug Moore 	uint64_t index = it->index - jump;
1003fd1d6662SDoug Moore 
1004fd1d6662SDoug Moore 	/* Detect jump overflow. */
1005fd1d6662SDoug Moore 	if ((jump > 0) != (index < it->index))
1006fd1d6662SDoug Moore 		return (NULL);
1007fd1d6662SDoug Moore 	if (it->limit != 0 && index <= it->limit)
1008fd1d6662SDoug Moore 		return (NULL);
1009fd1d6662SDoug Moore 	return (pctrie_iter_lookup_le(it, index));
1010fd1d6662SDoug Moore }
1011fd1d6662SDoug Moore 
1012bbf81f46SRyan Libby #ifdef INVARIANTS
1013bbf81f46SRyan Libby void
1014bbf81f46SRyan Libby pctrie_subtree_lookup_lt_assert(struct pctrie_node *node, uint64_t index,
1015bbf81f46SRyan Libby     struct pctrie *ptree, uint64_t *res)
1016bbf81f46SRyan Libby {
1017bbf81f46SRyan Libby 	uint64_t *expected;
1018bbf81f46SRyan Libby 
1019bbf81f46SRyan Libby 	if (index == 0)
1020bbf81f46SRyan Libby 		expected = NULL;
1021bbf81f46SRyan Libby 	else
1022bbf81f46SRyan Libby 		expected = pctrie_lookup_le(ptree, index - 1);
1023bbf81f46SRyan Libby 	KASSERT(res == expected,
1024bbf81f46SRyan Libby 	    ("pctrie subtree lookup lt result different from root lookup: "
1025bbf81f46SRyan Libby 	    "ptree %p, index %ju, subtree %p, found %p, expected %p", ptree,
1026bbf81f46SRyan Libby 	    (uintmax_t)index, node, res, expected));
1027bbf81f46SRyan Libby }
1028bbf81f46SRyan Libby #endif
1029bbf81f46SRyan Libby 
1030fd1d6662SDoug Moore static void
1031fd1d6662SDoug Moore pctrie_remove(struct pctrie *ptree, uint64_t index, struct pctrie_node *parent,
1032fd1d6662SDoug Moore     struct pctrie_node *node, struct pctrie_node **freenode)
1033f2cc1285SJeff Roberson {
1034fd1d6662SDoug Moore 	struct pctrie_node *child;
10358df38859SDoug Moore 	int slot;
1036f2cc1285SJeff Roberson 
10372d2bcba7SDoug Moore 	if (node == NULL) {
1038*a905c589SDoug Moore 		pctrie_node_store(pctrie_root(ptree),
1039*a905c589SDoug Moore 		    PCTRIE_NULL, PCTRIE_LOCKED);
1040fd1d6662SDoug Moore 		return;
1041f2cc1285SJeff Roberson 	}
1042fd1d6662SDoug Moore 	slot = pctrie_slot(node, index);
10438df38859SDoug Moore 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
10448df38859SDoug Moore 	    ("%s: bad popmap slot %d in node %p",
10458df38859SDoug Moore 	    __func__, slot, node));
10468df38859SDoug Moore 	node->pn_popmap ^= 1 << slot;
10472d2bcba7SDoug Moore 	pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL, PCTRIE_LOCKED);
10488df38859SDoug Moore 	if (!powerof2(node->pn_popmap))
1049fd1d6662SDoug Moore 		return;
10502d2bcba7SDoug Moore 	KASSERT(node->pn_popmap != 0, ("%s: bad popmap all zeroes", __func__));
10518df38859SDoug Moore 	slot = ffs(node->pn_popmap) - 1;
10522d2bcba7SDoug Moore 	child = pctrie_node_load(&node->pn_child[slot], NULL, PCTRIE_LOCKED);
10532d2bcba7SDoug Moore 	KASSERT(child != PCTRIE_NULL,
10542d2bcba7SDoug Moore 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
1055f2cc1285SJeff Roberson 	if (parent == NULL)
1056*a905c589SDoug Moore 		pctrie_node_store(pctrie_root(ptree), child, PCTRIE_LOCKED);
1057f2cc1285SJeff Roberson 	else {
1058ac0572e6SDoug Moore 		slot = pctrie_slot(parent, index);
10592d2bcba7SDoug Moore 		KASSERT(node ==
10602d2bcba7SDoug Moore 		    pctrie_node_load(&parent->pn_child[slot], NULL,
10612d2bcba7SDoug Moore 		    PCTRIE_LOCKED), ("%s: invalid child value", __func__));
10622d2bcba7SDoug Moore 		pctrie_node_store(&parent->pn_child[slot], child,
10633c30b235SConrad Meyer 		    PCTRIE_LOCKED);
1064f2cc1285SJeff Roberson 	}
10653c30b235SConrad Meyer 	/*
10663c30b235SConrad Meyer 	 * The child is still valid and we can not zero the
10673c30b235SConrad Meyer 	 * pointer until all SMR references are gone.
10683c30b235SConrad Meyer 	 */
10693b7ffacdSDoug Moore 	pctrie_node_put(node);
10703b7ffacdSDoug Moore 	*freenode = node;
1071fd1d6662SDoug Moore }
1072fd1d6662SDoug Moore 
1073fd1d6662SDoug Moore /*
1074fd1d6662SDoug Moore  * Remove the specified index from the tree, and return the value stored at
1075fd1d6662SDoug Moore  * that index.  If the index is not present, return NULL.
1076fd1d6662SDoug Moore  */
1077fd1d6662SDoug Moore uint64_t *
1078fd1d6662SDoug Moore pctrie_remove_lookup(struct pctrie *ptree, uint64_t index,
1079fd1d6662SDoug Moore     struct pctrie_node **freenode)
1080fd1d6662SDoug Moore {
1081fd1d6662SDoug Moore 	struct pctrie_node *child, *node, *parent;
1082fd1d6662SDoug Moore 	uint64_t *m;
1083fd1d6662SDoug Moore 	int slot;
1084fd1d6662SDoug Moore 
1085fd1d6662SDoug Moore 	DEBUG_POISON_POINTER(parent);
1086fd1d6662SDoug Moore 	*freenode = node = NULL;
1087fd1d6662SDoug Moore 	child = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
1088fd1d6662SDoug Moore 	while (!pctrie_isleaf(child)) {
1089fd1d6662SDoug Moore 		parent = node;
1090fd1d6662SDoug Moore 		node = child;
1091fd1d6662SDoug Moore 		slot = pctrie_slot(node, index);
1092fd1d6662SDoug Moore 		child = pctrie_node_load(&node->pn_child[slot], NULL,
1093fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
1094fd1d6662SDoug Moore 	}
1095fd1d6662SDoug Moore 	m = pctrie_match_value(child, index);
1096fd1d6662SDoug Moore 	if (m != NULL)
1097fd1d6662SDoug Moore 		pctrie_remove(ptree, index, parent, node, freenode);
10983b7ffacdSDoug Moore 	return (m);
1099f2cc1285SJeff Roberson }
1100f2cc1285SJeff Roberson 
1101f2cc1285SJeff Roberson /*
1102fd1d6662SDoug Moore  * Remove from the trie the leaf last chosen by the iterator, and
1103fd1d6662SDoug Moore  * adjust the path if it's last member is to be freed.
1104fd1d6662SDoug Moore  */
1105fd1d6662SDoug Moore uint64_t *
1106fd1d6662SDoug Moore pctrie_iter_remove(struct pctrie_iter *it, struct pctrie_node **freenode)
1107fd1d6662SDoug Moore {
1108fd1d6662SDoug Moore 	struct pctrie_node *child, *node, *parent;
1109fd1d6662SDoug Moore 	uint64_t *m;
1110fd1d6662SDoug Moore 	int slot;
1111fd1d6662SDoug Moore 
1112fd1d6662SDoug Moore 	DEBUG_POISON_POINTER(parent);
1113fd1d6662SDoug Moore 	*freenode = NULL;
1114fd1d6662SDoug Moore 	if (it->top >= 1) {
1115fd1d6662SDoug Moore 		parent = (it->top >= 2) ? it->path[it->top - 2] : NULL;
1116fd1d6662SDoug Moore 		node = it->path[it->top - 1];
1117fd1d6662SDoug Moore 		slot = pctrie_slot(node, it->index);
1118fd1d6662SDoug Moore 		child = pctrie_node_load(&node->pn_child[slot], NULL,
1119fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
1120fd1d6662SDoug Moore 	} else {
1121fd1d6662SDoug Moore 		node = NULL;
1122fd1d6662SDoug Moore 		child = pctrie_root_load(it->ptree, NULL, PCTRIE_LOCKED);
1123fd1d6662SDoug Moore 	}
1124fd1d6662SDoug Moore 	m = pctrie_match_value(child, it->index);
1125fd1d6662SDoug Moore 	if (m != NULL)
1126fd1d6662SDoug Moore 		pctrie_remove(it->ptree, it->index, parent, node, freenode);
1127fd1d6662SDoug Moore 	if (*freenode != NULL)
1128fd1d6662SDoug Moore 		--it->top;
1129fd1d6662SDoug Moore 	return (m);
1130fd1d6662SDoug Moore }
1131fd1d6662SDoug Moore 
1132fd1d6662SDoug Moore /*
1133fd1d6662SDoug Moore  * Return the current leaf, assuming access is externally synchronized by a
1134fd1d6662SDoug Moore  * lock.
1135fd1d6662SDoug Moore  */
1136fd1d6662SDoug Moore uint64_t *
1137fd1d6662SDoug Moore pctrie_iter_value(struct pctrie_iter *it)
1138fd1d6662SDoug Moore {
1139fd1d6662SDoug Moore 	struct pctrie_node *node;
1140fd1d6662SDoug Moore 	int slot;
1141fd1d6662SDoug Moore 
1142fd1d6662SDoug Moore 	if (it->top == 0)
1143fd1d6662SDoug Moore 		node = pctrie_root_load(it->ptree, NULL,
1144fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
1145fd1d6662SDoug Moore 	else {
1146fd1d6662SDoug Moore 		node = it->path[it->top - 1];
1147fd1d6662SDoug Moore 		slot = pctrie_slot(node, it->index);
1148fd1d6662SDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
1149fd1d6662SDoug Moore 		    PCTRIE_LOCKED);
1150fd1d6662SDoug Moore 	}
1151fd1d6662SDoug Moore 	return (pctrie_toval(node));
1152fd1d6662SDoug Moore }
1153fd1d6662SDoug Moore 
1154fd1d6662SDoug Moore /*
1155c0d0bc2bSDoug Moore  * Walk the subtrie rooted at *pnode in order, invoking callback on leaves and
1156c0d0bc2bSDoug Moore  * using the leftmost child pointer for path reversal, until an interior node
1157c0d0bc2bSDoug Moore  * is stripped of all children, and returned for deallocation, with *pnode left
1158d19851f0SDoug Moore  * pointing to the parent of that node.
1159f2cc1285SJeff Roberson  */
1160c0d0bc2bSDoug Moore static __always_inline struct pctrie_node *
1161c0d0bc2bSDoug Moore pctrie_reclaim_prune(struct pctrie_node **pnode, struct pctrie_node *parent,
1162c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
1163f2cc1285SJeff Roberson {
11643b7ffacdSDoug Moore 	struct pctrie_node *child, *node;
11653b7ffacdSDoug Moore 	int slot;
1166f2cc1285SJeff Roberson 
11673b7ffacdSDoug Moore 	node = *pnode;
11683b7ffacdSDoug Moore 	while (node->pn_popmap != 0) {
11693b7ffacdSDoug Moore 		slot = ffs(node->pn_popmap) - 1;
11703b7ffacdSDoug Moore 		node->pn_popmap ^= 1 << slot;
11713b7ffacdSDoug Moore 		child = pctrie_node_load(&node->pn_child[slot], NULL,
11723b7ffacdSDoug Moore 		    PCTRIE_UNSERIALIZED);
11733b7ffacdSDoug Moore 		pctrie_node_store(&node->pn_child[slot], PCTRIE_NULL,
11743b7ffacdSDoug Moore 		    PCTRIE_UNSERIALIZED);
1175c0d0bc2bSDoug Moore 		if (pctrie_isleaf(child)) {
1176c0d0bc2bSDoug Moore 			if (callback != NULL)
1177c0d0bc2bSDoug Moore 				callback(pctrie_toptr(child, keyoff), arg);
11783b7ffacdSDoug Moore 			continue;
1179c0d0bc2bSDoug Moore 		}
11803b7ffacdSDoug Moore 		/* Climb one level down the trie. */
11813b7ffacdSDoug Moore 		pctrie_node_store(&node->pn_child[0], parent,
11823b7ffacdSDoug Moore 		    PCTRIE_UNSERIALIZED);
11833b7ffacdSDoug Moore 		parent = node;
11843b7ffacdSDoug Moore 		node = child;
11853b7ffacdSDoug Moore 	}
11863b7ffacdSDoug Moore 	*pnode = parent;
11873b7ffacdSDoug Moore 	return (node);
11883b7ffacdSDoug Moore }
11893b7ffacdSDoug Moore 
11903b7ffacdSDoug Moore /*
11913b7ffacdSDoug Moore  * Recover the node parent from its first child and continue pruning.
11923b7ffacdSDoug Moore  */
1193c0d0bc2bSDoug Moore static __always_inline struct pctrie_node *
1194c0d0bc2bSDoug Moore pctrie_reclaim_resume_compound(struct pctrie_node **pnode,
1195c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
11963b7ffacdSDoug Moore {
11973b7ffacdSDoug Moore 	struct pctrie_node *parent, *node;
11983b7ffacdSDoug Moore 
11993b7ffacdSDoug Moore 	node = *pnode;
12003b7ffacdSDoug Moore 	if (node == NULL)
12013b7ffacdSDoug Moore 		return (NULL);
12023b7ffacdSDoug Moore 	/* Climb one level up the trie. */
12033b7ffacdSDoug Moore 	parent = pctrie_node_load(&node->pn_child[0], NULL,
12043b7ffacdSDoug Moore 	    PCTRIE_UNSERIALIZED);
12053b7ffacdSDoug Moore 	pctrie_node_store(&node->pn_child[0], PCTRIE_NULL, PCTRIE_UNSERIALIZED);
1206c0d0bc2bSDoug Moore 	return (pctrie_reclaim_prune(pnode, parent, callback, keyoff, arg));
12073b7ffacdSDoug Moore }
12083b7ffacdSDoug Moore 
12093b7ffacdSDoug Moore /*
12103b7ffacdSDoug Moore  * Find the trie root, and start pruning with a NULL parent.
12113b7ffacdSDoug Moore  */
1212c0d0bc2bSDoug Moore static __always_inline struct pctrie_node *
1213c0d0bc2bSDoug Moore pctrie_reclaim_begin_compound(struct pctrie_node **pnode,
1214c0d0bc2bSDoug Moore     struct pctrie *ptree,
1215c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
12163b7ffacdSDoug Moore {
12173b7ffacdSDoug Moore 	struct pctrie_node *node;
12183b7ffacdSDoug Moore 
12193b7ffacdSDoug Moore 	node = pctrie_root_load(ptree, NULL, PCTRIE_UNSERIALIZED);
1220*a905c589SDoug Moore 	pctrie_node_store(pctrie_root(ptree), PCTRIE_NULL, PCTRIE_UNSERIALIZED);
1221c0d0bc2bSDoug Moore 	if (pctrie_isleaf(node)) {
1222c0d0bc2bSDoug Moore 		if (callback != NULL && node != PCTRIE_NULL)
1223c0d0bc2bSDoug Moore 			callback(pctrie_toptr(node, keyoff), arg);
12243b7ffacdSDoug Moore 		return (NULL);
1225c0d0bc2bSDoug Moore 	}
12263b7ffacdSDoug Moore 	*pnode = node;
1227c0d0bc2bSDoug Moore 	return (pctrie_reclaim_prune(pnode, NULL, callback, keyoff, arg));
1228c0d0bc2bSDoug Moore }
1229c0d0bc2bSDoug Moore 
1230c0d0bc2bSDoug Moore struct pctrie_node *
1231c0d0bc2bSDoug Moore pctrie_reclaim_resume(struct pctrie_node **pnode)
1232c0d0bc2bSDoug Moore {
1233c0d0bc2bSDoug Moore 	return (pctrie_reclaim_resume_compound(pnode, NULL, 0, NULL));
1234c0d0bc2bSDoug Moore }
1235c0d0bc2bSDoug Moore 
1236c0d0bc2bSDoug Moore struct pctrie_node *
1237c0d0bc2bSDoug Moore pctrie_reclaim_begin(struct pctrie_node **pnode, struct pctrie *ptree)
1238c0d0bc2bSDoug Moore {
1239c0d0bc2bSDoug Moore 	return (pctrie_reclaim_begin_compound(pnode, ptree, NULL, 0, NULL));
1240c0d0bc2bSDoug Moore }
1241c0d0bc2bSDoug Moore 
1242c0d0bc2bSDoug Moore struct pctrie_node *
1243c0d0bc2bSDoug Moore pctrie_reclaim_resume_cb(struct pctrie_node **pnode,
1244c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
1245c0d0bc2bSDoug Moore {
1246c0d0bc2bSDoug Moore 	return (pctrie_reclaim_resume_compound(pnode, callback, keyoff, arg));
1247c0d0bc2bSDoug Moore }
1248c0d0bc2bSDoug Moore 
1249c0d0bc2bSDoug Moore struct pctrie_node *
1250c0d0bc2bSDoug Moore pctrie_reclaim_begin_cb(struct pctrie_node **pnode, struct pctrie *ptree,
1251c0d0bc2bSDoug Moore     pctrie_cb_t callback, int keyoff, void *arg)
1252c0d0bc2bSDoug Moore {
1253c0d0bc2bSDoug Moore 	return (pctrie_reclaim_begin_compound(pnode, ptree,
1254c0d0bc2bSDoug Moore 	    callback, keyoff, arg));
12553b7ffacdSDoug Moore }
12563b7ffacdSDoug Moore 
12573b7ffacdSDoug Moore /*
12583b7ffacdSDoug Moore  * Replace an existing value in the trie with another one.
12593b7ffacdSDoug Moore  * Panics if there is not an old value in the trie at the new value's index.
12603b7ffacdSDoug Moore  */
12613b7ffacdSDoug Moore uint64_t *
12623b7ffacdSDoug Moore pctrie_replace(struct pctrie *ptree, uint64_t *newval)
12633b7ffacdSDoug Moore {
12643b7ffacdSDoug Moore 	struct pctrie_node *leaf, *parent, *node;
12653b7ffacdSDoug Moore 	uint64_t *m;
12663b7ffacdSDoug Moore 	uint64_t index;
12673b7ffacdSDoug Moore 	int slot;
12683b7ffacdSDoug Moore 
12693b7ffacdSDoug Moore 	leaf = pctrie_toleaf(newval);
12703b7ffacdSDoug Moore 	index = *newval;
12713b7ffacdSDoug Moore 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
12723b7ffacdSDoug Moore 	parent = NULL;
12733b7ffacdSDoug Moore 	for (;;) {
12743b7ffacdSDoug Moore 		if (pctrie_isleaf(node)) {
12753b7ffacdSDoug Moore 			if ((m = pctrie_toval(node)) != NULL && *m == index) {
12763b7ffacdSDoug Moore 				if (parent == NULL)
1277*a905c589SDoug Moore 					pctrie_node_store(pctrie_root(ptree),
12789147a0c9SDoug Moore 					    leaf, PCTRIE_LOCKED);
12793b7ffacdSDoug Moore 				else
12803b7ffacdSDoug Moore 					pctrie_node_store(
12813b7ffacdSDoug Moore 					    &parent->pn_child[slot], leaf,
12823b7ffacdSDoug Moore 					    PCTRIE_LOCKED);
12833b7ffacdSDoug Moore 				return (m);
12843b7ffacdSDoug Moore 			}
12853b7ffacdSDoug Moore 			break;
12863b7ffacdSDoug Moore 		}
12873b7ffacdSDoug Moore 		if (pctrie_keybarr(node, index, &slot))
12883b7ffacdSDoug Moore 			break;
12893b7ffacdSDoug Moore 		parent = node;
12903b7ffacdSDoug Moore 		node = pctrie_node_load(&node->pn_child[slot], NULL,
12913b7ffacdSDoug Moore 		    PCTRIE_LOCKED);
12923b7ffacdSDoug Moore 	}
12933b7ffacdSDoug Moore 	panic("%s: original replacing value not found", __func__);
1294f2cc1285SJeff Roberson }
1295f2cc1285SJeff Roberson 
1296f2cc1285SJeff Roberson #ifdef DDB
1297f2cc1285SJeff Roberson /*
1298f2cc1285SJeff Roberson  * Show details about the given node.
1299f2cc1285SJeff Roberson  */
1300f2cc1285SJeff Roberson DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
1301f2cc1285SJeff Roberson {
13023c30b235SConrad Meyer 	struct pctrie_node *node, *tmp;
13038df38859SDoug Moore 	int slot;
13048df38859SDoug Moore 	pn_popmap_t popmap;
1305f2cc1285SJeff Roberson 
1306f2cc1285SJeff Roberson         if (!have_addr)
1307f2cc1285SJeff Roberson                 return;
1308f2cc1285SJeff Roberson 	node = (struct pctrie_node *)addr;
13098df38859SDoug Moore 	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
13108df38859SDoug Moore 	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
131138f5cb1bSDoug Moore 	    node->pn_clev / PCTRIE_WIDTH);
13128df38859SDoug Moore 	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
13138df38859SDoug Moore 		slot = ffs(popmap) - 1;
13148df38859SDoug Moore 		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
13153c30b235SConrad Meyer 		    PCTRIE_UNSERIALIZED);
1316f2cc1285SJeff Roberson 		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
13178df38859SDoug Moore 		    slot, (void *)tmp,
13183c30b235SConrad Meyer 		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
131938f5cb1bSDoug Moore 		    node->pn_clev / PCTRIE_WIDTH);
1320f2cc1285SJeff Roberson 	}
13213c30b235SConrad Meyer }
1322f2cc1285SJeff Roberson #endif /* DDB */
1323