xref: /openbsd-src/sys/net/art.c (revision d59bb9942320b767f2a19aaa7690c8c6e30b724c)
1 /*	$OpenBSD: art.c,v 1.27 2017/02/28 09:50:13 mpi Exp $ */
2 
3 /*
4  * Copyright (c) 2015 Martin Pieuchot
5  * Copyright (c) 2001 Yoichi Hariguchi
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /*
21  * Allotment Routing Table (ART).
22  *
23  * Yoichi Hariguchi paper can be found at:
24  *	http://www.hariguchi.org/art/art.pdf
25  */
26 
27 #ifndef _KERNEL
28 #include "kern_compat.h"
29 #else
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/malloc.h>
33 #include <sys/pool.h>
34 #include <sys/task.h>
35 #include <sys/socket.h>
36 #endif
37 
38 #include <net/art.h>
39 
/*
 * Pointers stored in a table's heap are tagged in their low bit to
 * distinguish leaves from subtables: a leaf is a struct art_node *
 * with the bit clear (NULL counts as a leaf), a subtable is a
 * struct art_table * stored with the bit set (see ASNODE()).
 */
#define ISLEAF(e)	(((unsigned long)(e) & 1) == 0)
#define SUBTABLE(e)	((struct art_table *)((unsigned long)(e) & ~1))
#define ASNODE(t)	((struct art_node *)((unsigned long)(t) | 1))
43 
/*
 * Allotment Table.
 *
 * A single level of the ART trie covering ``at_bits'' bits of the
 * address, starting ``at_offset'' bits in.  Routes are stored in a
 * complete binary heap: entries [2^p, 2^(p+1)) hold the prefixes of
 * length ``p'' within this stride (see the base index formula in
 * art_bindex()), and [at_minfringe, 2 * at_minfringe) is the fringe,
 * i.e. the full-stride (host-route) entries.
 */
struct art_table {
	struct art_table	*at_parent;	/* Parent table */
	uint32_t		 at_index;	/* Index in the parent table */
	uint32_t		 at_minfringe;	/* Index that fringe begins */
	uint32_t		 at_level;	/* Level of the table */
	uint8_t			 at_bits;	/* Stride length of the table */
	uint8_t			 at_offset;	/* Sum of parents' stride len */

	/*
	 * Items stored in the heap are pointers to nodes, in the leaf
	 * case, or tables otherwise.  One exception is index 0 which
	 * is a route counter.
	 */
	union {
		struct srp		 node;
		unsigned long		 count;
	} *at_heap;				/* Array of 2^(slen+1) items */
};
#define	at_refcnt	at_heap[0].count/* Refcounter (1 per different route) */
#define	at_default	at_heap[1].node	/* Default route (was in parent heap) */

/* Heap size for an ART table of stride length ``slen''. */
#define AT_HEAPSIZE(slen)	((1 << ((slen) + 1)) * sizeof(void *))
70 
/*
 * Internal functions.  The write side (insert/delete/walk) is
 * serialized by the per-table ar_lock (see rw_assert_wrlock() in
 * art_insert()/art_delete()).
 */
int			 art_bindex(struct art_table *, uint8_t *, int);
void			 art_allot(struct art_table *at, int, struct art_node *,
			     struct art_node *);
struct art_table	*art_table_get(struct art_root *, struct art_table *,
			     int);
struct art_table	*art_table_put(struct art_root *, struct art_table *);
struct art_node		*art_table_insert(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_node		*art_table_delete(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_table	*art_table_ref(struct art_root *, struct art_table *);
int			 art_table_free(struct art_root *, struct art_table *);
int			 art_table_walk(struct art_root *, struct art_table *,
			     int (*f)(struct art_node *, void *), void *);
int			 art_walk_apply(struct art_root *,
			     struct art_node *, struct art_node *,
			     int (*f)(struct art_node *, void *), void *);
void			 art_table_gc(void *);
void			 art_gc(void *);

/* Backing pools, initialized in art_init(). */
struct pool		an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;

/* Tables queued for deferred destruction, linked through at_parent. */
struct art_table	*art_table_gc_list = NULL;
struct mutex		 art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_table_gc_task =
			     TASK_INITIALIZER(art_table_gc, NULL);

/* Nodes queued for deferred destruction, linked through an_gc. */
struct art_node		*art_node_gc_list = NULL;
struct mutex		 art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);
101 
102 void
103 art_init(void)
104 {
105 	pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0,
106 	    "art_node", NULL);
107 	pool_init(&at_pool, sizeof(struct art_table), 0, IPL_SOFTNET, 0,
108 	    "art_table", NULL);
109 	pool_init(&at_heap_4_pool, AT_HEAPSIZE(4), 0, IPL_SOFTNET, 0,
110 	    "art_heap4", NULL);
111 	pool_init(&at_heap_8_pool, AT_HEAPSIZE(8), 0, IPL_SOFTNET, 0,
112 	    "art_heap8", &pool_allocator_single);
113 }
114 
115 /*
116  * Per routing table initialization API function.
117  */
118 struct art_root *
119 art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
120 {
121 	struct art_root		*ar;
122 	int			 i;
123 
124 	ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO);
125 	if (ar == NULL)
126 		return (NULL);
127 
128 	switch (alen) {
129 	case 32:
130 		ar->ar_alen = 32;
131 		ar->ar_nlvl = 7;
132 		ar->ar_bits[0] = 8;
133 		for (i = 1; i < ar->ar_nlvl; i++)
134 			ar->ar_bits[i] = 4;
135 		break;
136 	case 128:
137 		ar->ar_alen = 128;
138 		ar->ar_nlvl = 32;
139 		for (i = 0; i < ar->ar_nlvl; i++)
140 			ar->ar_bits[i] = 4;
141 		break;
142 	default:
143 		printf("%s: incorrect address length %u\n", __func__, alen);
144 		free(ar, M_RTABLE, sizeof(*ar));
145 		return (NULL);
146 	}
147 
148 	ar->ar_off = off;
149 	ar->ar_rtableid = rtableid;
150 	rw_init(&ar->ar_lock, "art");
151 
152 	return (ar);
153 }
154 
155 /*
156  * Return 1 if ``old'' and ``new`` are identical, 0 otherwise.
157  */
158 static inline int
159 art_check_duplicate(struct art_root *ar, struct art_node *old,
160     struct art_node *new)
161 {
162 	if (old == NULL)
163 		return (0);
164 
165 	if (old->an_plen == new->an_plen)
166 		return (1);
167 
168 	return (0);
169 }
170 
171 /*
172  * Return the base index of the part of ``addr'' and ``plen''
173  * corresponding to the range covered by the table ``at''.
174  *
175  * In other words, this function take the multi-level (complete)
176  * address ``addr'' and prefix length ``plen'' and return the
177  * single level base index for the table ``at''.
178  *
179  * For example with an address size of 32bit divided into four
180  * 8bit-long tables, there's a maximum of 4 base indexes if the
181  * prefix length is > 24.
182  */
int
art_bindex(struct art_table *at, uint8_t *addr, int plen)
{
	uint8_t			boff, bend;
	uint32_t		k;

	/* This table only covers prefix lengths within its own stride. */
	if (plen < at->at_offset || plen > (at->at_offset + at->at_bits))
		return (-1);

	/*
	 * We are only interested in the part of the prefix length
	 * corresponding to the range of this table.
	 */
	plen -= at->at_offset;

	/*
	 * Jump to the first byte of the address containing bits
	 * covered by this table.
	 */
	addr += (at->at_offset / 8);

	/* ``at'' covers the bit range between ``boff'' & ``bend''. */
	boff = (at->at_offset % 8);
	bend = (at->at_bits + boff);

	KASSERT(bend <= 32);

	/*
	 * Extract the stride's bits into the low bits of ``k''.  The
	 * number of address bytes to read depends on where the bit
	 * range ends; the first byte is always masked to drop the
	 * ``boff'' bits belonging to the parent tables.
	 */
	if (bend > 24) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] << (bend - 24);
		k |= addr[3] >> (32 - bend);
	} else if (bend > 16) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] >> (24 - bend);
	} else if (bend > 8) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] >> (16 - bend);
	} else {
		k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1);
	}

	/*
	 * Single level base index formula:  drop the address bits
	 * beyond the prefix length, then offset into the heap level
	 * [2^plen, 2^(plen+1)) that stores all ``plen''-bit prefixes.
	 */
	return ((k >> (at->at_bits - plen)) + (1 << plen));
}
231 
232 /*
233  * Single level lookup function.
234  *
235  * Return the fringe index of the part of ``addr''
236  * corresponding to the range covered by the table ``at''.
237  */
static inline int
art_findex(struct art_table *at, uint8_t *addr)
{
	/* A fringe index is the base index for a full-stride prefix. */
	return art_bindex(at, addr, (at->at_offset + at->at_bits));
}
243 
244 /*
245  * (Non-perfect) lookup API function.
246  *
247  * Return the best existing match for a destination.
248  */
struct art_node *
art_match(struct art_root *ar, void *addr, struct srp_ref *nsr)
{
	struct srp_ref		dsr, ndsr;
	void			*entry;
	struct art_table	*at;
	struct art_node		*dflt, *ndflt;
	int			j;

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/*
	 * Remember the default route of each table we visit in case
	 * we do not find a better matching route.
	 */
	dflt = srp_enter(&dsr, &at->at_default);

	/*
	 * Iterate until we find a leaf.
	 */
	while (1) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* If this is a leaf (NULL is a leaf) we're done. */
		if (ISLEAF(entry))
			break;

		at = SUBTABLE(entry);

		/*
		 * A non-NULL default route in a deeper table is more
		 * specific than the one remembered so far: move the
		 * reference in ``dsr'' over to it.  Otherwise keep
		 * holding the previous default.
		 */
		ndflt = srp_enter(&ndsr, &at->at_default);
		if (ndflt != NULL) {
			srp_leave(&dsr);
			dsr = ndsr;
			dflt = ndflt;
		} else
			srp_leave(&ndsr);
	}

	if (entry == NULL) {
		/*
		 * No fringe entry matched; hand the caller the SRP
		 * reference to the most specific default route instead.
		 */
		srp_leave(nsr);
		*nsr = dsr;
		KASSERT(ISLEAF(dflt));
		return (dflt);
	}

	srp_leave(&dsr);
done:
	KASSERT(ISLEAF(entry));
	return (entry);
}
305 
306 /*
307  * Perfect lookup API function.
308  *
309  * Return a perfect match for a destination/prefix-length pair or NULL if
310  * it does not exist.
311  */
struct art_node *
art_lookup(struct art_root *ar, void *addr, int plen, struct srp_ref *nsr)
{
	void			*entry;
	struct art_table	*at;
	int			 i, j;

	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/* Default route */
	if (plen == 0) {
		entry = srp_follow(nsr, &at->at_default);
		goto done;
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride length at this level the entry must
	 * be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* A leaf is a match, but not a perfect one, or NULL */
		if (ISLEAF(entry))
			return (NULL);

		at = SUBTABLE(entry);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	entry = srp_follow(nsr, &at->at_heap[i].node);
	/*
	 * If a subtable hangs off this index, the route for this exact
	 * prefix was moved into the subtable's default slot (see
	 * art_table_get()).
	 */
	if (!ISLEAF(entry))
		entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);

done:
	KASSERT(ISLEAF(entry));
	return (entry);
}
362 
363 
364 /*
365  * Insertion API function.
366  *
367  * Insert the given node or return an existing one if a node with the
368  * same destination/mask pair is already present.
369  */
struct art_node *
art_insert(struct art_root *ar, struct art_node *an, void *addr, int plen)
{
	struct art_table	*at, *child;
	struct art_node		*node;
	int			 i, j;

	/* The caller must hold the write lock on ``ar''. */
	rw_assert_wrlock(&ar->ar_lock);
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	/* Lazily allocate the root table on first insertion. */
	at = srp_get_locked(&ar->ar_root);
	if (at == NULL) {
		at = art_table_get(ar, NULL, -1);
		if (at == NULL)
			return (NULL);

		srp_swap_locked(&ar->ar_root, at);
	}

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		if (node != NULL)
			return (node);

		art_table_ref(ar, at);
		srp_swap_locked(&at->at_default, an);
		return (an);
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride length at this level the entry must
	 * be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/*
		 * If the node corresponding to the fringe index is
		 * a leaf we need to allocate a subtable.  The route
		 * entry of this node will then become the default
		 * route of the subtable.
		 */
		if (ISLEAF(node)) {
			child = art_table_get(ar, at, j);
			if (child == NULL)
				return (NULL);

			art_table_ref(ar, at);
			srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
			at = child;
		} else
			at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_insert(ar, at, i, an));
}
434 
435 /*
436  * Single level insertion.
437  */
struct art_node *
art_table_insert(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node	*prev, *node;

	node = srp_get_locked(&at->at_heap[i].node);
	/*
	 * If a subtable hangs off this index, the route currently
	 * stored for it lives in the subtable's default slot.
	 */
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	/* Return the existing node instead of inserting a duplicate. */
	if (art_check_duplicate(ar, prev, an))
		return (prev);

	/* One reference per route stored in this table. */
	art_table_ref(ar, at);

	/*
	 * If the index `i' of the route that we are inserting is not
	 * a fringe index, we need to allot this new route pointer to
	 * all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, prev, an);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, an);
	else
		srp_swap_locked(&at->at_heap[i].node, an);

	return (an);
}
469 
470 
471 /*
472  * Deletion API function.
473  */
struct art_node *
art_delete(struct art_root *ar, struct art_node *an, void *addr, int plen)
{
	struct art_table	*at;
	struct art_node		*node;
	int			 i, j;

	/* The caller must hold the write lock on ``ar''. */
	rw_assert_wrlock(&ar->ar_lock);
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	at = srp_get_locked(&ar->ar_root);
	if (at == NULL)
		return (NULL);

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&at->at_default, NULL);
		/* Drop the reference taken by the insertion. */
		art_table_free(ar, at);
		return (node);
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride length at this level the entry must
	 * be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/* If this is a leaf, there is no route to delete. */
		if (ISLEAF(node))
			return (NULL);

		at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_delete(ar, at, i, an));
}
519 
520 /*
521  * Single level deletion.
522  */
struct art_node *
art_table_delete(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node		*next, *node;

#ifdef DIAGNOSTIC
	struct art_node		*prev;
#endif

	node = srp_get_locked(&at->at_heap[i].node);
#ifdef DIAGNOSTIC
	/* The caller must pass the node actually stored for index `i'. */
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	KASSERT(prev == an);
#endif

	/*
	 * Get the next most specific route for the index `i'.  It sits
	 * at the heap parent `i >> 1'; slots 0 and 1 hold the route
	 * counter and the default route, not less specific routes.
	 */
	if ((i >> 1) > 1)
		next = srp_get_locked(&at->at_heap[i >> 1].node);
	else
		next = NULL;

	/*
	 * If the index `i' of the route that we are removing is not
	 * a fringe index, we need to allot the next most specific
	 * route pointer to all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, an, next);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, next);
	else
		srp_swap_locked(&at->at_heap[i].node, next);

	/* We have removed an entry from this table. */
	art_table_free(ar, at);

	return (an);
}
566 
struct art_table *
art_table_ref(struct art_root *ar, struct art_table *at)
{
	/* One reference per route or child table present in ``at''. */
	at->at_refcnt++;
	return (at);
}
573 
574 static inline int
575 art_table_rele(struct art_table *at)
576 {
577 	if (at == NULL)
578 		return (0);
579 
580 	return (--at->at_refcnt == 0);
581 }
582 
/*
 * Drop a route reference on ``at''.  When the table becomes empty it
 * is unlinked and queued for destruction, and the same is repeated on
 * each parent table that becomes empty in turn.  Return 1 if at least
 * one table was freed, 0 otherwise.
 */
int
art_table_free(struct art_root *ar, struct art_table *at)
{
	if (!art_table_rele(at))
		return (0);

	/*
	 * Garbage collect this table and all its parents
	 * that are empty.
	 */
	do {
		at = art_table_put(ar, at);
	} while (art_table_rele(at));

	return (1);
}
600 
601 /*
602  * Iteration API function.
603  */
int
art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_table	*at;
	struct art_node		*node;
	int			 error = 0;

	rw_enter_write(&ar->ar_lock);
	at = srp_get_locked(&ar->ar_root);
	if (at != NULL) {
		/*
		 * Keep the root table alive while art_walk_apply()
		 * temporarily drops the lock around the callback.
		 */
		art_table_ref(ar, at);

		/*
		 * The default route should be processed here because the root
		 * table does not have a parent.
		 */
		node = srp_enter(&sr, &at->at_default);
		error = art_walk_apply(ar, node, NULL, f, arg);
		srp_leave(&sr);

		if (error == 0)
			error = art_table_walk(ar, at, f, arg);

		art_table_free(ar, at);
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}
634 
int
art_table_walk(struct art_root *ar, struct art_table *at,
    int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_node		*node, *next;
	struct art_table	*nat;
	int			 i, j, error = 0;
	uint32_t		 maxfringe = (at->at_minfringe << 1);

	/*
	 * Iterate non-fringe nodes in ``natural'' order.
	 */
	for (j = 1; j < at->at_minfringe; j += 2) {
		/*
		 * The default route (index 1) is processed by the
		 * parent table (where it belongs) otherwise it could
		 * be processed more than once.
		 */
		for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
			/*
			 * ``next'' is the less specific route stored at
			 * the heap parent of ``i''; art_walk_apply()
			 * skips the entry when both are equal so a
			 * route allotted to several indices is only
			 * visited once.
			 */
			next = srp_get_locked(&at->at_heap[i >> 1].node);

			node = srp_enter(&sr, &at->at_heap[i].node);
			error = art_walk_apply(ar, node, next, f, arg);
			srp_leave(&sr);

			if (error != 0)
				return (error);
		}
	}

	/*
	 * Iterate fringe nodes.
	 */
	for (i = at->at_minfringe; i < maxfringe; i++) {
		next = srp_get_locked(&at->at_heap[i >> 1].node);

		node = srp_enter(&sr, &at->at_heap[i].node);
		if (!ISLEAF(node)) {
			/*
			 * A fringe slot holding a subtable is visited
			 * through the subtable's default route, then
			 * the subtable itself is walked recursively.
			 * Keep a reference while the lock may be
			 * dropped inside art_walk_apply().
			 */
			nat = art_table_ref(ar, SUBTABLE(node));
			node = srp_follow(&sr, &nat->at_default);
		} else
			nat = NULL;

		error = art_walk_apply(ar, node, next, f, arg);
		srp_leave(&sr);

		if (error != 0) {
			art_table_free(ar, nat);
			return (error);
		}

		if (nat != NULL) {
			error = art_table_walk(ar, nat, f, arg);
			art_table_free(ar, nat);
			if (error != 0)
				return (error);
		}
	}

	return (0);
}
697 
698 int
699 art_walk_apply(struct art_root *ar,
700     struct art_node *an, struct art_node *next,
701     int (*f)(struct art_node *, void *), void *arg)
702 {
703 	int error = 0;
704 
705 	if ((an != NULL) && (an != next)) {
706 		rw_exit_write(&ar->ar_lock);
707 		error = (*f)(an, arg);
708 		rw_enter_write(&ar->ar_lock);
709 	}
710 
711 	return (error);
712 }
713 
714 
715 /*
716  * Create a table and use the given index to set its default route.
717  *
718  * Note:  This function does not modify the root or the parent.
719  */
struct art_table *
art_table_get(struct art_root *ar, struct art_table *parent, int j)
{
	struct art_table	*at;
	struct art_node		*node;
	void			*at_heap;
	uint32_t		 lvl;

	/* Slots 0 and 1 hold the route counter and the default route. */
	KASSERT(j != 0 && j != 1);
	KASSERT(parent != NULL || j == -1);

	if (parent != NULL)
		lvl = parent->at_level + 1;
	else
		lvl = 0;

	KASSERT(lvl < ar->ar_nlvl);

	at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO);
	if (at == NULL)
		return (NULL);

	/* Only 4 and 8 bit strides are configured (see art_alloc()). */
	switch (AT_HEAPSIZE(ar->ar_bits[lvl])) {
	case AT_HEAPSIZE(4):
		at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
		break;
	case AT_HEAPSIZE(8):
		at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
		break;
	default:
		panic("incorrect stride length %u", ar->ar_bits[lvl]);
	}

	if (at_heap == NULL) {
		pool_put(&at_pool, at);
		return (NULL);
	}

	at->at_parent = parent;
	at->at_index = j;
	at->at_minfringe = (1 << ar->ar_bits[lvl]);
	at->at_level = lvl;
	at->at_bits = ar->ar_bits[lvl];
	at->at_heap = at_heap;
	at->at_refcnt = 0;

	if (parent != NULL) {
		/*
		 * The route stored at index ``j'' of the parent becomes
		 * the default route of the new table.
		 */
		node = srp_get_locked(&parent->at_heap[j].node);
		/* node isn't being deleted, no srp_finalize needed */
		srp_swap_locked(&at->at_default, node);
		at->at_offset = (parent->at_offset + parent->at_bits);
	}

	return (at);
}
775 
776 
777 /*
778  * Delete a table and use its index to restore its parent's default route.
779  *
780  * Note:  Modify its parent to unlink the table from it.
781  */
struct art_table *
art_table_put(struct art_root *ar, struct art_table *at)
{
	struct art_table	*parent = at->at_parent;
	struct art_node		*node;
	uint32_t		 j = at->at_index;

	KASSERT(at->at_refcnt == 0);
	KASSERT(j != 0 && j != 1);

	if (parent != NULL) {
		KASSERT(j != -1);
		KASSERT(at->at_level == parent->at_level + 1);
		KASSERT(parent->at_refcnt >= 1);

		/* Give the route back to its parent. */
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&parent->at_heap[j].node, node);
	} else {
		/* Root table: detach it from the ART root. */
		KASSERT(j == -1);
		KASSERT(at->at_level == 0);
		srp_swap_locked(&ar->ar_root, NULL);
	}

	/*
	 * The table is no longer reachable from the trie; queue it for
	 * deferred destruction, reusing ``at_parent'' as the gc list
	 * linkage (see art_table_gc()).
	 */
	mtx_enter(&art_table_gc_mtx);
	at->at_parent = art_table_gc_list;
	art_table_gc_list = at;
	mtx_leave(&art_table_gc_mtx);

	task_add(systqmp, &art_table_gc_task);

	return (parent);
}
815 
/*
 * Garbage collection task: destroy the tables queued by
 * art_table_put() once no SRP reader can still see them.
 */
void
art_table_gc(void *null)
{
	struct art_table *at, *next;

	/* Detach the whole pending list under the mutex. */
	mtx_enter(&art_table_gc_mtx);
	at = art_table_gc_list;
	art_table_gc_list = NULL;
	mtx_leave(&art_table_gc_mtx);

	while (at != NULL) {
		next = at->at_parent;	/* gc linkage, see art_table_put() */

		/*
		 * Readers referenced the root table untagged (through
		 * ar_root) but subtables tagged (stored via ASNODE()
		 * in their parent's heap), so wait on the matching
		 * pointer value.
		 */
		if (at->at_level == 0)
			srp_finalize(at, "arttfini");
		else
			srp_finalize(ASNODE(at), "arttfini");

		switch (AT_HEAPSIZE(at->at_bits)) {
		case AT_HEAPSIZE(4):
			pool_put(&at_heap_4_pool, at->at_heap);
			break;
		case AT_HEAPSIZE(8):
			pool_put(&at_heap_8_pool, at->at_heap);
			break;
		default:
			panic("incorrect stride length %u", at->at_bits);
		}

		pool_put(&at_pool, at);

		at = next;
	}
}
850 
851 /*
852  * Substitute a node by another in the subtree whose root index is given.
853  *
854  * This function iterates on the table ``at'' at index ``i'' until no
855  * more ``old'' node can be replaced by ``new''.
856  *
857  * This function was originally written by Don Knuth in CWEB. The
858  * complicated ``goto''s are the result of expansion of the two
859  * following recursions:
860  *
861  *	art_allot(at, i, old, new)
862  *	{
863  *		int k = i;
864  *		if (at->at_heap[k] == old)
865  *			at->at_heap[k] = new;
866  *		if (k >= at->at_minfringe)
867  *			 return;
868  *		k <<= 1;
869  *		art_allot(at, k, old, new);
870  *		k++;
871  *		art_allot(at, k, old, new);
872  *	}
873  */
void
art_allot(struct art_table *at, int i, struct art_node *old,
    struct art_node *new)
{
	struct art_node		*node, *dflt;
	int			 k = i;

	KASSERT(i < at->at_minfringe);

again:
	/* Descend to the left child; stop descending at the fringe. */
	k <<= 1;
	if (k < at->at_minfringe)
		goto nonfringe;

	/* Change fringe nodes. */
	while (1) {
		node = srp_get_locked(&at->at_heap[k].node);
		if (!ISLEAF(node)) {
			/*
			 * A fringe slot holding a subtable keeps its
			 * route in the subtable's default slot.
			 */
			dflt = srp_get_locked(&SUBTABLE(node)->at_default);
			if (dflt == old) {
				srp_swap_locked(&SUBTABLE(node)->at_default,
				    new);
			}
		} else if (node == old) {
			srp_swap_locked(&at->at_heap[k].node, new);
		}
		/* Odd index: right child done, climb back up. */
		if (k % 2)
			goto moveup;
		k++;
	}

nonfringe:
	/*
	 * Only recurse below a non-fringe node that still holds
	 * ``old''; any other value means a more specific route
	 * shadows this subtree.
	 */
	node = srp_get_locked(&at->at_heap[k].node);
	if (node == old)
		goto again;
moveon:
	/* Move to the right sibling, or up when we were the right child. */
	if (k % 2)
		goto moveup;
	k++;
	goto nonfringe;
moveup:
	k >>= 1;
	srp_swap_locked(&at->at_heap[k].node, new);

	/* Change non-fringe node. */
	if (k != i)
		goto moveon;
}
922 
923 struct art_node *
924 art_get(void *dst, uint8_t plen)
925 {
926 	struct art_node		*an;
927 
928 	an = pool_get(&an_pool, PR_NOWAIT | PR_ZERO);
929 	if (an == NULL)
930 		return (NULL);
931 
932 	an->an_plen = plen;
933 	SRPL_INIT(&an->an_rtlist);
934 
935 	return (an);
936 }
937 
void
art_put(struct art_node *an)
{
	KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist));

	/*
	 * Queue the node for deferred destruction so that concurrent
	 * SRP readers can drain first (see art_gc()); ``an_gc'' is the
	 * list linkage.
	 */
	mtx_enter(&art_node_gc_mtx);
	an->an_gc = art_node_gc_list;
	art_node_gc_list = an;
	mtx_leave(&art_node_gc_mtx);

	task_add(systqmp, &art_node_gc_task);
}
950 
951 void
952 art_gc(void *null)
953 {
954 	struct art_node		*an, *next;
955 
956 	mtx_enter(&art_node_gc_mtx);
957 	an = art_node_gc_list;
958 	art_node_gc_list = NULL;
959 	mtx_leave(&art_node_gc_mtx);
960 
961 	while (an != NULL) {
962 		next = an->an_gc;
963 
964 		srp_finalize(an, "artnfini");
965 
966 		pool_put(&an_pool, an);
967 
968 		an = next;
969 	}
970 }
971