/*	$OpenBSD: art.c,v 1.22 2016/07/19 10:51:44 mpi Exp $ */

/*
 * Copyright (c) 2015 Martin Pieuchot
 * Copyright (c) 2001 Yoichi Hariguchi
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Allotment Routing Table (ART).
 *
 * Yoichi Hariguchi's paper can be found at:
 *	http://www.hariguchi.org/art/art.pdf
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/task.h>
#include <sys/socket.h>
#endif

#include <net/art.h>

#define ISLEAF(e)	(((unsigned long)(e) & 1) == 0)
#define SUBTABLE(e)	((struct art_table *)((unsigned long)(e) & ~1))
#define ASNODE(t)	((struct art_node *)((unsigned long)(t) | 1))

/*
 * Allotment Table.
 */
struct art_table {
	struct art_table	*at_parent;	/* Parent table */
	uint32_t		 at_index;	/* Index in the parent table */
	uint32_t		 at_minfringe;	/* Index that fringe begins */
	uint32_t		 at_level;	/* Level of the table */
	uint8_t			 at_bits;	/* Stride length of the table */
	uint8_t			 at_offset;	/* Sum of parents' stride len */

	/*
	 * Items stored in the heap are pointers to nodes, in the leaf
	 * case, or tables otherwise.  One exception is index 0 which
	 * is a route counter.
	 */
	union {
		struct srp		 node;
		unsigned long		 count;
	} *at_heap;				/* Array of 2^(slen+1) items */
};
#define	at_refcnt	at_heap[0].count /* Refcounter (1 per different route) */
#define	at_default	at_heap[1].node	 /* Default route (was in parent heap) */

/* Heap size for an ART table of stride length ``slen''. */
#define AT_HEAPSIZE(slen)	((1 << ((slen) + 1)) * sizeof(void *))
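
/*
 * Illustrative arithmetic, assuming 8-byte pointers (LP64): a 4-bit
 * stride yields a heap of 1 << 5 == 32 entries, i.e. AT_HEAPSIZE(4)
 * == 256 bytes, and an 8-bit stride yields 1 << 9 == 512 entries,
 * i.e. AT_HEAPSIZE(8) == 4096 bytes.  These two sizes correspond to
 * the dedicated pools initialized in art_init() below.
 */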

int			 art_bindex(struct art_table *, uint8_t *, int);
void			 art_allot(struct art_table *at, int, struct art_node *,
			     struct art_node *);
struct art_table	*art_table_get(struct art_root *, struct art_table *,
			     int);
struct art_table	*art_table_put(struct art_root *, struct art_table *);
struct art_node		*art_table_insert(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_node		*art_table_delete(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_table	*art_table_ref(struct art_root *, struct art_table *);
int			 art_table_free(struct art_root *, struct art_table *);
int			 art_table_walk(struct art_root *, struct art_table *,
			     int (*f)(struct art_node *, void *), void *);
int			 art_walk_apply(struct art_root *,
			     struct art_node *, struct art_node *,
			     int (*f)(struct art_node *, void *), void *);
void			 art_table_gc(void *);
void			 art_gc(void *);

struct pool		an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;

struct art_table	*art_table_gc_list = NULL;
struct mutex		 art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_table_gc_task =
			     TASK_INITIALIZER(art_table_gc, NULL);

struct art_node		*art_node_gc_list = NULL;
struct mutex		 art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);

void
art_init(void)
{
	pool_init(&an_pool, sizeof(struct art_node), 0, 0, 0, "art_node", NULL);
	pool_setipl(&an_pool, IPL_SOFTNET);

	pool_init(&at_pool, sizeof(struct art_table), 0, 0, 0, "art_table",
	    NULL);
	pool_setipl(&at_pool, IPL_SOFTNET);

	pool_init(&at_heap_4_pool, AT_HEAPSIZE(4), 0, 0, 0, "art_heap4", NULL);
	pool_setipl(&at_heap_4_pool, IPL_SOFTNET);

	pool_init(&at_heap_8_pool, AT_HEAPSIZE(8), 0, 0, 0, "art_heap8",
	    &pool_allocator_single);
	pool_setipl(&at_heap_8_pool, IPL_SOFTNET);
}

/*
 * Per routing table initialization API function.
 */
struct art_root *
art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	struct art_root		*ar;
	int			 i;

	ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO);
	if (ar == NULL)
		return (NULL);

	switch (alen) {
	case 32:
		ar->ar_alen = 32;
		ar->ar_nlvl = 7;
		ar->ar_bits[0] = 8;
		for (i = 1; i < ar->ar_nlvl; i++)
			ar->ar_bits[i] = 4;
		break;
	case 128:
		ar->ar_alen = 128;
		ar->ar_nlvl = 32;
		for (i = 0; i < ar->ar_nlvl; i++)
			ar->ar_bits[i] = 4;
		break;
	default:
		printf("%s: incorrect address length %u\n", __func__, alen);
		free(ar, M_RTABLE, sizeof(*ar));
		return (NULL);
	}

	ar->ar_off = off;
	ar->ar_rtableid = rtableid;

	return (ar);
}
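
/*
 * Illustrative check of the configurations above: the strides sum to
 * the address length in both cases, 8 + 6 * 4 == 32 bits over 7
 * levels and 32 * 4 == 128 bits over 32 levels, so the fringe of the
 * deepest table always covers host routes.
 */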

/*
 * Return 1 if ``old'' and ``new'' are identical, 0 otherwise.
 */
static inline int
art_check_duplicate(struct art_root *ar, struct art_node *old,
    struct art_node *new)
{
	if (old == NULL)
		return (0);

	if (old->an_plen == new->an_plen)
		return (1);

	return (0);
}

/*
 * Return the base index of the part of ``addr'' and ``plen''
 * corresponding to the range covered by the table ``at''.
 *
 * In other words, this function takes the multi-level (complete)
 * address ``addr'' and prefix length ``plen'' and returns the
 * single level base index for the table ``at''.
 *
 * For example, with an address size of 32 bits divided into four
 * 8-bit tables, there are up to 4 base indexes (one per table on
 * the path) when the prefix length is > 24.
 */
int
art_bindex(struct art_table *at, uint8_t *addr, int plen)
{
	uint8_t			boff, bend;
	uint32_t		 k;

	if (plen < at->at_offset || plen > (at->at_offset + at->at_bits))
		return (-1);

	/*
	 * We are only interested in the part of the prefix length
	 * corresponding to the range of this table.
	 */
	plen -= at->at_offset;

	/*
	 * Jump to the first byte of the address containing bits
	 * covered by this table.
	 */
	addr += (at->at_offset / 8);

	/* ``at'' covers the bit range between ``boff'' & ``bend''. */
	boff = (at->at_offset % 8);
	bend = (at->at_bits + boff);

	KASSERT(bend <= 32);

	if (bend > 24) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] << (bend - 24);
		k |= addr[3] >> (32 - bend);
	} else if (bend > 16) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] >> (24 - bend);
	} else if (bend > 8) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] >> (16 - bend);
	} else {
		k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1);
	}

	/*
	 * Single level base index formula:
	 */
	return ((k >> (at->at_bits - plen)) + (1 << plen));
}

/*
 * Single level lookup function.
 *
 * Return the fringe index of the part of ``addr''
 * corresponding to the range covered by the table ``at''.
 */
static inline int
art_findex(struct art_table *at, uint8_t *addr)
{
	return art_bindex(at, addr, (at->at_offset + at->at_bits));
}
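
/*
 * Worked example (illustrative, assuming the level 0 table of an
 * IPv4 root: at_offset == 0, at_bits == 8, at_minfringe == 256):
 *
 *	10.0.0.0/8:  k == 10, art_bindex() == (10 >> 0) + (1 << 8) == 266,
 *		     a fringe index;
 *	10.0.0.0/7:  art_bindex() == (10 >> 1) + (1 << 7) == 133, a
 *		     non-fringe index whose fringe subtree covers the
 *		     first octets 10 and 11 (indexes 266 and 267).
 *
 * art_findex() always passes the full stride, so any 10.x.y.z
 * destination also maps to fringe index 266.
 */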

/*
 * (Non-perfect) lookup API function.
 *
 * Return the best existing match for a destination.
 */
struct art_node *
art_match(struct art_root *ar, uint8_t *addr, struct srp_ref *nsr)
{
	struct srp_ref		dsr, ndsr;
	void			*entry;
	struct art_table	*at;
	struct art_node		*dflt, *ndflt;
	int			j;

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/*
	 * Remember the default route of each table we visit in case
	 * we do not find a better matching route.
	 */
	dflt = srp_enter(&dsr, &at->at_default);

	/*
	 * Iterate until we find a leaf.
	 */
	while (1) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* If this is a leaf (NULL is a leaf) we're done. */
		if (ISLEAF(entry))
			break;

		at = SUBTABLE(entry);

		ndflt = srp_enter(&ndsr, &at->at_default);
		if (ndflt != NULL) {
			srp_leave(&dsr);
			dsr = ndsr;
			dflt = ndflt;
		} else
			srp_leave(&ndsr);
	}

	if (entry == NULL) {
		srp_leave(nsr);
		*nsr = dsr;
		KASSERT(ISLEAF(dflt));
		return (dflt);
	}

	srp_leave(&dsr);
done:
	KASSERT(ISLEAF(entry));
	return (entry);
}
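
/*
 * Usage sketch (illustrative): all return paths above leave ``nsr''
 * entered, so a caller is expected to release the reference
 * protecting the returned node once done with it:
 *
 *	struct srp_ref sr;
 *	struct art_node *an;
 *
 *	an = art_match(ar, addr, &sr);
 *	if (an != NULL)
 *		... use ``an'' ...
 *	srp_leave(&sr);
 */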

/*
 * Perfect lookup API function.
 *
 * Return a perfect match for a destination/prefix-length pair or NULL if
 * it does not exist.
 */
struct art_node *
art_lookup(struct art_root *ar, uint8_t *addr, int plen, struct srp_ref *nsr)
{
	void			*entry;
	struct art_table	*at;
	int			 i, j;

	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/* Default route */
	if (plen == 0) {
		entry = srp_follow(nsr, &at->at_default);
		goto done;
	}

	/*
	 * If the prefix length does not exceed the sum of the stride
	 * lengths up to and including this level, the entry must be
	 * in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* A leaf here is a shorter match (or NULL), not a perfect one. */
		if (ISLEAF(entry))
			return (NULL);

		at = SUBTABLE(entry);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	entry = srp_follow(nsr, &at->at_heap[i].node);
	if (!ISLEAF(entry))
		entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);

done:
	KASSERT(ISLEAF(entry));
	return (entry);
}
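
/*
 * Worked descent (illustrative, assuming the IPv4 strides set up in
 * art_alloc(): 8, 4, 4, ...): looking up a /16 starts at level 0
 * where 16 > 0 + 8, so the loop takes one fringe step; at level 1,
 * 16 > 8 + 4 descends again; at level 2, 16 == 12 + 4, so the loop
 * stops and art_bindex() yields the entry's index in that table.
 */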


/*
 * Insertion API function.
 *
 * Insert the given node or return an existing one if a node with the
 * same destination/mask pair is already present.
 */
struct art_node *
art_insert(struct art_root *ar, struct art_node *an, uint8_t *addr, int plen)
{
	struct art_table	*at, *child;
	struct art_node		*node;
	int			 i, j;

	KERNEL_ASSERT_LOCKED();
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	at = srp_get_locked(&ar->ar_root);
	if (at == NULL) {
		at = art_table_get(ar, NULL, -1);
		if (at == NULL)
			return (NULL);

		srp_swap_locked(&ar->ar_root, at);
	}

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		if (node != NULL)
			return (node);

		art_table_ref(ar, at);
		srp_swap_locked(&at->at_default, an);
		return (an);
	}

	/*
	 * If the prefix length does not exceed the sum of the stride
	 * lengths up to and including this level, the entry must be
	 * in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/*
		 * If the node corresponding to the fringe index is
		 * a leaf we need to allocate a subtable.  The route
		 * entry of this node will then become the default
		 * route of the subtable.
		 */
		if (ISLEAF(node)) {
			child = art_table_get(ar, at, j);
			if (child == NULL)
				return (NULL);

			art_table_ref(ar, at);
			srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
			at = child;
		} else
			at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_insert(ar, at, i, an));
}

/*
 * Single level insertion.
 */
struct art_node *
art_table_insert(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node	*prev, *node;

	node = srp_get_locked(&at->at_heap[i].node);
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	if (art_check_duplicate(ar, prev, an))
		return (prev);

	art_table_ref(ar, at);

	/*
	 * If the index `i' of the route that we are inserting is not
	 * a fringe index, we need to allot this new route pointer to
	 * all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, prev, an);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, an);
	else
		srp_swap_locked(&at->at_heap[i].node, an);

	return (an);
}
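
/*
 * Example of the non-fringe case above (illustrative): in an IPv4
 * root table (8-bit stride, at_minfringe == 256), inserting
 * 10.0.0.0/7 yields i == 133 < 256, so art_allot() walks the subtree
 * below 133 and redirects the fringe entries for first octets 10 and
 * 11 (indexes 266 and 267) to the new node, except where a more
 * specific route already claimed them.
 */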


/*
 * Deletion API function.
 */
struct art_node *
art_delete(struct art_root *ar, struct art_node *an, uint8_t *addr, int plen)
{
	struct art_table	*at;
	struct art_node		*node;
	int			 i, j;

	KERNEL_ASSERT_LOCKED();
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	at = srp_get_locked(&ar->ar_root);
	if (at == NULL)
		return (NULL);

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&at->at_default, NULL);
		art_table_free(ar, at);
		return (node);
	}

	/*
	 * If the prefix length does not exceed the sum of the stride
	 * lengths up to and including this level, the entry must be
	 * in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/* If this is a leaf, there is no route to delete. */
		if (ISLEAF(node))
			return (NULL);

		at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_delete(ar, at, i, an));
}

/*
 * Single level deletion.
 */
struct art_node *
art_table_delete(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node		*next, *node;

#ifdef DIAGNOSTIC
	struct art_node		*prev;
#endif

	node = srp_get_locked(&at->at_heap[i].node);
#ifdef DIAGNOSTIC
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	KASSERT(prev == an);
#endif

	/* Get the next most specific route for the index `i'. */
	if ((i >> 1) > 1)
		next = srp_get_locked(&at->at_heap[i >> 1].node);
	else
		next = NULL;

	/*
	 * If the index `i' of the route that we are removing is not
	 * a fringe index, we need to allot the next most specific
	 * route pointer to all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, an, next);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, next);
	else
		srp_swap_locked(&at->at_heap[i].node, next);

	/* We have removed an entry from this table. */
	art_table_free(ar, at);

	return (an);
}

struct art_table *
art_table_ref(struct art_root *ar, struct art_table *at)
{
	at->at_refcnt++;
	return (at);
}

static inline int
art_table_rele(struct art_table *at)
{
	if (at == NULL)
		return (0);

	return (--at->at_refcnt == 0);
}

int
art_table_free(struct art_root *ar, struct art_table *at)
{
	if (art_table_rele(at)) {
		/*
		 * Garbage collect this table and all its parents
		 * that are empty.
		 */
		do {
			at = art_table_put(ar, at);
		} while (art_table_rele(at));

		return (1);
	}

	return (0);
}
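
/*
 * Note on the reference counting above (as used throughout this
 * file): a table holds one reference per route stored in it and one
 * per child table attached to it.  When art_table_rele() sees the
 * count drop to zero the table is empty and can be unlinked, which
 * in turn may empty its parent, hence the loop in art_table_free().
 */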

/*
 * Iteration API function.
 */
int
art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_table	*at;
	struct art_node		*node;
	int			 error = 0;

	KERNEL_LOCK();
	at = srp_get_locked(&ar->ar_root);
	if (at != NULL) {
		art_table_ref(ar, at);

		/*
		 * The default route should be processed here because the root
		 * table does not have a parent.
		 */
		node = srp_enter(&sr, &at->at_default);
		error = art_walk_apply(ar, node, NULL, f, arg);
		srp_leave(&sr);

		if (error == 0)
			error = art_table_walk(ar, at, f, arg);

		art_table_free(ar, at);
	}
	KERNEL_UNLOCK();

	return (error);
}

int
art_table_walk(struct art_root *ar, struct art_table *at,
    int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_node		*node, *next;
	struct art_table	*nat;
	int			 i, j, error = 0;
	uint32_t		 maxfringe = (at->at_minfringe << 1);

	/*
	 * Iterate non-fringe nodes in ``natural'' order.
	 */
	for (j = 1; j < at->at_minfringe; j += 2) {
		/*
		 * The default route (index 1) is processed by the
		 * parent table (where it belongs) otherwise it could
		 * be processed more than once.
		 */
		for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
			next = srp_get_locked(&at->at_heap[i >> 1].node);

			node = srp_enter(&sr, &at->at_heap[i].node);
			error = art_walk_apply(ar, node, next, f, arg);
			srp_leave(&sr);

			if (error != 0)
				return (error);
		}
	}

	/*
	 * Iterate fringe nodes.
	 */
	for (i = at->at_minfringe; i < maxfringe; i++) {
		next = srp_get_locked(&at->at_heap[i >> 1].node);

		node = srp_enter(&sr, &at->at_heap[i].node);
		if (!ISLEAF(node)) {
			nat = art_table_ref(ar, SUBTABLE(node));
			node = srp_follow(&sr, &nat->at_default);
		} else
			nat = NULL;

		error = art_walk_apply(ar, node, next, f, arg);
		srp_leave(&sr);

		if (error != 0) {
			art_table_free(ar, nat);
			return (error);
		}

		if (nat != NULL) {
			error = art_table_walk(ar, nat, f, arg);
			art_table_free(ar, nat);
			if (error != 0)
				return (error);
		}
	}

	return (0);
}

int
art_walk_apply(struct art_root *ar,
    struct art_node *an, struct art_node *next,
    int (*f)(struct art_node *, void *), void *arg)
{
	int error = 0;

	if ((an != NULL) && (an != next)) {
		/* this assumes an->an_dst is not used by f */
		KERNEL_UNLOCK();
		error = (*f)(an, arg);
		KERNEL_LOCK();
	}

	return (error);
}


/*
 * Create a table and use the given index to set its default route.
 *
 * Note:  This function does not modify the root or the parent.
 */
struct art_table *
art_table_get(struct art_root *ar, struct art_table *parent, int j)
{
	struct art_table	*at;
	struct art_node		*node;
	void			*at_heap;
	uint32_t		 lvl;

	KASSERT(j != 0 && j != 1);
	KASSERT(parent != NULL || j == -1);

	if (parent != NULL)
		lvl = parent->at_level + 1;
	else
		lvl = 0;

	KASSERT(lvl < ar->ar_nlvl);

	at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO);
	if (at == NULL)
		return (NULL);

	switch (AT_HEAPSIZE(ar->ar_bits[lvl])) {
	case AT_HEAPSIZE(4):
		at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
		break;
	case AT_HEAPSIZE(8):
		at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
		break;
	default:
		panic("incorrect stride length %u", ar->ar_bits[lvl]);
	}

	if (at_heap == NULL) {
		pool_put(&at_pool, at);
		return (NULL);
	}

	at->at_parent = parent;
	at->at_index = j;
	at->at_minfringe = (1 << ar->ar_bits[lvl]);
	at->at_level = lvl;
	at->at_bits = ar->ar_bits[lvl];
	at->at_heap = at_heap;
	at->at_refcnt = 0;

	if (parent != NULL) {
		node = srp_get_locked(&parent->at_heap[j].node);
		/* node isn't being deleted, no srp_finalize needed */
		srp_swap_locked(&at->at_default, node);
		at->at_offset = (parent->at_offset + parent->at_bits);
	}

	return (at);
}


/*
 * Delete a table and use its index to restore its parent's default route.
 *
 * Note:  This function modifies its parent to unlink the table from it.
 */
struct art_table *
art_table_put(struct art_root *ar, struct art_table *at)
{
	struct art_table	*parent = at->at_parent;
	struct art_node		*node;
	uint32_t		 j = at->at_index;

	KASSERT(at->at_refcnt == 0);
	KASSERT(j != 0 && j != 1);

	if (parent != NULL) {
		KASSERT(j != -1);
		KASSERT(at->at_level == parent->at_level + 1);
		KASSERT(parent->at_refcnt >= 1);

		/* Give the route back to its parent. */
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&parent->at_heap[j].node, node);
	} else {
		KASSERT(j == -1);
		KASSERT(at->at_level == 0);
		srp_swap_locked(&ar->ar_root, NULL);
	}

	mtx_enter(&art_table_gc_mtx);
	at->at_parent = art_table_gc_list;
	art_table_gc_list = at;
	mtx_leave(&art_table_gc_mtx);

	task_add(systqmp, &art_table_gc_task);

	return (parent);
}

void
art_table_gc(void *null)
{
	struct art_table *at, *next;

	mtx_enter(&art_table_gc_mtx);
	at = art_table_gc_list;
	art_table_gc_list = NULL;
	mtx_leave(&art_table_gc_mtx);

	while (at != NULL) {
		next = at->at_parent;

		if (at->at_level == 0)
			srp_finalize(at, "arttfini");
		else
			srp_finalize(ASNODE(at), "arttfini");

		switch (AT_HEAPSIZE(at->at_bits)) {
		case AT_HEAPSIZE(4):
			pool_put(&at_heap_4_pool, at->at_heap);
			break;
		case AT_HEAPSIZE(8):
			pool_put(&at_heap_8_pool, at->at_heap);
			break;
		default:
			panic("incorrect stride length %u", at->at_bits);
		}

		pool_put(&at_pool, at);

		at = next;
	}
}

/*
 * Substitute a node by another in the subtree whose root index is given.
 *
 * This function iterates on the table ``at'' at index ``i'' until no
 * more ``old'' node can be replaced by ``new''.
 *
 * This function was originally written by Don Knuth in CWEB.  The
 * complicated ``goto''s are the result of expanding the two recursive
 * calls of the following recursion:
 *
 *	art_allot(at, i, old, new)
 *	{
 *		int k = i;
 *		if (at->at_heap[k] == old)
 *			at->at_heap[k] = new;
 *		if (k >= at->at_minfringe)
 *			 return;
 *		k <<= 1;
 *		art_allot(at, k, old, new);
 *		k++;
 *		art_allot(at, k, old, new);
 *	}
 */
void
art_allot(struct art_table *at, int i, struct art_node *old,
    struct art_node *new)
{
	struct art_node		*node, *dflt;
	int			 k = i;

	KASSERT(i < at->at_minfringe);

again:
	k <<= 1;
	if (k < at->at_minfringe)
		goto nonfringe;

	/* Change fringe nodes. */
	while (1) {
		node = srp_get_locked(&at->at_heap[k].node);
		if (!ISLEAF(node)) {
			dflt = srp_get_locked(&SUBTABLE(node)->at_default);
			if (dflt == old) {
				srp_swap_locked(&SUBTABLE(node)->at_default,
				    new);
			}
		} else if (node == old) {
			srp_swap_locked(&at->at_heap[k].node, new);
		}
		if (k % 2)
			goto moveup;
		k++;
	}

nonfringe:
	node = srp_get_locked(&at->at_heap[k].node);
	if (node == old)
		goto again;
moveon:
	if (k % 2)
		goto moveup;
	k++;
	goto nonfringe;
moveup:
	k >>= 1;
	srp_swap_locked(&at->at_heap[k].node, new);

	/* Change non-fringe node. */
	if (k != i)
		goto moveon;
}
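
/*
 * Illustrative note on the index scheme walked above: the heap forms
 * a complete binary tree where the children of index k are 2k and
 * 2k + 1 and its parent is k >> 1.  Internal (non-fringe) indexes
 * live in [1, at_minfringe) and fringe indexes in [at_minfringe,
 * 2 * at_minfringe).  The gotos perform a depth-first traversal of
 * the subtree rooted at ``i'' using only ``k'' as state.
 */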

struct art_node *
art_get(struct sockaddr *dst, uint8_t plen)
{
	struct art_node		*an;

	an = pool_get(&an_pool, PR_NOWAIT | PR_ZERO);
	if (an == NULL)
		return (NULL);

	an->an_dst = dst;
	an->an_plen = plen;
	SRPL_INIT(&an->an_rtlist);

	return (an);
}

void
art_put(struct art_node *an)
{
	KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist));

	mtx_enter(&art_node_gc_mtx);
	an->an_gc = art_node_gc_list;
	art_node_gc_list = an;
	mtx_leave(&art_node_gc_mtx);

	task_add(systqmp, &art_node_gc_task);
}

void
art_gc(void *null)
{
	struct art_node		*an, *next;

	mtx_enter(&art_node_gc_mtx);
	an = art_node_gc_list;
	art_node_gc_list = NULL;
	mtx_leave(&art_node_gc_mtx);

	while (an != NULL) {
		next = an->an_gc;

		srp_finalize(an, "artnfini");

		pool_put(&an_pool, an);

		an = next;
	}
}
977