/*	$OpenBSD: art.c,v 1.24 2016/09/15 02:00:18 dlg Exp $ */

/*
 * Copyright (c) 2015 Martin Pieuchot
 * Copyright (c) 2001 Yoichi Hariguchi
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Allotment Routing Table (ART).
 *
 * Yoichi Hariguchi's paper can be found at:
 *	http://www.hariguchi.org/art/art.pdf
 */

#ifndef _KERNEL
#include "kern_compat.h"
#else
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/task.h>
#include <sys/socket.h>
#endif

#include <net/art.h>

#define ISLEAF(e)	(((unsigned long)(e) & 1) == 0)
#define SUBTABLE(e)	((struct art_table *)((unsigned long)(e) & ~1))
#define ASNODE(t)	((struct art_node *)((unsigned long)(t) | 1))
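
/*
 * A heap entry is either a pointer to an art_node (a leaf, including
 * NULL) or a pointer to a child art_table with its least significant
 * bit set.  Both objects come out of pools and are at least word
 * aligned, so bit 0 is free to act as the tag.  A minimal sketch of
 * the round trip, assuming ``child'' is a table pointer:
 *
 *	void *entry = ASNODE(child);	   tag bit 0, store in the heap
 *	if (!ISLEAF(entry))		   tag set: not a leaf
 *		child = SUBTABLE(entry);   strip the tag again
 */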

/*
 * Allotment Table.
 */
struct art_table {
	struct art_table	*at_parent;	/* Parent table */
	uint32_t		 at_index;	/* Index in the parent table */
	uint32_t		 at_minfringe;	/* Index that fringe begins */
	uint32_t		 at_level;	/* Level of the table */
	uint8_t			 at_bits;	/* Stride length of the table */
	uint8_t			 at_offset;	/* Sum of parents' stride len */

	/*
	 * Items stored in the heap are pointers to nodes, in the leaf
	 * case, or tables otherwise.  One exception is index 0 which
	 * is a route counter.
	 */
	union {
		struct srp		 node;
		unsigned long		 count;
	} *at_heap;				/* Array of 2^(slen+1) items */
};
#define	at_refcnt	at_heap[0].count /* Refcounter (1 per different route) */
#define	at_default	at_heap[1].node	/* Default route (was in parent heap) */

/* Heap size for an ART table of stride length ``slen''. */
#define AT_HEAPSIZE(slen)	((1 << ((slen) + 1)) * sizeof(void *))
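
/*
 * For example, on an LP64 platform a 4-bit stride needs
 * AT_HEAPSIZE(4) = 2^5 * 8 = 256 bytes and an 8-bit stride needs
 * AT_HEAPSIZE(8) = 2^9 * 8 = 4096 bytes, which is why art_init()
 * below can back the 8-bit heaps with a single-page allocator.
 */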

int			 art_bindex(struct art_table *, uint8_t *, int);
void			 art_allot(struct art_table *at, int, struct art_node *,
			     struct art_node *);
struct art_table	*art_table_get(struct art_root *, struct art_table *,
			     int);
struct art_table	*art_table_put(struct art_root *, struct art_table *);
struct art_node		*art_table_insert(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_node		*art_table_delete(struct art_root *, struct art_table *,
			     int, struct art_node *);
struct art_table	*art_table_ref(struct art_root *, struct art_table *);
int			 art_table_free(struct art_root *, struct art_table *);
int			 art_table_walk(struct art_root *, struct art_table *,
			     int (*f)(struct art_node *, void *), void *);
int			 art_walk_apply(struct art_root *,
			     struct art_node *, struct art_node *,
			     int (*f)(struct art_node *, void *), void *);
void			 art_table_gc(void *);
void			 art_gc(void *);

struct pool		an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;

struct art_table	*art_table_gc_list = NULL;
struct mutex		 art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_table_gc_task =
			     TASK_INITIALIZER(art_table_gc, NULL);

struct art_node		*art_node_gc_list = NULL;
struct mutex		 art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct task		 art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);

void
art_init(void)
{
	pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0,
	    "art_node", NULL);
	pool_init(&at_pool, sizeof(struct art_table), 0, IPL_SOFTNET, 0,
	    "art_table", NULL);
	pool_init(&at_heap_4_pool, AT_HEAPSIZE(4), 0, IPL_SOFTNET, 0,
	    "art_heap4", NULL);
	pool_init(&at_heap_8_pool, AT_HEAPSIZE(8), 0, IPL_SOFTNET, 0,
	    "art_heap8", &pool_allocator_single);
}

/*
 * Per-routing-table initialization API function.
 */
struct art_root *
art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	struct art_root		*ar;
	int			 i;

	ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO);
	if (ar == NULL)
		return (NULL);

	switch (alen) {
	case 32:
		ar->ar_alen = 32;
		ar->ar_nlvl = 7;
		ar->ar_bits[0] = 8;
		for (i = 1; i < ar->ar_nlvl; i++)
			ar->ar_bits[i] = 4;
		break;
	case 128:
		ar->ar_alen = 128;
		ar->ar_nlvl = 32;
		for (i = 0; i < ar->ar_nlvl; i++)
			ar->ar_bits[i] = 4;
		break;
	default:
		printf("%s: incorrect address length %u\n", __func__, alen);
		free(ar, M_RTABLE, sizeof(*ar));
		return (NULL);
	}

	ar->ar_off = off;
	ar->ar_rtableid = rtableid;
	rw_init(&ar->ar_lock, "art");

	return (ar);
}
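
/*
 * With the values set above, the strides cover the address space as
 * follows:
 *
 *	alen  32 (IPv4):  8 + 6 * 4	=  32 bits over  7 levels
 *	alen 128 (IPv6):  32 * 4	= 128 bits over 32 levels
 */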

/*
 * Return 1 if ``old'' and ``new'' are identical, 0 otherwise.
 */
static inline int
art_check_duplicate(struct art_root *ar, struct art_node *old,
    struct art_node *new)
{
	if (old == NULL)
		return (0);

	if (old->an_plen == new->an_plen)
		return (1);

	return (0);
}

/*
 * Return the base index of the part of ``addr'' and ``plen''
 * corresponding to the range covered by the table ``at''.
 *
 * In other words, this function takes the multi-level (complete)
 * address ``addr'' and prefix length ``plen'' and returns the
 * single-level base index for the table ``at''.
 *
 * For example, with a 32-bit address size divided into four
 * 8-bit-long tables, there's a maximum of 4 base indexes if the
 * prefix length is > 24.
 */
int
art_bindex(struct art_table *at, uint8_t *addr, int plen)
{
	uint8_t			boff, bend;
	uint32_t		 k;

	if (plen < at->at_offset || plen > (at->at_offset + at->at_bits))
		return (-1);

	/*
	 * We are only interested in the part of the prefix length
	 * corresponding to the range of this table.
	 */
	plen -= at->at_offset;

	/*
	 * Jump to the first byte of the address containing bits
	 * covered by this table.
	 */
	addr += (at->at_offset / 8);

	/* ``at'' covers the bit range between ``boff'' & ``bend''. */
	boff = (at->at_offset % 8);
	bend = (at->at_bits + boff);

	KASSERT(bend <= 32);

	if (bend > 24) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] << (bend - 24);
		k |= addr[3] >> (32 - bend);
	} else if (bend > 16) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] << (bend - 16);
		k |= addr[2] >> (24 - bend);
	} else if (bend > 8) {
		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
		k |= addr[1] >> (16 - bend);
	} else {
		k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1);
	}

	/*
	 * Single level base index formula: keep the ``plen'' most
	 * significant bits of the stride ``k'' and offset them into
	 * the heap region reserved for that prefix length (indices
	 * 2^plen to 2^(plen+1) - 1).
	 */
	return ((k >> (at->at_bits - plen)) + (1 << plen));
}
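
/*
 * Worked example of the formula above, assuming the 8-bit stride root
 * table of an IPv4 ART (at_offset 0, at_bits 8) and the prefix
 * 10.0.0.0/7:
 *
 *	k = 0x0a = 10, plen = 7
 *	(10 >> (8 - 7)) + (1 << 7) = 5 + 128 = 133
 *
 * Once art_allot() has spread the route, base index 133 covers the two
 * fringe slots below it, 133 * 2 = 266 (10/8) and 133 * 2 + 1 = 267
 * (11/8).
 */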

/*
 * Single level lookup function.
 *
 * Return the fringe index of the part of ``addr''
 * corresponding to the range covered by the table ``at''.
 */
static inline int
art_findex(struct art_table *at, uint8_t *addr)
{
	return art_bindex(at, addr, (at->at_offset + at->at_bits));
}
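
/*
 * With ``plen'' equal to the full stride, the base index formula
 * collapses to k + at_minfringe: in the 8-bit root table the address
 * byte 0x0a maps to fringe index 10 + 256 = 266.
 */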

/*
 * (Non-perfect) lookup API function.
 *
 * Return the best existing match for a destination.
 */
struct art_node *
art_match(struct art_root *ar, uint8_t *addr, struct srp_ref *nsr)
{
	struct srp_ref		dsr, ndsr;
	void			*entry;
	struct art_table	*at;
	struct art_node		*dflt, *ndflt;
	int			j;

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/*
	 * Remember the default route of each table we visit in case
	 * we do not find a better matching route.
	 */
	dflt = srp_enter(&dsr, &at->at_default);

	/*
	 * Iterate until we find a leaf.
	 */
	while (1) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* If this is a leaf (NULL is a leaf) we're done. */
		if (ISLEAF(entry))
			break;

		at = SUBTABLE(entry);

		ndflt = srp_enter(&ndsr, &at->at_default);
		if (ndflt != NULL) {
			srp_leave(&dsr);
			dsr = ndsr;
			dflt = ndflt;
		} else
			srp_leave(&ndsr);
	}

	if (entry == NULL) {
		srp_leave(nsr);
		*nsr = dsr;
		KASSERT(ISLEAF(dflt));
		return (dflt);
	}

	srp_leave(&dsr);
done:
	KASSERT(ISLEAF(entry));
	return (entry);
}

/*
 * Perfect lookup API function.
 *
 * Return a perfect match for a destination/prefix-length pair or NULL if
 * it does not exist.
 */
struct art_node *
art_lookup(struct art_root *ar, uint8_t *addr, int plen, struct srp_ref *nsr)
{
	void			*entry;
	struct art_table	*at;
	int			 i, j;

	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	entry = srp_enter(nsr, &ar->ar_root);
	at = entry;

	if (at == NULL)
		goto done;

	/* Default route */
	if (plen == 0) {
		entry = srp_follow(nsr, &at->at_default);
		goto done;
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride lengths up to and including this level,
	 * the entry must be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		entry = srp_follow(nsr, &at->at_heap[j].node);

		/* A leaf here is at best a non-perfect match, or NULL. */
		if (ISLEAF(entry))
			return (NULL);

		at = SUBTABLE(entry);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	entry = srp_follow(nsr, &at->at_heap[i].node);
	if (!ISLEAF(entry))
		entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);

done:
	KASSERT(ISLEAF(entry));
	return (entry);
}
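
/*
 * Example walk, assuming the IPv4 strides set up by art_alloc():
 * a lookup of 192.168.0.0/16 descends through the 8-bit root table
 * (16 > 0 + 8) and the first 4-bit table (16 > 8 + 4), then stops in
 * the level-2 table (16 <= 12 + 4), where art_bindex() extracts
 * address bits 12-15 (0x8 out of 0xc0a8) and returns
 * (0x8 >> 0) + (1 << 4) = 24.
 */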


/*
 * Insertion API function.
 *
 * Insert the given node or return an existing one if a node with the
 * same destination/mask pair is already present.
 */
struct art_node *
art_insert(struct art_root *ar, struct art_node *an, uint8_t *addr, int plen)
{
	struct art_table	*at, *child;
	struct art_node		*node;
	int			 i, j;

	rw_assert_wrlock(&ar->ar_lock);
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	at = srp_get_locked(&ar->ar_root);
	if (at == NULL) {
		at = art_table_get(ar, NULL, -1);
		if (at == NULL)
			return (NULL);

		srp_swap_locked(&ar->ar_root, at);
	}

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		if (node != NULL)
			return (node);

		art_table_ref(ar, at);
		srp_swap_locked(&at->at_default, an);
		return (an);
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride lengths up to and including this level,
	 * the entry must be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/*
		 * If the node corresponding to the fringe index is
		 * a leaf we need to allocate a subtable.  The route
		 * entry of this node will then become the default
		 * route of the subtable.
		 */
		if (ISLEAF(node)) {
			child = art_table_get(ar, at, j);
			if (child == NULL)
				return (NULL);

			art_table_ref(ar, at);
			srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
			at = child;
		} else
			at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_insert(ar, at, i, an));
}
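
/*
 * Minimal usage sketch for the insertion API, with a hypothetical
 * caller that has already prepared ``dst'', ``addr'' and ``plen''.
 * The write lock must be held across the call:
 *
 *	struct art_node *an, *ret;
 *
 *	an = art_get(dst, plen);
 *	if (an == NULL)
 *		return (ENOMEM);
 *	rw_enter_write(&ar->ar_lock);
 *	ret = art_insert(ar, an, addr, plen);
 *	rw_exit_write(&ar->ar_lock);
 *	if (ret != an)
 *		art_put(an);	   duplicate entry or allocation failure
 */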

/*
 * Single level insertion.
 */
struct art_node *
art_table_insert(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node	*prev, *node;

	node = srp_get_locked(&at->at_heap[i].node);
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	if (art_check_duplicate(ar, prev, an))
		return (prev);

	art_table_ref(ar, at);

	/*
	 * If the index `i' of the route that we are inserting is not
	 * a fringe index, we need to allot this new route pointer to
	 * all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, prev, an);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, an);
	else
		srp_swap_locked(&at->at_heap[i].node, an);

	return (an);
}


/*
 * Deletion API function.
 */
struct art_node *
art_delete(struct art_root *ar, struct art_node *an, uint8_t *addr, int plen)
{
	struct art_table	*at;
	struct art_node		*node;
	int			 i, j;

	rw_assert_wrlock(&ar->ar_lock);
	KASSERT(plen >= 0 && plen <= ar->ar_alen);

	at = srp_get_locked(&ar->ar_root);
	if (at == NULL)
		return (NULL);

	/* Default route */
	if (plen == 0) {
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&at->at_default, NULL);
		art_table_free(ar, at);
		return (node);
	}

	/*
	 * If the prefix length is smaller than the sum of
	 * the stride lengths up to and including this level,
	 * the entry must be in the current table.
	 */
	while (plen > (at->at_offset + at->at_bits)) {
		/* Do a single level route lookup. */
		j = art_findex(at, addr);
		node = srp_get_locked(&at->at_heap[j].node);

		/* If this is a leaf, there is no route to delete. */
		if (ISLEAF(node))
			return (NULL);

		at = SUBTABLE(node);
	}

	i = art_bindex(at, addr, plen);
	if (i == -1)
		return (NULL);

	return (art_table_delete(ar, at, i, an));
}

/*
 * Single level deletion.
 */
struct art_node *
art_table_delete(struct art_root *ar, struct art_table *at, int i,
    struct art_node *an)
{
	struct art_node		*next, *node;

#ifdef DIAGNOSTIC
	struct art_node		*prev;
#endif

	node = srp_get_locked(&at->at_heap[i].node);
#ifdef DIAGNOSTIC
	if (!ISLEAF(node))
		prev = srp_get_locked(&SUBTABLE(node)->at_default);
	else
		prev = node;

	KASSERT(prev == an);
#endif

	/*
	 * Get the next most specific route for the index `i': in the
	 * heap, index `i >> 1' covers the same address bits with a
	 * one-bit-shorter prefix.
	 */
	if ((i >> 1) > 1)
		next = srp_get_locked(&at->at_heap[i >> 1].node);
	else
		next = NULL;

	/*
	 * If the index `i' of the route that we are removing is not
	 * a fringe index, we need to allot the next most specific
	 * route pointer to all the corresponding fringe indices.
	 */
	if (i < at->at_minfringe)
		art_allot(at, i, an, next);
	else if (!ISLEAF(node))
		srp_swap_locked(&SUBTABLE(node)->at_default, next);
	else
		srp_swap_locked(&at->at_heap[i].node, next);

	/* We have removed an entry from this table. */
	art_table_free(ar, at);

	return (an);
}
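
/*
 * The `i >> 1' trick above relies on the heap layout: index i >> 1
 * always holds the same address bits with a one-bit-shorter prefix.
 * For example, in a 4-bit stride table the prefix 101/3 lives at
 * (0b101) + (1 << 3) = 13 and its covering prefix 10/2 at
 * (0b10) + (1 << 2) = 6 = 13 >> 1.
 */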

struct art_table *
art_table_ref(struct art_root *ar, struct art_table *at)
{
	at->at_refcnt++;
	return (at);
}

static inline int
art_table_rele(struct art_table *at)
{
	if (at == NULL)
		return (0);

	return (--at->at_refcnt == 0);
}

int
art_table_free(struct art_root *ar, struct art_table *at)
{
	if (art_table_rele(at)) {
		/*
		 * Garbage collect this table and all its parents
		 * that are empty.
		 */
		do {
			at = art_table_put(ar, at);
		} while (art_table_rele(at));

		return (1);
	}

	return (0);
}
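
/*
 * Example of the cascade above: when each table on a branch holds a
 * single route, deleting that route drops the deepest table's refcount
 * to zero; art_table_put() unlinks it, queues it for GC and returns
 * its parent, whose reference is released in turn, until a table that
 * still holds other routes (or the empty root) stops the loop.
 */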

/*
 * Iteration API function.
 */
int
art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_table	*at;
	struct art_node		*node;
	int			 error = 0;

	rw_enter_write(&ar->ar_lock);
	at = srp_get_locked(&ar->ar_root);
	if (at != NULL) {
		art_table_ref(ar, at);

		/*
		 * The default route should be processed here because the root
		 * table does not have a parent.
		 */
		node = srp_enter(&sr, &at->at_default);
		error = art_walk_apply(ar, node, NULL, f, arg);
		srp_leave(&sr);

		if (error == 0)
			error = art_table_walk(ar, at, f, arg);

		art_table_free(ar, at);
	}
	rw_exit_write(&ar->ar_lock);

	return (error);
}

int
art_table_walk(struct art_root *ar, struct art_table *at,
    int (*f)(struct art_node *, void *), void *arg)
{
	struct srp_ref		 sr;
	struct art_node		*node, *next;
	struct art_table	*nat;
	int			 i, j, error = 0;
	uint32_t		 maxfringe = (at->at_minfringe << 1);

	/*
	 * Iterate non-fringe nodes in ``natural'' order.
	 */
	for (j = 1; j < at->at_minfringe; j += 2) {
		/*
		 * The default route (index 1) is processed by the
		 * parent table (where it belongs); otherwise it could
		 * be processed more than once.
		 */
		for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
			next = srp_get_locked(&at->at_heap[i >> 1].node);

			node = srp_enter(&sr, &at->at_heap[i].node);
			error = art_walk_apply(ar, node, next, f, arg);
			srp_leave(&sr);

			if (error != 0)
				return (error);
		}
	}

	/*
	 * Iterate fringe nodes.
	 */
	for (i = at->at_minfringe; i < maxfringe; i++) {
		next = srp_get_locked(&at->at_heap[i >> 1].node);

		node = srp_enter(&sr, &at->at_heap[i].node);
		if (!ISLEAF(node)) {
			nat = art_table_ref(ar, SUBTABLE(node));
			node = srp_follow(&sr, &nat->at_default);
		} else
			nat = NULL;

		error = art_walk_apply(ar, node, next, f, arg);
		srp_leave(&sr);

		if (error != 0) {
			art_table_free(ar, nat);
			return (error);
		}

		if (nat != NULL) {
			error = art_table_walk(ar, nat, f, arg);
			art_table_free(ar, nat);
			if (error != 0)
				return (error);
		}
	}

	return (0);
}

int
art_walk_apply(struct art_root *ar,
    struct art_node *an, struct art_node *next,
    int (*f)(struct art_node *, void *), void *arg)
{
	int error = 0;

	if ((an != NULL) && (an != next)) {
		/* this assumes an->an_dst is not used by f */
		rw_exit_write(&ar->ar_lock);
		error = (*f)(an, arg);
		rw_enter_write(&ar->ar_lock);
	}

	return (error);
}


/*
 * Create a table and use the given index to set its default route.
 *
 * Note:  This function does not modify the root or the parent.
 */
struct art_table *
art_table_get(struct art_root *ar, struct art_table *parent, int j)
{
	struct art_table	*at;
	struct art_node		*node;
	void			*at_heap;
	uint32_t		 lvl;

	KASSERT(j != 0 && j != 1);
	KASSERT(parent != NULL || j == -1);

	if (parent != NULL)
		lvl = parent->at_level + 1;
	else
		lvl = 0;

	KASSERT(lvl < ar->ar_nlvl);

	at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO);
	if (at == NULL)
		return (NULL);

	switch (AT_HEAPSIZE(ar->ar_bits[lvl])) {
	case AT_HEAPSIZE(4):
		at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
		break;
	case AT_HEAPSIZE(8):
		at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
		break;
	default:
		panic("incorrect stride length %u", ar->ar_bits[lvl]);
	}

	if (at_heap == NULL) {
		pool_put(&at_pool, at);
		return (NULL);
	}

	at->at_parent = parent;
	at->at_index = j;
	at->at_minfringe = (1 << ar->ar_bits[lvl]);
	at->at_level = lvl;
	at->at_bits = ar->ar_bits[lvl];
	at->at_heap = at_heap;
	at->at_refcnt = 0;

	if (parent != NULL) {
		node = srp_get_locked(&parent->at_heap[j].node);
		/* node isn't being deleted, no srp_finalize needed */
		srp_swap_locked(&at->at_default, node);
		at->at_offset = (parent->at_offset + parent->at_bits);
	}

	return (at);
}


/*
 * Delete a table and use its index to restore its parent's default route.
 *
 * Note:  This function modifies the parent to unlink the table from it.
 */
struct art_table *
art_table_put(struct art_root *ar, struct art_table *at)
{
	struct art_table	*parent = at->at_parent;
	struct art_node		*node;
	uint32_t		 j = at->at_index;

	KASSERT(at->at_refcnt == 0);
	KASSERT(j != 0 && j != 1);

	if (parent != NULL) {
		KASSERT(j != -1);
		KASSERT(at->at_level == parent->at_level + 1);
		KASSERT(parent->at_refcnt >= 1);

		/* Give the route back to its parent. */
		node = srp_get_locked(&at->at_default);
		srp_swap_locked(&parent->at_heap[j].node, node);
	} else {
		KASSERT(j == -1);
		KASSERT(at->at_level == 0);
		srp_swap_locked(&ar->ar_root, NULL);
	}

	mtx_enter(&art_table_gc_mtx);
	at->at_parent = art_table_gc_list;
	art_table_gc_list = at;
	mtx_leave(&art_table_gc_mtx);

	task_add(systqmp, &art_table_gc_task);

	return (parent);
}

void
art_table_gc(void *null)
{
	struct art_table *at, *next;

	mtx_enter(&art_table_gc_mtx);
	at = art_table_gc_list;
	art_table_gc_list = NULL;
	mtx_leave(&art_table_gc_mtx);

	while (at != NULL) {
		next = at->at_parent;

		if (at->at_level == 0)
			srp_finalize(at, "arttfini");
		else
			srp_finalize(ASNODE(at), "arttfini");

		switch (AT_HEAPSIZE(at->at_bits)) {
		case AT_HEAPSIZE(4):
			pool_put(&at_heap_4_pool, at->at_heap);
			break;
		case AT_HEAPSIZE(8):
			pool_put(&at_heap_8_pool, at->at_heap);
			break;
		default:
			panic("incorrect stride length %u", at->at_bits);
		}

		pool_put(&at_pool, at);

		at = next;
	}
}

/*
 * Substitute a node by another in the subtree whose root index is given.
 *
 * This function iterates on the table ``at'' at index ``i'' until no
 * more ``old'' node can be replaced by ``new''.
 *
 * This function was originally written by Don Knuth in CWEB.  The
 * complicated ``goto''s are the result of expanding the two recursive
 * calls of the following version:
 *
 *	art_allot(at, i, old, new)
 *	{
 *		int k = i;
 *		if (at->at_heap[k] == old)
 *			at->at_heap[k] = new;
 *		if (k >= at->at_minfringe)
 *			 return;
 *		k <<= 1;
 *		art_allot(at, k, old, new);
 *		k++;
 *		art_allot(at, k, old, new);
 *	}
 */
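/*
 * Example trace of the iterative version, assuming at_minfringe = 8,
 * i = 2 and every visited entry still pointing at ``old'': it checks
 * index 4, updates fringe indices 8 and 9, writes 4, checks 5, updates
 * fringe indices 10 and 11, then writes 5 and finally 2.  Fringe slots
 * are thus switched to ``new'' before the non-fringe indices above
 * them.
 */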
void
art_allot(struct art_table *at, int i, struct art_node *old,
    struct art_node *new)
{
	struct art_node		*node, *dflt;
	int			 k = i;

	KASSERT(i < at->at_minfringe);

again:
	k <<= 1;
	if (k < at->at_minfringe)
		goto nonfringe;

	/* Change fringe nodes. */
	while (1) {
		node = srp_get_locked(&at->at_heap[k].node);
		if (!ISLEAF(node)) {
			dflt = srp_get_locked(&SUBTABLE(node)->at_default);
			if (dflt == old) {
				srp_swap_locked(&SUBTABLE(node)->at_default,
				    new);
			}
		} else if (node == old) {
			srp_swap_locked(&at->at_heap[k].node, new);
		}
		if (k % 2)
			goto moveup;
		k++;
	}

nonfringe:
	node = srp_get_locked(&at->at_heap[k].node);
	if (node == old)
		goto again;
moveon:
	if (k % 2)
		goto moveup;
	k++;
	goto nonfringe;
moveup:
	k >>= 1;
	srp_swap_locked(&at->at_heap[k].node, new);

	/* Change non-fringe node. */
	if (k != i)
		goto moveup;
}

struct art_node *
art_get(struct sockaddr *dst, uint8_t plen)
{
	struct art_node		*an;

	an = pool_get(&an_pool, PR_NOWAIT | PR_ZERO);
	if (an == NULL)
		return (NULL);

	an->an_dst = dst;
	an->an_plen = plen;
	SRPL_INIT(&an->an_rtlist);

	return (an);
}

void
art_put(struct art_node *an)
{
	KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist));

	mtx_enter(&art_node_gc_mtx);
	an->an_gc = art_node_gc_list;
	art_node_gc_list = an;
	mtx_leave(&art_node_gc_mtx);

	task_add(systqmp, &art_node_gc_task);
}

void
art_gc(void *null)
{
	struct art_node		*an, *next;

	mtx_enter(&art_node_gc_mtx);
	an = art_node_gc_list;
	art_node_gc_list = NULL;
	mtx_leave(&art_node_gc_mtx);

	while (an != NULL) {
		next = an->an_gc;

		srp_finalize(an, "artnfini");

		pool_put(&an_pool, an);

		an = next;
	}
}