xref: /openbsd-src/sys/net/route.c (revision 850e275390052b330d93020bf619a739a3c277ac)
1 /*	$OpenBSD: route.c,v 1.97 2008/09/10 14:01:23 blambert Exp $	*/
2 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)route.c	8.2 (Berkeley) 11/15/93
62  */
63 
64 /*
65  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
66  *
67  * NRL grants permission for redistribution and use in source and binary
68  * forms, with or without modification, of the software and documentation
69  * created at NRL provided that the following conditions are met:
70  *
71  * 1. Redistributions of source code must retain the above copyright
72  *    notice, this list of conditions and the following disclaimer.
73  * 2. Redistributions in binary form must reproduce the above copyright
74  *    notice, this list of conditions and the following disclaimer in the
75  *    documentation and/or other materials provided with the distribution.
76  * 3. All advertising materials mentioning features or use of this software
77  *    must display the following acknowledgements:
78  * 	This product includes software developed by the University of
79  * 	California, Berkeley and its contributors.
80  * 	This product includes software developed at the Information
81  * 	Technology Division, US Naval Research Laboratory.
82  * 4. Neither the name of the NRL nor the names of its contributors
83  *    may be used to endorse or promote products derived from this software
84  *    without specific prior written permission.
85  *
86  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
90  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97  *
98  * The views and conclusions contained in the software and documentation
99  * are those of the authors and should not be interpreted as representing
100  * official policies, either expressed or implied, of the US Naval
101  * Research Laboratory (NRL).
102  */
103 
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/proc.h>
107 #include <sys/mbuf.h>
108 #include <sys/socket.h>
109 #include <sys/socketvar.h>
110 #include <sys/domain.h>
111 #include <sys/protosw.h>
112 #include <sys/ioctl.h>
113 #include <sys/kernel.h>
114 #include <sys/queue.h>
115 #include <sys/pool.h>
116 
117 #include <net/if.h>
118 #include <net/route.h>
119 #include <net/raw_cb.h>
120 
121 #include <netinet/in.h>
122 #include <netinet/in_var.h>
123 
124 #ifdef IPSEC
125 #include <netinet/ip_ipsp.h>
126 #include <net/if_enc.h>
127 
128 struct ifaddr	*encap_findgwifa(struct sockaddr *);
129 #endif
130 
131 #define	SA(p) ((struct sockaddr *)(p))
132 
133 struct	route_cb	   route_cb;
134 struct	rtstat		   rtstat;
135 struct	radix_node_head	***rt_tables;
136 u_int8_t		   af2rtafidx[AF_MAX+1];
137 u_int8_t		   rtafidx_max;
138 u_int			   rtbl_id_max = 0;
139 
140 int			rttrash;	/* routes not in table but not freed */
141 
142 struct pool		rtentry_pool;	/* pool for rtentry structures */
143 struct pool		rttimer_pool;	/* pool for rttimer structures */
144 
145 int	rtable_init(struct radix_node_head ***);
146 int	okaytoclone(u_int, int);
147 int	rtdeletemsg(struct rtentry *, u_int);
148 int	rtflushclone1(struct radix_node *, void *);
149 void	rtflushclone(struct radix_node_head *, struct rtentry *);
150 int	rt_if_remove_rtdelete(struct radix_node *, void *);
151 
152 #define	LABELID_MAX	50000
153 
154 struct rt_label {
155 	TAILQ_ENTRY(rt_label)	rtl_entry;
156 	char			rtl_name[RTLABEL_LEN];
157 	u_int16_t		rtl_id;
158 	int			rtl_ref;
159 };
160 
161 TAILQ_HEAD(rt_labels, rt_label)	rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels);
162 
#ifdef IPSEC
/*
 * Return the interface address to use for IPsec (PF_KEY) gateways.
 * Always hands back the first address of enc0; the gateway argument
 * is currently unused (all IPsec traffic is keyed to encif[0]).
 */
struct ifaddr *
encap_findgwifa(struct sockaddr *gw)
{
	return (TAILQ_FIRST(&encif[0].sc_if.if_addrlist));
}
#endif
170 
/*
 * Allocate one array of per-AF radix tree heads and let every domain
 * that supports routing attach its table.  Slots are indexed through
 * af2rtafidx[]; index 0 deliberately stays NULL (see route_init()).
 * Returns 0 on success, -1 if the array cannot be allocated.
 */
int
rtable_init(struct radix_node_head ***table)
{
	void		**p;
	struct domain	 *dom;

	if ((p = malloc(sizeof(void *) * (rtafidx_max + 1), M_RTABLE,
	    M_NOWAIT|M_ZERO)) == NULL)
		return (-1);

	/* 2nd pass: attach (the 1st pass, counting, is in route_init()) */
	for (dom = domains; dom != NULL; dom = dom->dom_next)
		if (dom->dom_rtattach)
			dom->dom_rtattach(&p[af2rtafidx[dom->dom_family]],
			    dom->dom_rtoffset);

	*table = (struct radix_node_head **)p;
	return (0);
}
190 
/*
 * One-time routing subsystem initialization: set up the rtentry pool
 * and the radix-tree machinery, assign an rt_tables slot index to each
 * domain that can attach a routing table, and create table 0.
 */
void
route_init()
{
	struct domain	 *dom;

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL);
	rn_init();	/* initialize all zeroes, all ones, mask table */

	bzero(af2rtafidx, sizeof(af2rtafidx));
	rtafidx_max = 1;	/* must have NULL at index 0, so start at 1 */

	/* find out how many tables to allocate */
	for (dom = domains; dom != NULL; dom = dom->dom_next)
		if (dom->dom_rtattach)
			af2rtafidx[dom->dom_family] = rtafidx_max++;

	/* table 0 must always exist */
	if (rtable_add(0) == -1)
		panic("route_init rtable_add");
}
211 
/*
 * Create routing table `id', growing the rt_tables pointer array on
 * demand.  Returns 0 on success; -1 for an out-of-range id, on
 * allocation failure, or when the table already exists.
 */
int
rtable_add(u_int id)	/* must be called at splsoftnet */
{
	void	*p;

	if (id > RT_TABLEID_MAX)
		return (-1);

	if (id == 0 || id > rtbl_id_max) {
		size_t	newlen = sizeof(void *) * (id+1);

		if ((p = malloc(newlen, M_RTABLE, M_NOWAIT|M_ZERO)) == NULL)
			return (-1);
		if (id > 0) {
			/* carry over existing tables, drop the old array */
			bcopy(rt_tables, p, sizeof(void *) * (rtbl_id_max+1));
			free(rt_tables, M_RTABLE);
		}
		rt_tables = p;
		rtbl_id_max = id;
	}

	if (rt_tables[id] != NULL)	/* already exists */
		return (-1);

	return (rtable_init(&rt_tables[id]));
}
238 
239 int
240 rtable_exists(u_int id)	/* verify table with that ID exists */
241 {
242 	if (id > RT_TABLEID_MAX)
243 		return (0);
244 
245 	if (id > rtbl_id_max)
246 		return (0);
247 
248 	if (rt_tables[id] == NULL)	/* should not happen */
249 		return (0);
250 
251 	return (1);
252 }
253 
254 #include "pf.h"
255 #if NPF > 0
256 void
257 rtalloc_noclone(struct route *ro, int howstrict)
258 {
259 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
260 		return;		/* XXX */
261 	ro->ro_rt = rtalloc2(&ro->ro_dst, 1, howstrict);
262 }
263 
264 int
265 okaytoclone(u_int flags, int howstrict)
266 {
267 	if (howstrict == ALL_CLONING)
268 		return (1);
269 	if (howstrict == ONNET_CLONING && !(flags & RTF_GATEWAY))
270 		return (1);
271 	return (0);
272 }
273 
/*
 * pf(4) route lookup: like rtalloc1() but cloning is additionally
 * gated by `howstrict' (see okaytoclone()) and only table 0 is
 * consulted.  Runs at splnet.  The returned route, if any, carries a
 * reference the caller must drop with rtfree().
 */
struct rtentry *
rtalloc2(struct sockaddr *dst, int report, int howstrict)
{
	struct radix_node_head	*rnh;
	struct rtentry		*rt;
	struct radix_node	*rn;
	struct rtentry		*newrt = 0;
	struct rt_addrinfo	 info;
	int			 s = splnet(), err = 0, msgtype = RTM_MISS;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	rnh = rt_gettable(dst->sa_family, 0);
	/* RNF_ROOT nodes are radix-tree internals, not real routes */
	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
	    ((rn->rn_flags & RNF_ROOT) == 0)) {
		newrt = rt = (struct rtentry *)rn;
		if (report && (rt->rt_flags & RTF_CLONING) &&
		    okaytoclone(rt->rt_flags, howstrict)) {
			/* clone a host route off the matching cloning route */
			err = rtrequest1(RTM_RESOLVE, &info, RTP_DEFAULT,
			    &newrt, 0);
			if (err) {
				/* cloning failed: hand back the parent */
				newrt = rt;
				rt->rt_refcnt++;
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/* external resolver must finish the job */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
		} else
			rt->rt_refcnt++;
	} else {
		rtstat.rts_unreach++;
miss:
		if (report) {
			rt_missmsg(msgtype, &info, 0, NULL, err, 0);
		}
	}
	splx(s);
	return (newrt);
}
316 #endif /* NPF > 0 */
317 
318 /*
319  * Packet routing routines.
320  */
321 void
322 rtalloc(struct route *ro)
323 {
324 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
325 		return;				 /* XXX */
326 	ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0);
327 }
328 
/*
 * Core route lookup: find the best match for `dst' in routing table
 * `tableid' and return it with its refcount bumped (caller must
 * rtfree()).  A matching RTF_CLONING route is first cloned into a
 * more specific route via RTM_RESOLVE.  With `report' set,
 * routing-socket messages announce new clones (RTM_ADD), routes
 * needing an external resolver (RTM_RESOLVE) and failures (RTM_MISS).
 * Returns NULL when nothing matches.
 */
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, u_int tableid)
{
	struct radix_node_head	*rnh;
	struct rtentry		*rt;
	struct radix_node	*rn;
	struct rtentry		*newrt = 0;
	struct rt_addrinfo	 info;
	int			 s = splsoftnet(), err = 0, msgtype = RTM_MISS;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	rnh = rt_gettable(dst->sa_family, tableid);
	/* RNF_ROOT nodes are radix-tree internals, not real routes */
	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
	    ((rn->rn_flags & RNF_ROOT) == 0)) {
		newrt = rt = (struct rtentry *)rn;
		if (report && (rt->rt_flags & RTF_CLONING)) {
			err = rtrequest1(RTM_RESOLVE, &info, RTP_DEFAULT,
			    &newrt, tableid);
			if (err) {
				/* cloning failed: fall back to the parent */
				newrt = rt;
				rt->rt_refcnt++;
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/* ask the external resolver to finish */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
			/* Inform listeners of the new route */
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = rt_key(rt);
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if (rt->rt_ifp != NULL) {
				info.rti_info[RTAX_IFP] =
				    TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
				info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
			}
			rt_missmsg(RTM_ADD, &info, rt->rt_flags,
			    rt->rt_ifp, 0, tableid);
		} else
			rt->rt_refcnt++;
	} else {
		if (dst->sa_family != PF_KEY)
			rtstat.rts_unreach++;
	/*
	 * IP encapsulation does lots of lookups where we don't need nor want
	 * the RTM_MISSes that would be generated.  It causes RTM_MISS storms
	 * sent upward breaking user-level routing queries.
	 */
miss:
		if (report && dst->sa_family != PF_KEY) {
			bzero((caddr_t)&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			rt_missmsg(msgtype, &info, 0, NULL, err, tableid);
		}
	}
	splx(s);
	return (newrt);
}
390 
/*
 * Drop one reference to `rt'.  When the last reference to a route
 * that has already been removed from the table (RTF_UP clear) goes
 * away, tear the entry down: cancel timers, release the ifa and the
 * label, and return the key storage and the rtentry to their pools.
 */
void
rtfree(struct rtentry *rt)
{
	struct ifaddr	*ifa;

	if (rt == NULL)
		panic("rtfree");

	rt->rt_refcnt--;

	if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
		/* a node still linked in the tree must not be destroyed */
		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic("rtfree 2");
		rttrash--;
		if (rt->rt_refcnt < 0) {
			/* refcount bug: complain and leak rather than crash */
			printf("rtfree: %p not freed (neg refs)\n", rt);
			return;
		}
		rt_timer_remove_all(rt);
		ifa = rt->rt_ifa;
		if (ifa)
			IFAFREE(ifa);
		rtlabel_unref(rt->rt_labelid);
		/* release the key/gateway buffer, then the entry itself */
		Free(rt_key(rt));
		pool_put(&rtentry_pool, rt);
	}
}
418 
419 void
420 ifafree(struct ifaddr *ifa)
421 {
422 	if (ifa == NULL)
423 		panic("ifafree");
424 	if (ifa->ifa_refcnt == 0)
425 		free(ifa, M_IFADDR);
426 	else
427 		ifa->ifa_refcnt--;
428 }
429 
430 /*
431  * Force a routing table entry to the specified
432  * destination to go through the given gateway.
433  * Normally called as a result of a routing redirect
434  * message from the network layer.
435  *
436  * N.B.: must be called at splsoftnet
437  */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct sockaddr *src,
    struct rtentry **rtp)
{
	struct rtentry		*rt;
	int			 error = 0;
	u_int32_t		*stat = NULL;
	struct rt_addrinfo	 info;
	struct ifaddr		*ifa;
	struct ifnet		*ifp = NULL;

	splassert(IPL_SOFTNET);

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	ifp = ifa->ifa_ifp;
	rt = rtalloc1(dst, 0, 0);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
#define	equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (!(flags & RTF_DONE) && rt &&
	     (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway) != NULL)
		/* redirect points at one of our own addresses: loop */
		error = EHOSTUNREACH;
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if ((rt == NULL) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			if (rt)
				rtfree(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, RTP_DEFAULT, &rt, 0);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			stat = &rtstat.rts_newgateway;
			rt_setgate(rt, rt_key(rt), gateway, 0);
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* hand the route to the caller on success, else release it */
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	/* always report the outcome on the routing socket */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, ifp, error, 0);
}
539 
540 /*
541  * Delete a route and generate a message
542  */
int
rtdeletemsg(struct rtentry *rt, u_int tableid)
{
	int			error;
	struct rt_addrinfo	info;
	struct ifnet		*ifp;

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_flags = rt->rt_flags;
	ifp = rt->rt_ifp;	/* save: rt may be replaced by the request */
	error = rtrequest1(RTM_DELETE, &info, rt->rt_priority, &rt, tableid);

	/* announce the deletion on the routing socket */
	rt_missmsg(RTM_DELETE, &info, info.rti_flags, ifp, error, tableid);

	/* Adjust the refcount */
	if (error == 0 && rt->rt_refcnt <= 0) {
		rt->rt_refcnt++;
		rtfree(rt);
	}
	return (error);
}
572 
573 int
574 rtflushclone1(struct radix_node *rn, void *arg)
575 {
576 	struct rtentry	*rt, *parent;
577 
578 	rt = (struct rtentry *)rn;
579 	parent = (struct rtentry *)arg;
580 	if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent == parent)
581 		rtdeletemsg(rt, 0);
582 	return 0;
583 }
584 
585 void
586 rtflushclone(struct radix_node_head *rnh, struct rtentry *parent)
587 {
588 
589 #ifdef DIAGNOSTIC
590 	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
591 		panic("rtflushclone: called with a non-cloning route");
592 	if (!rnh->rnh_walktree)
593 		panic("rtflushclone: no rnh_walktree");
594 #endif
595 	rnh->rnh_walktree(rnh, rtflushclone1, (void *)parent);
596 }
597 
/*
 * Routing ioctls: none are supported; all routing changes must go
 * through the routing socket instead.
 */
int
rtioctl(u_long req, caddr_t data, struct proc *p)
{
	return (EOPNOTSUPP);
}
603 
/*
 * Find the interface address to attach to a route toward `dst' via
 * `gateway', given the route flags.  Tries increasingly generic
 * matches and finally a route lookup on the gateway itself.  Returns
 * NULL when no usable address can be found.
 */
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
{
	struct ifaddr	*ifa;

#ifdef IPSEC
	/*
	 * If the destination is a PF_KEY address, we'll look
	 * for the existence of a encap interface number or address
	 * in the options list of the gateway. By default, we'll return
	 * enc0.
	 */
	if (dst && (dst->sa_family == PF_KEY))
		return (encap_findgwifa(gateway));
#endif

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		ifa = NULL;
		if (flags & RTF_HOST)
			ifa = ifa_ifwithdstaddr(dst);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr(gateway);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gateway);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet(gateway);
	if (ifa == NULL) {
		/* last resort: route toward the gateway and use its ifa */
		struct rtentry	*rt = rtalloc1(gateway, 0, 0);
		if (rt == NULL)
			return (NULL);
		rt->rt_refcnt--;
		/* The gateway must be local if the same address family. */
		if ((rt->rt_flags & RTF_GATEWAY) &&
		    rt_key(rt)->sa_family == dst->sa_family)
			return (0);	/* i.e. NULL */
		if ((ifa = rt->rt_ifa) == NULL)
			return (NULL);
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* prefer an address of dst's family on the same interface */
		struct ifaddr	*oifa = ifa;
		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (ifa == NULL)
			ifa = oifa;
	}
	return (ifa);
}
663 
664 #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
665 
/*
 * Fill in info->rti_ifa (and rti_ifp) for a route request, deriving
 * them from the RTAX_IFP/RTAX_IFA sockaddrs or, failing that, from
 * the gateway/destination addresses.  Returns 0 on success or
 * ENETUNREACH when no interface address can be determined.
 */
int
rt_getifa(struct rt_addrinfo *info)
{
	struct ifaddr	*ifa;
	int		 error = 0;

	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (info->rti_ifp == NULL && info->rti_info[RTAX_IFP] != NULL
	    && info->rti_info[RTAX_IFP]->sa_family == AF_LINK &&
	    (ifa = ifa_ifwithnet((struct sockaddr *)info->rti_info[RTAX_IFP]))
	    != NULL)
		info->rti_ifp = ifa->ifa_ifp;

	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA]);

	if (info->rti_ifa == NULL) {
		struct sockaddr	*sa;

		/* pick the most specific address available as a hint */
		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
				sa = info->rti_info[RTAX_DST];

		if (sa != NULL && info->rti_ifp != NULL)
			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
		else if (info->rti_info[RTAX_DST] != NULL &&
		    info->rti_info[RTAX_GATEWAY] != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_GATEWAY]);
		else if (sa != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    sa, sa);
	}
	if ((ifa = info->rti_ifa) != NULL) {
		if (info->rti_ifp == NULL)
			info->rti_ifp = ifa->ifa_ifp;
	} else
		error = ENETUNREACH;
	return (error);
}
710 
/*
 * The workhorse of the routing subsystem: add (RTM_ADD), delete
 * (RTM_DELETE) or clone (RTM_RESOLVE) an entry in routing table
 * `tableid' as described by `info', at routing priority `prio'.
 * On success, when `ret_nrt' is non-NULL it receives the affected
 * route: referenced for ADD/RESOLVE; for DELETE the caller inherits
 * the table's reference and must rtfree() it.  Returns 0 or an errno.
 * Runs at splsoftnet.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	int			 s = splsoftnet(); int error = 0;
	struct rtentry		*rt, *crt;
	struct radix_node	*rn;
	struct radix_node_head	*rnh;
	struct ifaddr		*ifa;
	struct sockaddr		*ndst;
	struct sockaddr_rtlabel	*sa_rl;
#define senderr(x) { error = x ; goto bad; }

	if ((rnh = rt_gettable(info->rti_info[RTAX_DST]->sa_family, tableid)) ==
	    NULL)
		senderr(EAFNOSUPPORT);
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;	/* host routes have no mask */
	switch (req) {
	case RTM_DELETE:
		if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;
#ifndef SMALL_KERNEL
		/*
		 * if we got multipath routes, we require users to specify
		 * a matching RTAX_GATEWAY.
		 */
		if (rn_mpath_capable(rnh)) {
			rt = rt_mpath_matchgate(rt,
			    info->rti_info[RTAX_GATEWAY], prio);
			rn = (struct radix_node *)rt;
			if (!rt)
				senderr(ESRCH);
		}
#endif
		if ((rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh, rn)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;

		/* clean up any cloned children */
		if ((rt->rt_flags & RTF_CLONING) != 0)
			rtflushclone(rnh, rt);

		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");

		/* release the cached route to the gateway, if any */
		if (rt->rt_gwroute) {
			rt = rt->rt_gwroute; RTFREE(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = NULL;
		}

		/* a cloned route no longer pins its parent */
		if (rt->rt_parent) {
			rt->rt_parent->rt_refcnt--;
			rt->rt_parent = NULL;
		}

#ifndef SMALL_KERNEL
		/* last remaining multipath sibling loses RTF_MPATH */
		if (rn_mpath_capable(rnh)) {
			if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], rnh)) != NULL &&
			    rn_mpath_next(rn) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
		}
#endif

		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
		rttrash++;	/* out of the tree but not yet freed */

		if (ret_nrt)
			*ret_nrt = rt;	/* caller inherits the reference */
		else if (rt->rt_refcnt <= 0) {
			rt->rt_refcnt++;	/* bump so rtfree() destroys it */
			rtfree(rt);
		}
		break;

	case RTM_RESOLVE:
		/* clone a route from the RTF_CLONING parent in *ret_nrt */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			senderr(EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			senderr(EINVAL);
		ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags & ~(RTF_CLONING | RTF_STATIC);
		info->rti_flags |= RTF_CLONED;
		info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		if ((info->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
			info->rti_flags |= RTF_HOST;
		goto makeroute;

	case RTM_ADD:
		if (info->rti_ifa == 0 && (error = rt_getifa(info)))
			senderr(error);
		ifa = info->rti_ifa;
makeroute:
		rt = pool_get(&rtentry_pool, PR_NOWAIT);
		if (rt == NULL)
			senderr(ENOBUFS);
		Bzero(rt, sizeof(*rt));
		rt->rt_flags = RTF_UP | info->rti_flags;
		rt->rt_priority = prio;	/* init routing priority */
		LIST_INIT(&rt->rt_timer);
		/* allocates the key+gateway buffer and stores the gateway */
		if (rt_setgate(rt, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_GATEWAY], tableid)) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}
		ndst = rt_key(rt);
		if (info->rti_info[RTAX_NETMASK] != NULL) {
			rt_maskedcopy(info->rti_info[RTAX_DST], ndst,
			    info->rti_info[RTAX_NETMASK]);
		} else
			Bcopy(info->rti_info[RTAX_DST], ndst,
			    info->rti_info[RTAX_DST]->sa_len);
#ifndef SMALL_KERNEL
		/* do not permit exactly the same dst/mask/gw pair */
		if (rn_mpath_capable(rnh) &&
		    rt_mpath_conflict(rnh, rt, info->rti_info[RTAX_NETMASK],
		    info->rti_flags & RTF_MPATH)) {
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}
#endif

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

		ifa->ifa_refcnt++;
		rt->rt_ifa = ifa;
		rt->rt_ifp = ifa->ifa_ifp;
		if (req == RTM_RESOLVE) {
			/*
			 * Copy both metrics and a back pointer to the cloned
			 * route's parent.
			 */
			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
			rt->rt_priority = (*ret_nrt)->rt_priority;
			rt->rt_parent = *ret_nrt;	 /* Back ptr. to parent. */
			rt->rt_parent->rt_refcnt++;
		}
		rn = rnh->rnh_addaddr((caddr_t)ndst,
		    (caddr_t)info->rti_info[RTAX_NETMASK], rnh, rt->rt_nodes,
		    rt->rt_priority);
		if (rn == NULL && (crt = rtalloc1(ndst, 0, tableid)) != NULL) {
			/* overwrite cloned route */
			if ((crt->rt_flags & RTF_CLONED) != 0) {
				rtdeletemsg(crt, tableid);
				rn = rnh->rnh_addaddr((caddr_t)ndst,
				    (caddr_t)info->rti_info[RTAX_NETMASK],
				    rnh, rt->rt_nodes, rt->rt_priority);
			}
			RTFREE(crt);
		}
		if (rn == 0) {
			/* insertion failed: undo everything built above */
			IFAFREE(ifa);
			if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent)
				rtfree(rt->rt_parent);
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}

#ifndef SMALL_KERNEL
		/* keep RTF_MPATH consistent with whether siblings exist */
		if (rn_mpath_capable(rnh) &&
		    (rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh)) != NULL &&
		    (rn = rn_mpath_prio(rn, prio)) != NULL) {
			if (rn_mpath_next(rn) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
			else
				((struct rtentry *)rn)->rt_flags |= RTF_MPATH;
		}
#endif

		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt->rt_refcnt++;
		}
		if ((rt->rt_flags & RTF_CLONING) != 0) {
			/* clean up any cloned children */
			rtflushclone(rnh, rt);
		}

		if_group_routechange(info->rti_info[RTAX_DST],
			info->rti_info[RTAX_NETMASK]);
		break;
	}
bad:
	splx(s);
	return (error);
}
916 
/*
 * Store `gate' as rt0's gateway, (re)allocating the combined
 * key+gateway buffer when the current one is missing or too small,
 * and refresh the cached rt_gwroute of RTF_GATEWAY routes.  Returns 0
 * on success, 1 if the buffer could not be allocated.
 */
int
rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate,
    u_int tableid)
{
	caddr_t	new, old;
	int	dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
	struct rtentry	*rt = rt0;

	/* key and gateway share one allocation: [dst][gateway] */
	if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
		old = (caddr_t)rt_key(rt);
		R_Malloc(new, caddr_t, dlen + glen);
		if (new == NULL)
			return 1;
		rt->rt_nodes->rn_key = new;
	} else {
		/* current buffer is large enough: reuse it in place */
		new = rt->rt_nodes->rn_key;
		old = NULL;
	}
	Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
	if (old) {
		/* carry the key into the new buffer, then drop the old one */
		Bcopy(dst, new, dlen);
		Free(old);
	}
	/* any cached route to the previous gateway is now stale */
	if (rt->rt_gwroute != NULL) {
		rt = rt->rt_gwroute;
		RTFREE(rt);
		rt = rt0;
		rt->rt_gwroute = NULL;
	}
	if (rt->rt_flags & RTF_GATEWAY) {
		rt->rt_gwroute = rtalloc1(gate, 1, tableid);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is 0 or greater
		 * than the MTU of gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
		    rt->rt_rmx.rmx_mtu &&
		    rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
			rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
		}
	}
	return (0);
}
963 
964 void
965 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
966     struct sockaddr *netmask)
967 {
968 	u_char	*cp1 = (u_char *)src;
969 	u_char	*cp2 = (u_char *)dst;
970 	u_char	*cp3 = (u_char *)netmask;
971 	u_char	*cplim = cp2 + *cp3;
972 	u_char	*cplim2 = cp2 + *cp1;
973 
974 	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
975 	cp3 += 2;
976 	if (cplim > cplim2)
977 		cplim = cplim2;
978 	while (cp2 < cplim)
979 		*cp2++ = *cp1++ & *cp3++;
980 	if (cp2 < cplim2)
981 		bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
982 }
983 
984 /*
985  * Set up a routing table entry, normally
986  * for an interface.
987  */
988 int
989 rtinit(struct ifaddr *ifa, int cmd, int flags)
990 {
991 	struct rtentry		*rt;
992 	struct sockaddr		*dst, *deldst;
993 	struct mbuf		*m = NULL;
994 	struct rtentry		*nrt = NULL;
995 	int			 error;
996 	struct rt_addrinfo	 info;
997 	struct sockaddr_rtlabel	 sa_rl;
998 	const char		*label;
999 
1000 	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1001 	if (cmd == RTM_DELETE) {
1002 		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1003 			m = m_get(M_DONTWAIT, MT_SONAME);
1004 			if (m == NULL)
1005 				return (ENOBUFS);
1006 			deldst = mtod(m, struct sockaddr *);
1007 			rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
1008 			dst = deldst;
1009 		}
1010 		if ((rt = rtalloc1(dst, 0, 0)) != NULL) {
1011 			rt->rt_refcnt--;
1012 			if (rt->rt_ifa != ifa) {
1013 				if (m != NULL)
1014 					(void) m_free(m);
1015 				return (flags & RTF_HOST ? EHOSTUNREACH
1016 							: ENETUNREACH);
1017 			}
1018 		}
1019 	}
1020 	bzero(&info, sizeof(info));
1021 	info.rti_ifa = ifa;
1022 	info.rti_flags = flags | ifa->ifa_flags;
1023 	info.rti_info[RTAX_DST] = dst;
1024 	if (cmd == RTM_ADD)
1025 		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1026 	if (ifa->ifa_ifp->if_rtlabelid &&
1027 	    (label = rtlabel_id2name(ifa->ifa_ifp->if_rtlabelid)) != NULL) {
1028 		bzero(&sa_rl, sizeof(sa_rl));
1029 		sa_rl.sr_len = sizeof(sa_rl);
1030 		sa_rl.sr_family = AF_UNSPEC;
1031 		strlcpy(sa_rl.sr_label, label, sizeof(sa_rl.sr_label));
1032 		info.rti_info[RTAX_LABEL] = (struct sockaddr *)&sa_rl;
1033 	}
1034 
1035 	/*
1036 	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1037 	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
1038 	 * variable) when RTF_HOST is 1.  still not sure if i can safely
1039 	 * change it to meet bsdi4 behavior.
1040 	 */
1041 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1042 	error = rtrequest1(cmd, &info, RTP_CONNECTED, &nrt, 0);
1043 	if (cmd == RTM_DELETE && error == 0 && (rt = nrt) != NULL) {
1044 		rt_newaddrmsg(cmd, ifa, error, nrt);
1045 		if (rt->rt_refcnt <= 0) {
1046 			rt->rt_refcnt++;
1047 			rtfree(rt);
1048 		}
1049 	}
1050 	if (cmd == RTM_ADD && error == 0 && (rt = nrt) != NULL) {
1051 		rt->rt_refcnt--;
1052 		if (rt->rt_ifa != ifa) {
1053 			printf("rtinit: wrong ifa (%p) was (%p)\n",
1054 			    ifa, rt->rt_ifa);
1055 			if (rt->rt_ifa->ifa_rtrequest)
1056 				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, NULL);
1057 			IFAFREE(rt->rt_ifa);
1058 			rt->rt_ifa = ifa;
1059 			rt->rt_ifp = ifa->ifa_ifp;
1060 			ifa->ifa_refcnt++;
1061 			if (ifa->ifa_rtrequest)
1062 				ifa->ifa_rtrequest(RTM_ADD, rt, NULL);
1063 		}
1064 		rt_newaddrmsg(cmd, ifa, error, nrt);
1065 	}
1066 	return (error);
1067 }
1068 
1069 /*
 * Route timer routines.  These routines allow functions to be called
1071  * for various routes at any time.  This is useful in supporting
1072  * path MTU discovery and redirect route deletion.
1073  *
1074  * This is similar to some BSDI internal functions, but it provides
1075  * for multiple queues for efficiency's sake...
1076  */
1077 
/* All active timer queues; scanned once a second by rt_timer_timer(). */
LIST_HEAD(, rttimer_queue)	rttimer_queue_head;
/* Set once rt_timer_init() has run; guards against double init. */
static int			rt_init_done = 0;
1080 
/*
 * Fire a timer entry: invoke its callback if one was registered,
 * otherwise fall back to deleting the route outright.  Wrapped in
 * do { } while (0) so the expansion is a single statement and
 * composes safely with if/else (it is invoked as the body of a
 * braceless `if (destroy)` below).
 */
#define RTTIMER_CALLOUT(r)	do {				\
	if (r->rtt_func != NULL) {				\
		(*r->rtt_func)(r->rtt_rt, r);			\
	} else {						\
		struct rt_addrinfo info;			\
		bzero(&info, sizeof(info));			\
		info.rti_info[RTAX_DST] = rt_key(r->rtt_rt);	\
		rtrequest1(RTM_DELETE, &info,			\
		    r->rtt_rt->rt_priority, NULL, 0 /* XXX */);	\
	}							\
} while (0)
1092 
1093 /*
1094  * Some subtle order problems with domain initialization mean that
1095  * we cannot count on this being run from rt_init before various
1096  * protocol initializations are done.  Therefore, we make sure
1097  * that this is run when the first queue is added...
1098  */
1099 
1100 void
1101 rt_timer_init()
1102 {
1103 	static struct timeout	rt_timer_timeout;
1104 
1105 	if (rt_init_done)
1106 		panic("rt_timer_init: already initialized");
1107 
1108 	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
1109 	    NULL);
1110 
1111 	LIST_INIT(&rttimer_queue_head);
1112 	timeout_set(&rt_timer_timeout, rt_timer_timer, &rt_timer_timeout);
1113 	timeout_add_sec(&rt_timer_timeout, 1);
1114 	rt_init_done = 1;
1115 }
1116 
1117 struct rttimer_queue *
1118 rt_timer_queue_create(u_int timeout)
1119 {
1120 	struct rttimer_queue	*rtq;
1121 
1122 	if (rt_init_done == 0)
1123 		rt_timer_init();
1124 
1125 	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1126 	if (rtq == NULL)
1127 		return (NULL);
1128 	Bzero(rtq, sizeof *rtq);
1129 
1130 	rtq->rtq_timeout = timeout;
1131 	rtq->rtq_count = 0;
1132 	TAILQ_INIT(&rtq->rtq_head);
1133 	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1134 
1135 	return (rtq);
1136 }
1137 
/*
 * Change the timeout applied when expiring entries from this queue.
 * Entries already queued keep their recorded insertion time and are
 * simply compared against the new value on the next rt_timer_timer()
 * pass.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{
	rtq->rtq_timeout = timeout;
}
1143 
/*
 * Drain a timer queue and unlink it from the global queue list.
 * If destroy is non-zero every pending entry is fired via
 * RTTIMER_CALLOUT before being freed (which may itself delete
 * routes); otherwise entries are discarded silently.  The
 * rttimer_queue structure itself is not freed here.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq, int destroy)
{
	struct rttimer	*r;

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);	/* off the route's own timer list */
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		if (destroy)
			RTTIMER_CALLOUT(r);
		pool_put(&rttimer_pool, r);
		/* rtq_count should mirror the list; warn if it drifts. */
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_destroy: rtq_count reached 0\n");
	}

	LIST_REMOVE(rtq, rtq_link);

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}
1167 
/*
 * Return the number of timer entries currently queued on rtq.
 */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return (rtq->rtq_count);
}
1173 
1174 void
1175 rt_timer_remove_all(struct rtentry *rt)
1176 {
1177 	struct rttimer	*r;
1178 
1179 	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1180 		LIST_REMOVE(r, rtt_link);
1181 		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1182 		if (r->rtt_queue->rtq_count > 0)
1183 			r->rtt_queue->rtq_count--;
1184 		else
1185 			printf("rt_timer_remove_all: rtq_count reached 0\n");
1186 		pool_put(&rttimer_pool, r);
1187 	}
1188 }
1189 
1190 int
1191 rt_timer_add(struct rtentry *rt, void (*func)(struct rtentry *,
1192     struct rttimer *), struct rttimer_queue *queue)
1193 {
1194 	struct rttimer	*r;
1195 	long		 current_time;
1196 
1197 	current_time = time_uptime;
1198 	rt->rt_rmx.rmx_expire = time_second + queue->rtq_timeout;
1199 
1200 	/*
1201 	 * If there's already a timer with this action, destroy it before
1202 	 * we add a new one.
1203 	 */
1204 	for (r = LIST_FIRST(&rt->rt_timer); r != NULL;
1205 	     r = LIST_NEXT(r, rtt_link)) {
1206 		if (r->rtt_func == func) {
1207 			LIST_REMOVE(r, rtt_link);
1208 			TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1209 			if (r->rtt_queue->rtq_count > 0)
1210 				r->rtt_queue->rtq_count--;
1211 			else
1212 				printf("rt_timer_add: rtq_count reached 0\n");
1213 			pool_put(&rttimer_pool, r);
1214 			break;  /* only one per list, so we can quit... */
1215 		}
1216 	}
1217 
1218 	r = pool_get(&rttimer_pool, PR_NOWAIT);
1219 	if (r == NULL)
1220 		return (ENOBUFS);
1221 	Bzero(r, sizeof(*r));
1222 
1223 	r->rtt_rt = rt;
1224 	r->rtt_time = current_time;
1225 	r->rtt_func = func;
1226 	r->rtt_queue = queue;
1227 	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1228 	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1229 	r->rtt_queue->rtq_count++;
1230 
1231 	return (0);
1232 }
1233 
1234 struct radix_node_head *
1235 rt_gettable(sa_family_t af, u_int id)
1236 {
1237 	return (rt_tables[id] ? rt_tables[id][af2rtafidx[af]] : NULL);
1238 }
1239 
1240 struct radix_node *
1241 rt_lookup(struct sockaddr *dst, struct sockaddr *mask, int tableid)
1242 {
1243 	struct radix_node_head	*rnh;
1244 
1245 	if ((rnh = rt_gettable(dst->sa_family, tableid)) == NULL)
1246 		return (NULL);
1247 
1248 	return (rnh->rnh_lookup(dst, mask, rnh));
1249 }
1250 
1251 /* ARGSUSED */
1252 void
1253 rt_timer_timer(void *arg)
1254 {
1255 	struct timeout		*to = (struct timeout *)arg;
1256 	struct rttimer_queue	*rtq;
1257 	struct rttimer		*r;
1258 	long			 current_time;
1259 	int			 s;
1260 
1261 	current_time = time_uptime;
1262 
1263 	s = splsoftnet();
1264 	for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL;
1265 	     rtq = LIST_NEXT(rtq, rtq_link)) {
1266 		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1267 		    (r->rtt_time + rtq->rtq_timeout) < current_time) {
1268 			LIST_REMOVE(r, rtt_link);
1269 			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1270 			RTTIMER_CALLOUT(r);
1271 			pool_put(&rttimer_pool, r);
1272 			if (rtq->rtq_count > 0)
1273 				rtq->rtq_count--;
1274 			else
1275 				printf("rt_timer_timer: rtq_count reached 0\n");
1276 		}
1277 	}
1278 	splx(s);
1279 
1280 	timeout_add_sec(to, 1);
1281 }
1282 
/*
 * Map a route label name to its numeric id, creating a new entry if
 * the name is unknown.  Every successful call takes one reference;
 * release with rtlabel_unref().  Returns 0 on failure or for the
 * empty name (id 0 means "no label").
 */
u_int16_t
rtlabel_name2id(char *name)
{
	struct rt_label		*label, *p = NULL;
	u_int16_t		 new_id = 1;

	if (!name[0])
		return (0);

	/* Existing label: bump the refcount and reuse its id. */
	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (strcmp(name, label->rtl_name) == 0) {
			label->rtl_ref++;
			return (label->rtl_id);
		}

	/*
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find. if there is none or the list
	 * is empty, append a new entry at the end.
	 */

	/*
	 * The list is kept ordered by id (maintained by the insertion
	 * below).  Walk while ids are consecutive from 1: afterwards
	 * new_id is the first unused id and p (if non-NULL) the entry
	 * to insert in front of.
	 */
	if (!TAILQ_EMPTY(&rt_labels))
		for (p = TAILQ_FIRST(&rt_labels); p != NULL &&
		    p->rtl_id == new_id; p = TAILQ_NEXT(p, rtl_entry))
			new_id = p->rtl_id + 1;

	if (new_id > LABELID_MAX)
		return (0);

	label = malloc(sizeof(*label), M_TEMP, M_NOWAIT|M_ZERO);
	if (label == NULL)
		return (0);
	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
	label->rtl_id = new_id;
	label->rtl_ref++;

	if (p != NULL)	/* insert new entry before p */
		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
	else		/* either list empty or no free slot in between */
		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);

	return (label->rtl_id);
}
1326 
1327 const char *
1328 rtlabel_id2name(u_int16_t id)
1329 {
1330 	struct rt_label	*label;
1331 
1332 	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1333 		if (label->rtl_id == id)
1334 			return (label->rtl_name);
1335 
1336 	return (NULL);
1337 }
1338 
1339 void
1340 rtlabel_unref(u_int16_t id)
1341 {
1342 	struct rt_label	*p, *next;
1343 
1344 	if (id == 0)
1345 		return;
1346 
1347 	for (p = TAILQ_FIRST(&rt_labels); p != NULL; p = next) {
1348 		next = TAILQ_NEXT(p, rtl_entry);
1349 		if (id == p->rtl_id) {
1350 			if (--p->rtl_ref == 0) {
1351 				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
1352 				free(p, M_TEMP);
1353 			}
1354 			break;
1355 		}
1356 	}
1357 }
1358 
1359 void
1360 rt_if_remove(struct ifnet *ifp)
1361 {
1362 	int			 i;
1363 	struct radix_node_head	*rnh;
1364 
1365 	for (i = 1; i <= AF_MAX; i++)
1366 		if ((rnh = rt_gettable(i, 0)) != NULL)
1367 			while ((*rnh->rnh_walktree)(rnh,
1368 			    rt_if_remove_rtdelete, ifp) == EAGAIN)
1369 				;	/* nothing */
1370 }
1371 
1372 /*
1373  * Note that deleting a RTF_CLONING route can trigger the
1374  * deletion of more entries, so we need to cancel the walk
1375  * and return EAGAIN.  The caller should restart the walk
1376  * as long as EAGAIN is returned.
1377  */
1378 int
1379 rt_if_remove_rtdelete(struct radix_node *rn, void *vifp)
1380 {
1381 	struct ifnet	*ifp = vifp;
1382 	struct rtentry	*rt = (struct rtentry *)rn;
1383 
1384 	if (rt->rt_ifp == ifp) {
1385 		int	cloning = (rt->rt_flags & RTF_CLONING);
1386 
1387 		if (rtdeletemsg(rt, 0) == 0 && cloning)
1388 			return (EAGAIN);
1389 	}
1390 
1391 	/*
1392 	 * XXX There should be no need to check for rt_ifa belonging to this
1393 	 * interface, because then rt_ifp is set, right?
1394 	 */
1395 
1396 	return (0);
1397 }
1398