xref: /openbsd-src/sys/net/route.c (revision 43003dfe3ad45d1698bed8a37f2b0f5b14f20d4f)
1 /*	$OpenBSD: route.c,v 1.112 2009/10/10 22:08:26 dms Exp $	*/
2 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)route.c	8.2 (Berkeley) 11/15/93
62  */
63 
64 /*
65  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
66  *
67  * NRL grants permission for redistribution and use in source and binary
68  * forms, with or without modification, of the software and documentation
69  * created at NRL provided that the following conditions are met:
70  *
71  * 1. Redistributions of source code must retain the above copyright
72  *    notice, this list of conditions and the following disclaimer.
73  * 2. Redistributions in binary form must reproduce the above copyright
74  *    notice, this list of conditions and the following disclaimer in the
75  *    documentation and/or other materials provided with the distribution.
76  * 3. All advertising materials mentioning features or use of this software
77  *    must display the following acknowledgements:
78  * 	This product includes software developed by the University of
79  * 	California, Berkeley and its contributors.
80  * 	This product includes software developed at the Information
81  * 	Technology Division, US Naval Research Laboratory.
82  * 4. Neither the name of the NRL nor the names of its contributors
83  *    may be used to endorse or promote products derived from this software
84  *    without specific prior written permission.
85  *
86  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
90  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97  *
98  * The views and conclusions contained in the software and documentation
99  * are those of the authors and should not be interpreted as representing
100  * official policies, either expressed or implied, of the US Naval
101  * Research Laboratory (NRL).
102  */
103 
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/proc.h>
107 #include <sys/mbuf.h>
108 #include <sys/socket.h>
109 #include <sys/socketvar.h>
110 #include <sys/domain.h>
111 #include <sys/protosw.h>
112 #include <sys/ioctl.h>
113 #include <sys/kernel.h>
114 #include <sys/queue.h>
115 #include <sys/pool.h>
116 
117 #include <net/if.h>
118 #include <net/route.h>
119 #include <net/raw_cb.h>
120 
121 #include <netinet/in.h>
122 #include <netinet/in_var.h>
123 
124 #ifdef MPLS
125 #include <netmpls/mpls.h>
126 #endif
127 
128 #ifdef IPSEC
129 #include <netinet/ip_ipsp.h>
130 #include <net/if_enc.h>
131 
132 struct ifaddr	*encap_findgwifa(struct sockaddr *);
133 #endif
134 
135 #define	SA(p) ((struct sockaddr *)(p))
136 
137 struct	route_cb	   route_cb;
138 struct	rtstat		   rtstat;
139 struct	radix_node_head	***rt_tables;
140 u_int8_t		   af2rtafidx[AF_MAX+1];
141 u_int8_t		   rtafidx_max;
142 u_int			   rtbl_id_max = 0;
143 
144 int			rttrash;	/* routes not in table but not freed */
145 
146 struct pool		rtentry_pool;	/* pool for rtentry structures */
147 struct pool		rttimer_pool;	/* pool for rttimer structures */
148 
149 int	rtable_init(struct radix_node_head ***);
150 int	okaytoclone(u_int, int);
151 int	rtflushclone1(struct radix_node *, void *);
152 void	rtflushclone(struct radix_node_head *, struct rtentry *);
153 int	rt_if_remove_rtdelete(struct radix_node *, void *);
154 #ifndef SMALL_KERNEL
155 int	rt_if_linkstate_change(struct radix_node *, void *);
156 #endif
157 
158 #define	LABELID_MAX	50000
159 
160 struct rt_label {
161 	TAILQ_ENTRY(rt_label)	rtl_entry;
162 	char			rtl_name[RTLABEL_LEN];
163 	u_int16_t		rtl_id;
164 	int			rtl_ref;
165 };
166 
167 TAILQ_HEAD(rt_labels, rt_label)	rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels);
168 
169 #ifdef IPSEC
170 struct ifaddr *
171 encap_findgwifa(struct sockaddr *gw)
172 {
173 	return (TAILQ_FIRST(&encif[0].sc_if.if_addrlist));
174 }
175 #endif
176 
/*
 * Allocate and populate one array of per-AF radix tree heads.
 * Slot 0 stays NULL (af2rtafidx indices start at 1); every domain
 * that provides dom_rtattach gets a tree head at its assigned slot.
 * Returns 0 on success, -1 on allocation failure.
 */
int
rtable_init(struct radix_node_head ***table)
{
	void		**p;
	struct domain	 *dom;

	if ((p = malloc(sizeof(void *) * (rtafidx_max + 1), M_RTABLE,
	    M_NOWAIT|M_ZERO)) == NULL)
		return (-1);

	/* 2nd pass: attach */
	for (dom = domains; dom != NULL; dom = dom->dom_next)
		if (dom->dom_rtattach)
			dom->dom_rtattach(&p[af2rtafidx[dom->dom_family]],
			    dom->dom_rtoffset);

	*table = (struct radix_node_head **)p;
	return (0);
}
196 
/*
 * One-time initialization of the routing code: create the rtentry
 * pool, initialize the radix trie machinery, assign each routable
 * domain an index into the per-table head arrays, and create routing
 * table 0.  Panics if table 0 cannot be created.
 */
void
route_init()
{
	struct domain	 *dom;

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL);
	rn_init();	/* initialize all zeroes, all ones, mask table */

	bzero(af2rtafidx, sizeof(af2rtafidx));
	rtafidx_max = 1;	/* must have NULL at index 0, so start at 1 */

	/* find out how many tables to allocate */
	for (dom = domains; dom != NULL; dom = dom->dom_next)
		if (dom->dom_rtattach)
			af2rtafidx[dom->dom_family] = rtafidx_max++;

	if (rtable_add(0) == -1)
		panic("route_init rtable_add");
}
217 
/*
 * Create routing table `id', growing the global rt_tables pointer
 * array first when the ID lies beyond the current maximum.  Returns
 * 0 on success; -1 on a bad ID, allocation failure, or if the table
 * already exists.
 */
int
rtable_add(u_int id)	/* must be called at splsoftnet */
{
	void	*p;

	if (id > RT_TABLEID_MAX)
		return (-1);

	/* id == 0 forces the initial allocation done from route_init() */
	if (id == 0 || id > rtbl_id_max) {
		size_t	newlen = sizeof(void *) * (id+1);

		if ((p = malloc(newlen, M_RTABLE, M_NOWAIT|M_ZERO)) == NULL)
			return (-1);
		if (rt_tables) {
			/* carry the already-created tables over */
			bcopy(rt_tables, p, sizeof(void *) * (rtbl_id_max+1));
			free(rt_tables, M_RTABLE);
		}
		rt_tables = p;
		rtbl_id_max = id;
	}

	if (rt_tables[id] != NULL)	/* already exists */
		return (-1);

	return (rtable_init(&rt_tables[id]));
}
244 
245 int
246 rtable_exists(u_int id)	/* verify table with that ID exists */
247 {
248 	if (id > RT_TABLEID_MAX)
249 		return (0);
250 
251 	if (id > rtbl_id_max)
252 		return (0);
253 
254 	if (rt_tables[id] == NULL)
255 		return (0);
256 
257 	return (1);
258 }
259 
260 #include "pf.h"
261 #if NPF > 0
262 void
263 rtalloc_noclone(struct route *ro, int howstrict)
264 {
265 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
266 		return;		/* XXX */
267 	ro->ro_rt = rtalloc2(&ro->ro_dst, 1, howstrict);
268 }
269 
270 int
271 okaytoclone(u_int flags, int howstrict)
272 {
273 	if (howstrict == ALL_CLONING)
274 		return (1);
275 	if (howstrict == ONNET_CLONING && !(flags & RTF_GATEWAY))
276 		return (1);
277 	return (0);
278 }
279 
280 struct rtentry *
281 rtalloc2(struct sockaddr *dst, int report, int howstrict)
282 {
283 	struct radix_node_head	*rnh;
284 	struct rtentry		*rt;
285 	struct radix_node	*rn;
286 	struct rtentry		*newrt = 0;
287 	struct rt_addrinfo	 info;
288 	int			 s = splnet(), err = 0, msgtype = RTM_MISS;
289 
290 	bzero(&info, sizeof(info));
291 	info.rti_info[RTAX_DST] = dst;
292 
293 	rnh = rt_gettable(dst->sa_family, 0);
294 	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
295 	    ((rn->rn_flags & RNF_ROOT) == 0)) {
296 		newrt = rt = (struct rtentry *)rn;
297 		if (report && (rt->rt_flags & RTF_CLONING) &&
298 		    okaytoclone(rt->rt_flags, howstrict)) {
299 			err = rtrequest1(RTM_RESOLVE, &info, RTP_DEFAULT,
300 			    &newrt, 0);
301 			if (err) {
302 				newrt = rt;
303 				rt->rt_refcnt++;
304 				goto miss;
305 			}
306 			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
307 				msgtype = RTM_RESOLVE;
308 				goto miss;
309 			}
310 		} else
311 			rt->rt_refcnt++;
312 	} else {
313 		rtstat.rts_unreach++;
314 miss:
315 		if (report) {
316 			rt_missmsg(msgtype, &info, 0, NULL, err, 0);
317 		}
318 	}
319 	splx(s);
320 	return (newrt);
321 }
322 #endif /* NPF > 0 */
323 
324 /*
325  * Packet routing routines.
326  */
327 void
328 rtalloc(struct route *ro)
329 {
330 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
331 		return;				 /* XXX */
332 	ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0);
333 }
334 
/*
 * Core route lookup in table `tableid'.  Returns a referenced
 * rtentry or NULL.  If `report' is set, RTF_CLONING routes are
 * cloned via rtrequest1(RTM_RESOLVE) and routing-socket messages
 * (RTM_ADD / RTM_MISS / RTM_RESOLVE) are generated as appropriate.
 */
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, u_int tableid)
{
	struct radix_node_head	*rnh;
	struct rtentry		*rt;
	struct radix_node	*rn;
	struct rtentry		*newrt = 0;
	struct rt_addrinfo	 info;
	int			 s = splsoftnet(), err = 0, msgtype = RTM_MISS;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	rnh = rt_gettable(dst->sa_family, tableid);
	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
	    ((rn->rn_flags & RNF_ROOT) == 0)) {
		newrt = rt = (struct rtentry *)rn;
		if (report && (rt->rt_flags & RTF_CLONING)) {
			err = rtrequest1(RTM_RESOLVE, &info, RTP_DEFAULT,
			    &newrt, tableid);
			if (err) {
				/* cloning failed; return the parent route */
				newrt = rt;
				rt->rt_refcnt++;
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/* tell an external resolver to finish the job */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
			/* Inform listeners of the new route */
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = rt_key(rt);
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if (rt->rt_ifp != NULL) {
				info.rti_info[RTAX_IFP] =
				    TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
				info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
			}
			rt_missmsg(RTM_ADD, &info, rt->rt_flags,
			    rt->rt_ifp, 0, tableid);
		} else
			rt->rt_refcnt++;
	} else {
		if (dst->sa_family != PF_KEY)
			rtstat.rts_unreach++;
	/*
	 * IP encapsulation does lots of lookups where we don't need nor want
	 * the RTM_MISSes that would be generated.  It causes RTM_MISS storms
	 * sent upward breaking user-level routing queries.
	 */
miss:
		if (report && dst->sa_family != PF_KEY) {
			bzero((caddr_t)&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			rt_missmsg(msgtype, &info, 0, NULL, err, tableid);
		}
	}
	splx(s);
	return (newrt);
}
396 
/*
 * Release one reference to a route.  Once the route has no
 * references left and is no longer RTF_UP (i.e. it has been removed
 * from the table by rtrequest1(RTM_DELETE)), tear it down: cancel
 * its timers, drop the ifa and label references, and return the
 * entry to the pool.
 */
void
rtfree(struct rtentry *rt)
{
	struct ifaddr	*ifa;

	if (rt == NULL)
		panic("rtfree");

	rt->rt_refcnt--;

	if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
		if (rt->rt_refcnt == 0 && (rt->rt_nodes->rn_flags & RNF_ACTIVE))
			return; /* route still active but currently down */
		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic("rtfree 2");
		/* this entry was counted into rttrash by rtrequest1() */
		rttrash--;
		if (rt->rt_refcnt < 0) {
			printf("rtfree: %p not freed (neg refs)\n", rt);
			return;
		}
		rt_timer_remove_all(rt);
		ifa = rt->rt_ifa;
		if (ifa)
			IFAFREE(ifa);
		rtlabel_unref(rt->rt_labelid);
#ifdef MPLS
		/* rt_llinfo holds the struct rt_mpls allocated in rtrequest1() */
		if (rt->rt_flags & RTF_MPLS)
			free(rt->rt_llinfo, M_TEMP);
#endif
		Free(rt_key(rt));
		pool_put(&rtentry_pool, rt);
	}
}
430 
431 void
432 ifafree(struct ifaddr *ifa)
433 {
434 	if (ifa == NULL)
435 		panic("ifafree");
436 	if (ifa->ifa_refcnt == 0)
437 		free(ifa, M_IFADDR);
438 	else
439 		ifa->ifa_refcnt--;
440 }
441 
442 /*
443  * Force a routing table entry to the specified
444  * destination to go through the given gateway.
445  * Normally called as a result of a routing redirect
446  * message from the network layer.
447  *
448  * N.B.: must be called at splsoftnet
449  */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct sockaddr *src,
    struct rtentry **rtp, u_int rdomain)
{
	struct rtentry		*rt;
	int			 error = 0;
	u_int32_t		*stat = NULL;	/* statistics counter to bump on success */
	struct rt_addrinfo	 info;
	struct ifaddr		*ifa;
	struct ifnet		*ifp = NULL;

	splsoftassert(IPL_SOFTNET);

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet(gateway, rdomain)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	ifp = ifa->ifa_ifp;
	rt = rtalloc1(dst, 0, rdomain);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
#define	equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (!(flags & RTF_DONE) && rt &&
	     (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway, rdomain) != NULL)
		error = EHOSTUNREACH;
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if ((rt == NULL) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			if (rt)
				rtfree(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, RTP_DEFAULT, &rt,
			    rdomain);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			stat = &rtstat.rts_newgateway;
			rt_setgate(rt, rt_key(rt), gateway, rdomain);
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* hand the route to the caller only on success */
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	/* always notify routing-socket listeners about the redirect */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, ifp, error, rdomain);
}
552 
553 /*
554  * Delete a route and generate a message
555  */
int
rtdeletemsg(struct rtentry *rt, u_int tableid)
{
	int			error;
	struct rt_addrinfo	info;
	struct ifnet		*ifp;

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_flags = rt->rt_flags;
	ifp = rt->rt_ifp;
	error = rtrequest1(RTM_DELETE, &info, rt->rt_priority, &rt, tableid);

	rt_missmsg(RTM_DELETE, &info, info.rti_flags, ifp, error, tableid);

	/* Adjust the refcount */
	if (error == 0 && rt->rt_refcnt <= 0) {
		/* take a reference so rtfree() actually destroys the entry */
		rt->rt_refcnt++;
		rtfree(rt);
	}
	return (error);
}
585 
586 int
587 rtflushclone1(struct radix_node *rn, void *arg)
588 {
589 	struct rtentry	*rt, *parent;
590 
591 	rt = (struct rtentry *)rn;
592 	parent = (struct rtentry *)arg;
593 	if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent == parent)
594 		rtdeletemsg(rt, 0);
595 	return 0;
596 }
597 
598 void
599 rtflushclone(struct radix_node_head *rnh, struct rtentry *parent)
600 {
601 
602 #ifdef DIAGNOSTIC
603 	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
604 		panic("rtflushclone: called with a non-cloning route");
605 	if (!rnh->rnh_walktree)
606 		panic("rtflushclone: no rnh_walktree");
607 #endif
608 	rnh->rnh_walktree(rnh, rtflushclone1, (void *)parent);
609 }
610 
/*
 * Routing ioctls are not supported; route manipulation goes through
 * the routing socket instead.  Always returns EOPNOTSUPP.
 */
int
rtioctl(u_long req, caddr_t data, struct proc *p)
{
	return (EOPNOTSUPP);
}
616 
/*
 * Find the interface address to associate with a route to `dst'
 * through `gateway' in routing domain `rdomain'.  Tries, in order:
 * the gateway as a local/point-to-point address, the gateway's
 * network, and finally a recursive route lookup on the gateway.
 * Returns NULL if no usable interface address can be determined.
 */
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
    u_int rdomain)
{
	struct ifaddr	*ifa;

#ifdef IPSEC
	/*
	 * If the destination is a PF_KEY address, we'll look
	 * for the existence of a encap interface number or address
	 * in the options list of the gateway. By default, we'll return
	 * enc0.
	 */
	if (dst && (dst->sa_family == PF_KEY))
		return (encap_findgwifa(gateway));
#endif

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		ifa = NULL;
		if (flags & RTF_HOST)
			ifa = ifa_ifwithdstaddr(dst, rdomain);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr(gateway, rdomain);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gateway, rdomain);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet(gateway, rdomain);
	if (ifa == NULL) {
		/* last resort: route to the gateway itself */
		struct rtentry	*rt = rtalloc1(gateway, 0, rdomain);
		if (rt == NULL)
			return (NULL);
		rt->rt_refcnt--;
		/* The gateway must be local if the same address family. */
		if ((rt->rt_flags & RTF_GATEWAY) &&
		    rt_key(rt)->sa_family == dst->sa_family)
			return (0);	/* i.e. NULL: gateway not directly reachable */
		if ((ifa = rt->rt_ifa) == NULL)
			return (NULL);
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* prefer an address of dst's family on the same interface */
		struct ifaddr	*oifa = ifa;
		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (ifa == NULL)
			ifa = oifa;
	}
	return (ifa);
}
677 
678 #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
679 
/*
 * Fill in info->rti_ifa (and rti_ifp if unset) for a route request,
 * deriving them from the IFP/IFA/GATEWAY/DST sockaddrs in `info',
 * in that order of preference.  Returns 0 on success or ENETUNREACH
 * when no interface address can be found.
 */
int
rt_getifa(struct rt_addrinfo *info, u_int rdom)
{
	struct ifaddr	*ifa;
	int		 error = 0;

	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (info->rti_ifp == NULL && info->rti_info[RTAX_IFP] != NULL
	    && info->rti_info[RTAX_IFP]->sa_family == AF_LINK &&
	    (ifa = ifa_ifwithnet((struct sockaddr *)info->rti_info[RTAX_IFP],
	    rdom)) != NULL)
		info->rti_ifp = ifa->ifa_ifp;

	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rdom);

	if (info->rti_ifa == NULL) {
		struct sockaddr	*sa;

		/* pick the most specific address available as the clue */
		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
				sa = info->rti_info[RTAX_DST];

		if (sa != NULL && info->rti_ifp != NULL)
			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
		else if (info->rti_info[RTAX_DST] != NULL &&
		    info->rti_info[RTAX_GATEWAY] != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_GATEWAY],
			    rdom);
		else if (sa != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    sa, sa, rdom);
	}
	if ((ifa = info->rti_ifa) != NULL) {
		if (info->rti_ifp == NULL)
			info->rti_ifp = ifa->ifa_ifp;
	} else
		error = ENETUNREACH;
	return (error);
}
725 
/*
 * Central routing-table request handler: RTM_ADD, RTM_DELETE, or
 * RTM_RESOLVE (clone a RTF_CLONING parent) on table `tableid', as
 * described by `info'.  On success a referenced entry is returned
 * via *ret_nrt when ret_nrt is non-NULL.  Handles multipath routes,
 * route labels, and MPLS link-layer info.  Returns 0 or an errno.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	int			 s = splsoftnet(); int error = 0;
	struct rtentry		*rt, *crt;
	struct radix_node	*rn;
	struct radix_node_head	*rnh;
	struct ifaddr		*ifa;
	struct sockaddr		*ndst;
	struct sockaddr_rtlabel	*sa_rl, sa_rl2;
#ifdef MPLS
	struct sockaddr_mpls	*sa_mpls;
#endif
#define senderr(x) { error = x ; goto bad; }

	if ((rnh = rt_gettable(info->rti_info[RTAX_DST]->sa_family, tableid)) ==
	    NULL)
		senderr(EAFNOSUPPORT);
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;
#ifndef SMALL_KERNEL
		/*
		 * if we got multipath routes, we require users to specify
		 * a matching RTAX_GATEWAY.
		 */
		if (rn_mpath_capable(rnh)) {
			rt = rt_mpath_matchgate(rt,
			    info->rti_info[RTAX_GATEWAY], prio);
			rn = (struct radix_node *)rt;
			if (!rt)
				senderr(ESRCH);
		}
#endif
		if ((rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh, rn)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;

		/* clean up any cloned children */
		if ((rt->rt_flags & RTF_CLONING) != 0)
			rtflushclone(rnh, rt);

		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");

		/* drop the cached gateway route, if any */
		if (rt->rt_gwroute) {
			rt = rt->rt_gwroute; RTFREE(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = NULL;
		}

		if (rt->rt_parent) {
			rt->rt_parent->rt_refcnt--;
			rt->rt_parent = NULL;
		}

#ifndef SMALL_KERNEL
		/* a lone survivor of a multipath group loses RTF_MPATH */
		if (rn_mpath_capable(rnh)) {
			if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], rnh)) != NULL &&
			    rn_mpath_next(rn, 0) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
		}
#endif

		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
		/* the entry is off the table but not yet freed; see rtfree() */
		rttrash++;

		if (ret_nrt)
			*ret_nrt = rt;
		else if (rt->rt_refcnt <= 0) {
			/* take a reference so rtfree() destroys the entry */
			rt->rt_refcnt++;
			rtfree(rt);
		}
		break;

	case RTM_RESOLVE:
		/* *ret_nrt is the RTF_CLONING parent to clone from */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			senderr(EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			senderr(EINVAL);
		ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags & ~(RTF_CLONING | RTF_STATIC);
		info->rti_flags |= RTF_CLONED;
		info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		if ((info->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
			info->rti_flags |= RTF_HOST;
		info->rti_info[RTAX_LABEL] =
		    rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
		goto makeroute;

	case RTM_ADD:
		if (info->rti_ifa == 0 && (error = rt_getifa(info,
		    /* XXX wrong because only rdomains allowed */ tableid)))
			senderr(error);
		ifa = info->rti_ifa;
makeroute:
		rt = pool_get(&rtentry_pool, PR_NOWAIT);
		if (rt == NULL)
			senderr(ENOBUFS);
		Bzero(rt, sizeof(*rt));

		rt->rt_flags = info->rti_flags;

		if (prio == 0)
			prio = ifa->ifa_ifp->if_priority + RTP_STATIC;
		rt->rt_priority = prio;	/* init routing priority */
		if ((LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) ||
		    ifa->ifa_ifp->if_link_state == LINK_STATE_UNKNOWN) &&
		    ifa->ifa_ifp->if_flags & IFF_UP)
			rt->rt_flags |= RTF_UP;
		else {
			rt->rt_flags &= ~RTF_UP;
			rt->rt_priority |= RTP_DOWN;
		}
		LIST_INIT(&rt->rt_timer);
		/* rt_setgate() allocates the combined key+gateway buffer */
		if (rt_setgate(rt, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_GATEWAY], tableid)) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}
		ndst = rt_key(rt);
		if (info->rti_info[RTAX_NETMASK] != NULL) {
			rt_maskedcopy(info->rti_info[RTAX_DST], ndst,
			    info->rti_info[RTAX_NETMASK]);
		} else
			Bcopy(info->rti_info[RTAX_DST], ndst,
			    info->rti_info[RTAX_DST]->sa_len);
#ifndef SMALL_KERNEL
		/* do not permit exactly the same dst/mask/gw pair */
		if (rn_mpath_capable(rnh) &&
		    rt_mpath_conflict(rnh, rt, info->rti_info[RTAX_NETMASK],
		    info->rti_flags & RTF_MPATH)) {
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}
#endif

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

#ifdef MPLS
		/* We have to allocate additional space for MPLS infos */
		if (info->rti_info[RTAX_SRC] != NULL ||
		    info->rti_info[RTAX_DST]->sa_family == AF_MPLS) {
			struct rt_mpls *rt_mpls;

			sa_mpls = (struct sockaddr_mpls *)
			    info->rti_info[RTAX_SRC];

			rt->rt_llinfo = (caddr_t)malloc(sizeof(struct rt_mpls),
			    M_TEMP, M_NOWAIT|M_ZERO);

			if (rt->rt_llinfo == NULL) {
				if (rt->rt_gwroute)
					rtfree(rt->rt_gwroute);
				Free(rt_key(rt));
				pool_put(&rtentry_pool, rt);
				senderr(ENOMEM);
			}

			rt_mpls = (struct rt_mpls *)rt->rt_llinfo;

			if (sa_mpls != NULL)
				rt_mpls->mpls_label = sa_mpls->smpls_label;

			rt_mpls->mpls_operation = info->rti_mpls;

			/* XXX: set experimental bits */

			rt->rt_flags |= RTF_MPLS;
		}
#endif

		ifa->ifa_refcnt++;
		rt->rt_ifa = ifa;
		rt->rt_ifp = ifa->ifa_ifp;
		if (req == RTM_RESOLVE) {
			/*
			 * Copy both metrics and a back pointer to the cloned
			 * route's parent.
			 */
			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
			rt->rt_priority = (*ret_nrt)->rt_priority;
			rt->rt_parent = *ret_nrt;	 /* Back ptr. to parent. */
			rt->rt_parent->rt_refcnt++;
		}
		rn = rnh->rnh_addaddr((caddr_t)ndst,
		    (caddr_t)info->rti_info[RTAX_NETMASK], rnh, rt->rt_nodes,
		    rt->rt_priority);
		if (rn == NULL && (crt = rtalloc1(ndst, 0, tableid)) != NULL) {
			/* overwrite cloned route */
			if ((crt->rt_flags & RTF_CLONED) != 0) {
				rtdeletemsg(crt, tableid);
				rn = rnh->rnh_addaddr((caddr_t)ndst,
				    (caddr_t)info->rti_info[RTAX_NETMASK],
				    rnh, rt->rt_nodes, rt->rt_priority);
			}
			RTFREE(crt);
		}
		if (rn == 0) {
			/* insertion failed: undo everything built above */
			IFAFREE(ifa);
			if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent)
				rtfree(rt->rt_parent);
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}

#ifndef SMALL_KERNEL
		/* keep RTF_MPATH consistent with group membership */
		if (rn_mpath_capable(rnh) &&
		    (rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh)) != NULL &&
		    (rn = rn_mpath_prio(rn, prio)) != NULL) {
			if (rn_mpath_next(rn, 0) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
			else
				((struct rtentry *)rn)->rt_flags |= RTF_MPATH;
		}
#endif

		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt->rt_refcnt++;
		}
		if ((rt->rt_flags & RTF_CLONING) != 0) {
			/* clean up any cloned children */
			rtflushclone(rnh, rt);
		}

		if_group_routechange(info->rti_info[RTAX_DST],
			info->rti_info[RTAX_NETMASK]);
		break;
	}
bad:
	splx(s);
	return (error);
}
982 
/*
 * Install `gate' as rt0's gateway.  The route key and the gateway
 * live in one allocation (key at offset 0, gateway at the rounded-up
 * key length); the buffer is reallocated when the new gateway does
 * not fit.  For RTF_GATEWAY routes the cached rt_gwroute is
 * recomputed.  Returns 0 on success, 1 on allocation failure.
 */
int
rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate,
    u_int tableid)
{
	caddr_t	new, old;
	int	dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
	struct rtentry	*rt = rt0;

	if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
		/* need a bigger buffer: allocate key+gateway anew */
		old = (caddr_t)rt_key(rt);
		R_Malloc(new, caddr_t, dlen + glen);
		if (new == NULL)
			return 1;
		rt->rt_nodes->rn_key = new;
	} else {
		/* reuse the existing buffer in place */
		new = rt->rt_nodes->rn_key;
		old = NULL;
	}
	Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
	if (old) {
		/* copy the key into the new buffer, then free the old one */
		Bcopy(dst, new, dlen);
		Free(old);
	}
	/* the cached gateway route is stale now */
	if (rt->rt_gwroute != NULL) {
		rt = rt->rt_gwroute;
		RTFREE(rt);
		rt = rt0;
		rt->rt_gwroute = NULL;
	}
	if (rt->rt_flags & RTF_GATEWAY) {
		rt->rt_gwroute = rtalloc1(gate, 1, tableid);
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is 0 or greater
		 * than the MTU of gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
		    rt->rt_rmx.rmx_mtu &&
		    rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
			rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
		}
	}
	return (0);
}
1029 
/*
 * Copy src into dst, ANDing the address bytes with netmask so that
 * dst holds the masked (network) form of src.  sa_len and sa_family
 * are copied verbatim, never masked.  The copy stops at the shorter
 * of the two sa_len values; any remaining tail of dst (when src is
 * longer than the netmask) is zero-filled.
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	unsigned char	*in = (unsigned char *)src;
	unsigned char	*out = (unsigned char *)dst;
	unsigned char	*mask = (unsigned char *)netmask;
	unsigned char	*end = out + *mask;	/* bounded by netmask sa_len */
	unsigned char	*end2 = out + *in;	/* bounded by src sa_len */

	/* first two bytes (sa_len, sa_family) pass through unmasked */
	*out++ = *in++;
	*out++ = *in++;
	mask += 2;
	if (end > end2)
		end = end2;
	while (out < end)
		*out++ = *in++ & *mask++;
	/* zero whatever is left when src extends past the netmask */
	while (out < end2)
		*out++ = 0;
}
1049 
1050 /*
1051  * Set up a routing table entry, normally
1052  * for an interface.
1053  */
/*
 * Add or delete (per cmd) the route associated with interface address
 * ifa, normally the connected route set up when an address is
 * configured.  flags carries RTF_* bits for the request; RTF_HOST
 * selects ifa_dstaddr (point-to-point peer) instead of ifa_addr as
 * the destination.  Announces the change via rt_newaddrmsg() on
 * success.  Returns 0 or an errno.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry		*rt;
	struct sockaddr		*dst, *deldst;
	struct mbuf		*m = NULL;
	struct rtentry		*nrt = NULL;
	int			 error;
	struct rt_addrinfo	 info;
	struct sockaddr_rtlabel	 sa_rl;
	u_short			 rtableid = ifa->ifa_ifp->if_rdomain;

	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (cmd == RTM_DELETE) {
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			/* network route: delete by the masked destination */
			m = m_get(M_DONTWAIT, MT_SONAME);
			if (m == NULL)
				return (ENOBUFS);
			deldst = mtod(m, struct sockaddr *);
			rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
			dst = deldst;
		}
		if ((rt = rtalloc1(dst, 0, rtableid)) != NULL) {
			rt->rt_refcnt--;
			/* try to find the right route */
			while (rt && rt->rt_ifa != ifa)
				rt = (struct rtentry *)
				    ((struct radix_node *)rt)->rn_dupedkey;
			if (!rt) {
				/* no route belongs to this ifa */
				if (m != NULL)
					(void) m_free(m);
				return (flags & RTF_HOST ? EHOSTUNREACH
							: ENETUNREACH);
			}
		}
	}
	bzero(&info, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags | ifa->ifa_flags;
	info.rti_info[RTAX_DST] = dst;
	if (cmd == RTM_ADD)
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] =
	    rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl);

	/*
	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
	 * variable) when RTF_HOST is 1.  still not sure if i can safely
	 * change it to meet bsdi4 behavior.
	 */
	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	error = rtrequest1(cmd, &info, RTP_CONNECTED, &nrt, rtableid);
	if (cmd == RTM_DELETE) {
		if (error == 0 && (rt = nrt) != NULL) {
			rt_newaddrmsg(cmd, ifa, error, nrt);
			/* drop the reference rtrequest1() handed back */
			if (rt->rt_refcnt <= 0) {
				rt->rt_refcnt++;
				rtfree(rt);
			}
		}
		if (m != NULL)
			(void) m_free(m);
	}
	if (cmd == RTM_ADD && error == 0 && (rt = nrt) != NULL) {
		rt->rt_refcnt--;
		if (rt->rt_ifa != ifa) {
			/* the new route attached to a different ifa; repoint
			 * it at ours and rerun the ifa rtrequest hooks */
			printf("rtinit: wrong ifa (%p) was (%p)\n",
			    ifa, rt->rt_ifa);
			if (rt->rt_ifa->ifa_rtrequest)
				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, NULL);
			IFAFREE(rt->rt_ifa);
			rt->rt_ifa = ifa;
			rt->rt_ifp = ifa->ifa_ifp;
			ifa->ifa_refcnt++;
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_ADD, rt, NULL);
		}
		rt_newaddrmsg(cmd, ifa, error, nrt);
	}
	return (error);
}
1136 
1137 /*
1138  * Route timer routines.  These routes allow functions to be called
1139  * for various routes at any time.  This is useful in supporting
1140  * path MTU discovery and redirect route deletion.
1141  *
1142  * This is similar to some BSDI internal functions, but it provides
1143  * for multiple queues for efficiency's sake...
1144  */
1145 
/* all active rttimer queues, scanned once a second by rt_timer_timer() */
LIST_HEAD(, rttimer_queue)	rttimer_queue_head;
/* set once rt_timer_init() has run; guards its lazy invocation */
static int			rt_init_done = 0;
1148 
/*
 * Fire the action attached to an expiring rttimer: the registered
 * callback if there is one, otherwise delete the route outright.
 */
#define RTTIMER_CALLOUT(r)	{				\
	if (r->rtt_func != NULL) {				\
		(*r->rtt_func)(r->rtt_rt, r);			\
	} else {						\
		struct rt_addrinfo info;			\
		bzero(&info, sizeof(info));			\
		info.rti_info[RTAX_DST] = rt_key(r->rtt_rt);	\
		rtrequest1(RTM_DELETE, &info,			\
		    r->rtt_rt->rt_priority, NULL, 0 /* XXX */);	\
	}							\
}
1160 
1161 /*
1162  * Some subtle order problems with domain initialization mean that
1163  * we cannot count on this being run from rt_init before various
1164  * protocol initializations are done.  Therefore, we make sure
1165  * that this is run when the first queue is added...
1166  */
1167 
1168 void
1169 rt_timer_init()
1170 {
1171 	static struct timeout	rt_timer_timeout;
1172 
1173 	if (rt_init_done)
1174 		panic("rt_timer_init: already initialized");
1175 
1176 	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
1177 	    NULL);
1178 
1179 	LIST_INIT(&rttimer_queue_head);
1180 	timeout_set(&rt_timer_timeout, rt_timer_timer, &rt_timer_timeout);
1181 	timeout_add_sec(&rt_timer_timeout, 1);
1182 	rt_init_done = 1;
1183 }
1184 
1185 struct rttimer_queue *
1186 rt_timer_queue_create(u_int timeout)
1187 {
1188 	struct rttimer_queue	*rtq;
1189 
1190 	if (rt_init_done == 0)
1191 		rt_timer_init();
1192 
1193 	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1194 	if (rtq == NULL)
1195 		return (NULL);
1196 	Bzero(rtq, sizeof *rtq);
1197 
1198 	rtq->rtq_timeout = timeout;
1199 	rtq->rtq_count = 0;
1200 	TAILQ_INIT(&rtq->rtq_head);
1201 	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1202 
1203 	return (rtq);
1204 }
1205 
/*
 * Change a queue's expiry interval (seconds).  Affects how already
 * queued and future entries are aged by rt_timer_timer().
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{
	rtq->rtq_timeout = timeout;
}
1211 
/*
 * Drain and unregister a timer queue.  When `destroy' is non-zero,
 * each pending timer's action is fired (via RTTIMER_CALLOUT) before
 * its entry is released; otherwise entries are discarded silently.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq, int destroy)
{
	struct rttimer	*r;

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		/* unlink from both the per-route list and this queue */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		if (destroy)
			RTTIMER_CALLOUT(r);
		pool_put(&rttimer_pool, r);
		/* rtq_count should track the list exactly; complain if not */
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_destroy: rtq_count reached 0\n");
	}

	LIST_REMOVE(rtq, rtq_link);

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}
1235 
/* Return the number of timers currently pending on the queue. */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return (rtq->rtq_count);
}
1241 
/*
 * Cancel every timer attached to route rt, typically when the route
 * itself is being torn down.  The timer actions are NOT fired.
 */
void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer	*r;

	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		/* unlink from the route's list and the owning queue */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
		if (r->rtt_queue->rtq_count > 0)
			r->rtt_queue->rtq_count--;
		else
			printf("rt_timer_remove_all: rtq_count reached 0\n");
		pool_put(&rttimer_pool, r);
	}
}
1257 
/*
 * Arm a timer for route rt on `queue'; when it expires, `func' is
 * invoked (or, if func is NULL, the route is deleted — see
 * RTTIMER_CALLOUT).  At most one timer per (route, func) pair exists;
 * an existing one is replaced.  Returns 0 or ENOBUFS.
 *
 * Note: rmx_expire is stamped with wall-clock time_second (the value
 * exported to userland) while queue aging below uses the monotonic
 * time_uptime — the two time bases are intentionally(?) different.
 */
int
rt_timer_add(struct rtentry *rt, void (*func)(struct rtentry *,
    struct rttimer *), struct rttimer_queue *queue)
{
	struct rttimer	*r;
	long		 current_time;

	current_time = time_uptime;
	rt->rt_rmx.rmx_expire = time_second + queue->rtq_timeout;

	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	for (r = LIST_FIRST(&rt->rt_timer); r != NULL;
	     r = LIST_NEXT(r, rtt_link)) {
		if (r->rtt_func == func) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
			if (r->rtt_queue->rtq_count > 0)
				r->rtt_queue->rtq_count--;
			else
				printf("rt_timer_add: rtq_count reached 0\n");
			pool_put(&rttimer_pool, r);
			break;  /* only one per list, so we can quit... */
		}
	}

	r = pool_get(&rttimer_pool, PR_NOWAIT);
	if (r == NULL)
		return (ENOBUFS);
	Bzero(r, sizeof(*r));

	r->rtt_rt = rt;
	r->rtt_time = current_time;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	/* tail insertion keeps each queue sorted by insertion time */
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	return (0);
}
1301 
1302 struct radix_node_head *
1303 rt_gettable(sa_family_t af, u_int id)
1304 {
1305 	if (id > rtbl_id_max)
1306 		return (NULL);
1307 	return (rt_tables[id] ? rt_tables[id][af2rtafidx[af]] : NULL);
1308 }
1309 
1310 struct radix_node *
1311 rt_lookup(struct sockaddr *dst, struct sockaddr *mask, u_int tableid)
1312 {
1313 	struct radix_node_head	*rnh;
1314 
1315 	if ((rnh = rt_gettable(dst->sa_family, tableid)) == NULL)
1316 		return (NULL);
1317 
1318 	return (rnh->rnh_lookup(dst, mask, rnh));
1319 }
1320 
/*
 * Periodic (once a second) sweep over every timer queue: fire and
 * release all entries whose age exceeds their queue's timeout, then
 * reschedule ourselves.  Runs at splsoftnet.
 */
/* ARGSUSED */
void
rt_timer_timer(void *arg)
{
	struct timeout		*to = (struct timeout *)arg;
	struct rttimer_queue	*rtq;
	struct rttimer		*r;
	long			 current_time;
	int			 s;

	current_time = time_uptime;

	s = splsoftnet();
	for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL;
	     rtq = LIST_NEXT(rtq, rtq_link)) {
		/* entries are in insertion order, so stop at the first
		 * one that has not expired yet */
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < current_time) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			RTTIMER_CALLOUT(r);
			pool_put(&rttimer_pool, r);
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	splx(s);

	timeout_add_sec(to, 1);
}
1352 
1353 u_int16_t
1354 rtlabel_name2id(char *name)
1355 {
1356 	struct rt_label		*label, *p = NULL;
1357 	u_int16_t		 new_id = 1;
1358 
1359 	if (!name[0])
1360 		return (0);
1361 
1362 	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1363 		if (strcmp(name, label->rtl_name) == 0) {
1364 			label->rtl_ref++;
1365 			return (label->rtl_id);
1366 		}
1367 
1368 	/*
1369 	 * to avoid fragmentation, we do a linear search from the beginning
1370 	 * and take the first free slot we find. if there is none or the list
1371 	 * is empty, append a new entry at the end.
1372 	 */
1373 
1374 	if (!TAILQ_EMPTY(&rt_labels))
1375 		for (p = TAILQ_FIRST(&rt_labels); p != NULL &&
1376 		    p->rtl_id == new_id; p = TAILQ_NEXT(p, rtl_entry))
1377 			new_id = p->rtl_id + 1;
1378 
1379 	if (new_id > LABELID_MAX)
1380 		return (0);
1381 
1382 	label = malloc(sizeof(*label), M_TEMP, M_NOWAIT|M_ZERO);
1383 	if (label == NULL)
1384 		return (0);
1385 	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
1386 	label->rtl_id = new_id;
1387 	label->rtl_ref++;
1388 
1389 	if (p != NULL)	/* insert new entry before p */
1390 		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
1391 	else		/* either list empty or no free slot in between */
1392 		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);
1393 
1394 	return (label->rtl_id);
1395 }
1396 
1397 const char *
1398 rtlabel_id2name(u_int16_t id)
1399 {
1400 	struct rt_label	*label;
1401 
1402 	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1403 		if (label->rtl_id == id)
1404 			return (label->rtl_name);
1405 
1406 	return (NULL);
1407 }
1408 
1409 struct sockaddr *
1410 rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
1411 {
1412 	const char	*label;
1413 
1414 	if (labelid == 0 || (label = rtlabel_id2name(labelid)) == NULL)
1415 		return (NULL);
1416 
1417 	bzero(sa_rl, sizeof(*sa_rl));
1418 	sa_rl->sr_len = sizeof(*sa_rl);
1419 	sa_rl->sr_family = AF_UNSPEC;
1420 	strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));
1421 
1422 	return ((struct sockaddr *)sa_rl);
1423 }
1424 
1425 void
1426 rtlabel_unref(u_int16_t id)
1427 {
1428 	struct rt_label	*p, *next;
1429 
1430 	if (id == 0)
1431 		return;
1432 
1433 	for (p = TAILQ_FIRST(&rt_labels); p != NULL; p = next) {
1434 		next = TAILQ_NEXT(p, rtl_entry);
1435 		if (id == p->rtl_id) {
1436 			if (--p->rtl_ref == 0) {
1437 				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
1438 				free(p, M_TEMP);
1439 			}
1440 			break;
1441 		}
1442 	}
1443 }
1444 
1445 void
1446 rt_if_remove(struct ifnet *ifp)
1447 {
1448 	int			 i;
1449 	u_int			 tid;
1450 	struct radix_node_head	*rnh;
1451 
1452 	for (tid = 0; tid <= rtbl_id_max; tid++) {
1453 		for (i = 1; i <= AF_MAX; i++) {
1454 			if ((rnh = rt_gettable(i, tid)) != NULL)
1455 				while ((*rnh->rnh_walktree)(rnh,
1456 				    rt_if_remove_rtdelete, ifp) == EAGAIN)
1457 					;	/* nothing */
1458 		}
1459 	}
1460 }
1461 
1462 /*
1463  * Note that deleting a RTF_CLONING route can trigger the
1464  * deletion of more entries, so we need to cancel the walk
1465  * and return EAGAIN.  The caller should restart the walk
1466  * as long as EAGAIN is returned.
1467  */
/*
 * rnh_walktree callback for rt_if_remove(): delete the route if it
 * points at the interface passed in vifp.  Returns EAGAIN after
 * successfully deleting a cloning route, so the caller restarts the
 * walk (the deletion may have removed cloned children too).
 */
int
rt_if_remove_rtdelete(struct radix_node *rn, void *vifp)
{
	struct ifnet	*ifp = vifp;
	struct rtentry	*rt = (struct rtentry *)rn;

	if (rt->rt_ifp == ifp) {
		/* remember before rtdeletemsg() frees the entry */
		int	cloning = (rt->rt_flags & RTF_CLONING);

		if (rtdeletemsg(rt, ifp->if_rdomain /* XXX wrong */) == 0 && cloning)
			return (EAGAIN);
	}

	/*
	 * XXX There should be no need to check for rt_ifa belonging to this
	 * interface, because then rt_ifp is set, right?
	 */

	return (0);
}
1488 
1489 #ifndef SMALL_KERNEL
/*
 * React to a link-state change on ifp: walk every multipath-capable
 * routing tree and re-prioritize the interface's routes (see
 * rt_if_linkstate_change()).
 */
void
rt_if_track(struct ifnet *ifp)
{
	struct radix_node_head *rnh;
	int i;
	u_int tid;

	/* routing tables not set up yet; nothing to track */
	if (rt_tables == NULL)
		return;

	for (tid = 0; tid <= rtbl_id_max; tid++) {
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_gettable(i, tid)) != NULL) {
				/* only mpath trees support re-prioritizing */
				if (!rn_mpath_capable(rnh))
					continue;
				while ((*rnh->rnh_walktree)(rnh,
				    rt_if_linkstate_change, ifp) == EAGAIN)
					;	/* nothing */
			}
		}
	}
}
1512 
/*
 * rnh_walktree callback for rt_if_track(): mark this route up or down
 * to match the interface's link state, adjusting its multipath
 * priority accordingly.  Always returns 0 (never aborts the walk).
 */
int
rt_if_linkstate_change(struct radix_node *rn, void *arg)
{
	struct ifnet *ifp = arg;
	struct rtentry *rt = (struct rtentry *)rn;

	if (rt->rt_ifp == ifp) {
		/* link is usable if up, or unknown (no link state info) */
		if ((LINK_STATE_IS_UP(ifp->if_link_state) ||
		    ifp->if_link_state == LINK_STATE_UNKNOWN) &&
		    ifp->if_flags & IFF_UP) {
			if (!(rt->rt_flags & RTF_UP)) {
				/* bring route up */
				rt->rt_flags |= RTF_UP;
				rn_mpath_reprio(rn, rt->rt_priority & RTP_MASK);
			}
		} else {
			if (rt->rt_flags & RTF_UP) {
				/* take route down */
				rt->rt_flags &= ~RTF_UP;
				rn_mpath_reprio(rn, rt->rt_priority | RTP_DOWN);
			}
		}
		if_group_routechange(rt_key(rt), rt_mask(rt));
	}

	return (0);
}
1540 #endif
1541