xref: /openbsd-src/sys/net/route.c (revision 5054e3e78af0749a9bb00ba9a024b3ee2d90290f)
1 /*	$OpenBSD: route.c,v 1.114 2009/11/03 10:59:04 claudio Exp $	*/
2 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)route.c	8.2 (Berkeley) 11/15/93
62  */
63 
64 /*
65  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
66  *
67  * NRL grants permission for redistribution and use in source and binary
68  * forms, with or without modification, of the software and documentation
69  * created at NRL provided that the following conditions are met:
70  *
71  * 1. Redistributions of source code must retain the above copyright
72  *    notice, this list of conditions and the following disclaimer.
73  * 2. Redistributions in binary form must reproduce the above copyright
74  *    notice, this list of conditions and the following disclaimer in the
75  *    documentation and/or other materials provided with the distribution.
76  * 3. All advertising materials mentioning features or use of this software
77  *    must display the following acknowledgements:
78  * 	This product includes software developed by the University of
79  * 	California, Berkeley and its contributors.
80  * 	This product includes software developed at the Information
81  * 	Technology Division, US Naval Research Laboratory.
82  * 4. Neither the name of the NRL nor the names of its contributors
83  *    may be used to endorse or promote products derived from this software
84  *    without specific prior written permission.
85  *
86  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
90  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97  *
98  * The views and conclusions contained in the software and documentation
99  * are those of the authors and should not be interpreted as representing
100  * official policies, either expressed or implied, of the US Naval
101  * Research Laboratory (NRL).
102  */
103 
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/proc.h>
107 #include <sys/mbuf.h>
108 #include <sys/socket.h>
109 #include <sys/socketvar.h>
110 #include <sys/domain.h>
111 #include <sys/protosw.h>
112 #include <sys/ioctl.h>
113 #include <sys/kernel.h>
114 #include <sys/queue.h>
115 #include <sys/pool.h>
116 
117 #include <net/if.h>
118 #include <net/route.h>
119 #include <net/raw_cb.h>
120 
121 #include <netinet/in.h>
122 #include <netinet/in_var.h>
123 
124 #ifdef MPLS
125 #include <netmpls/mpls.h>
126 #endif
127 
128 #ifdef IPSEC
129 #include <netinet/ip_ipsp.h>
130 #include <net/if_enc.h>
131 
132 struct ifaddr	*encap_findgwifa(struct sockaddr *);
133 #endif
134 
#define	SA(p) ((struct sockaddr *)(p))

struct	route_cb	   route_cb;	/* routing-socket listener counts */
struct	rtstat		   rtstat;	/* routing statistics (netstat -s) */
/* Per-table array of per-AF radix heads; indexed [tableid][af2rtafidx[af]]. */
struct	radix_node_head	***rt_tables;
u_int8_t		   af2rtafidx[AF_MAX+1];	/* AF -> slot in a table */
u_int8_t		   rtafidx_max;	/* number of used AF slots (+1 for NULL slot 0) */
u_int			   rtbl_id_max = 0;	/* highest allocated table id */
u_int			  *rt_tab2dom;	/* rt table to domain lookup table */

int			rttrash;	/* routes not in table but not freed */

struct pool		rtentry_pool;	/* pool for rtentry structures */
struct pool		rttimer_pool;	/* pool for rttimer structures */

/* Forward declarations for file-local helpers. */
int	rtable_init(struct radix_node_head ***);
int	okaytoclone(u_int, int);
int	rtflushclone1(struct radix_node *, void *);
void	rtflushclone(struct radix_node_head *, struct rtentry *);
int	rt_if_remove_rtdelete(struct radix_node *, void *);
#ifndef SMALL_KERNEL
int	rt_if_linkstate_change(struct radix_node *, void *);
#endif

/* Upper bound on dynamically assigned route-label ids. */
#define	LABELID_MAX	50000

/* One entry per distinct route label name; refcounted by rtentries. */
struct rt_label {
	TAILQ_ENTRY(rt_label)	rtl_entry;
	char			rtl_name[RTLABEL_LEN];
	u_int16_t		rtl_id;
	int			rtl_ref;
};

TAILQ_HEAD(rt_labels, rt_label)	rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels);
169 
#ifdef IPSEC
/*
 * Return the ifaddr used for PF_KEY/encap gateways: the first address
 * of enc0.  The gw argument is currently unused.
 */
struct ifaddr *
encap_findgwifa(struct sockaddr *gw)
{
	return (TAILQ_FIRST(&encif[0].sc_if.if_addrlist));
}
#endif
177 
/*
 * Allocate and populate one routing table: an array of per-AF radix
 * heads, attached via each domain's dom_rtattach hook.  Returns 0 on
 * success, -1 if the array allocation fails.
 */
int
rtable_init(struct radix_node_head ***table)
{
	void		**p;
	struct domain	 *dom;

	/* Slot 0 is reserved as NULL, hence rtafidx_max + 1 entries. */
	if ((p = malloc(sizeof(void *) * (rtafidx_max + 1), M_RTABLE,
	    M_NOWAIT|M_ZERO)) == NULL)
		return (-1);

	/* 2nd pass: attach (1st pass, counting AFs, was done in route_init) */
	for (dom = domains; dom != NULL; dom = dom->dom_next)
		if (dom->dom_rtattach)
			dom->dom_rtattach(&p[af2rtafidx[dom->dom_family]],
			    dom->dom_rtoffset);

	*table = (struct radix_node_head **)p;
	return (0);
}
197 
/*
 * One-time initialization of the routing subsystem: set up the rtentry
 * pool, the radix code, the AF -> table-slot mapping, and create the
 * default routing table (id 0).
 */
void
route_init()
{
	struct domain	 *dom;

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL);
	rn_init();	/* initialize all zeroes, all ones, mask table */

	bzero(af2rtafidx, sizeof(af2rtafidx));
	rtafidx_max = 1;	/* must have NULL at index 0, so start at 1 */

	/* find out how many tables to allocate */
	for (dom = domains; dom != NULL; dom = dom->dom_next)
		if (dom->dom_rtattach)
			af2rtafidx[dom->dom_family] = rtafidx_max++;

	/* Table 0 (the main table) must always exist. */
	if (rtable_add(0) == -1)
		panic("route_init rtable_add");
}
218 
219 int
220 rtable_add(u_int id)	/* must be called at splsoftnet */
221 {
222 	void	*p, *q;
223 
224 	if (id > RT_TABLEID_MAX)
225 		return (-1);
226 
227 	if (id == 0 || id > rtbl_id_max) {
228 		size_t	newlen = sizeof(void *) * (id+1);
229 		size_t	newlen2 = sizeof(u_int) * (id+1);
230 
231 		if ((p = malloc(newlen, M_RTABLE, M_NOWAIT|M_ZERO)) == NULL)
232 			return (-1);
233 		if ((q = malloc(newlen2, M_RTABLE, M_NOWAIT|M_ZERO)) == NULL) {
234 			free(p, M_RTABLE);
235 			return (-1);
236 		}
237 		if (rt_tables) {
238 			bcopy(rt_tables, p, sizeof(void *) * (rtbl_id_max+1));
239 			bcopy(rt_tab2dom, q, sizeof(u_int) * (rtbl_id_max+1));
240 			free(rt_tables, M_RTABLE);
241 		}
242 		rt_tables = p;
243 		rt_tab2dom = q;
244 		rtbl_id_max = id;
245 	}
246 
247 	if (rt_tables[id] != NULL)	/* already exists */
248 		return (-1);
249 
250 	rt_tab2dom[id] = 0;	/* use main table/domain by default */
251 	return (rtable_init(&rt_tables[id]));
252 }
253 
254 u_int
255 rtable_l2(u_int id)
256 {
257 	if (id > rtbl_id_max)
258 		return (0);
259 	return (rt_tab2dom[id]);
260 }
261 
262 void
263 rtable_l2set(u_int id, u_int parent)
264 {
265 	if (!rtable_exists(id) || !rtable_exists(parent))
266 		return;
267 	rt_tab2dom[id] = parent;
268 }
269 
270 int
271 rtable_exists(u_int id)	/* verify table with that ID exists */
272 {
273 	if (id > RT_TABLEID_MAX)
274 		return (0);
275 
276 	if (id > rtbl_id_max)
277 		return (0);
278 
279 	if (rt_tables[id] == NULL)
280 		return (0);
281 
282 	return (1);
283 }
284 
285 #include "pf.h"
286 #if NPF > 0
/*
 * pf(4) helper: fill in ro->ro_rt via rtalloc2() (which may refuse to
 * clone depending on howstrict).  A cached, up route is reused as-is.
 */
void
rtalloc_noclone(struct route *ro, int howstrict)
{
	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
		return;		/* XXX */
	ro->ro_rt = rtalloc2(&ro->ro_dst, 1, howstrict);
}
294 
295 int
296 okaytoclone(u_int flags, int howstrict)
297 {
298 	if (howstrict == ALL_CLONING)
299 		return (1);
300 	if (howstrict == ONNET_CLONING && !(flags & RTF_GATEWAY))
301 		return (1);
302 	return (0);
303 }
304 
/*
 * pf(4) variant of rtalloc1(): look up dst in the main table (id 0),
 * cloning only when okaytoclone() permits.  Returns a referenced
 * rtentry or NULL; sends RTM_MISS/RTM_RESOLVE messages when `report'
 * is set.
 * NOTE(review): this uses splnet() where rtalloc1() uses splsoftnet()
 * — confirm that difference is intentional.
 */
struct rtentry *
rtalloc2(struct sockaddr *dst, int report, int howstrict)
{
	struct radix_node_head	*rnh;
	struct rtentry		*rt;
	struct radix_node	*rn;
	struct rtentry		*newrt = 0;
	struct rt_addrinfo	 info;
	int			 s = splnet(), err = 0, msgtype = RTM_MISS;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	/* Always table 0 here, unlike rtalloc1() which takes a tableid. */
	rnh = rt_gettable(dst->sa_family, 0);
	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
	    ((rn->rn_flags & RNF_ROOT) == 0)) {
		newrt = rt = (struct rtentry *)rn;
		if (report && (rt->rt_flags & RTF_CLONING) &&
		    okaytoclone(rt->rt_flags, howstrict)) {
			err = rtrequest1(RTM_RESOLVE, &info, RTP_DEFAULT,
			    &newrt, 0);
			if (err) {
				/* Cloning failed: hand back the parent. */
				newrt = rt;
				rt->rt_refcnt++;
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/* External resolver must be notified. */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
		} else
			rt->rt_refcnt++;
	} else {
		rtstat.rts_unreach++;
miss:
		if (report) {
			rt_missmsg(msgtype, &info, 0, NULL, err, 0);
		}
	}
	splx(s);
	return (newrt);
}
347 #endif /* NPF > 0 */
348 
349 /*
350  * Packet routing routines.
351  */
/*
 * Fill in ro->ro_rt from the main table, reusing a cached route that
 * is still up; otherwise do a fresh (cloning) lookup via rtalloc1().
 */
void
rtalloc(struct route *ro)
{
	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
		return;				 /* XXX */
	ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0);
}
359 
/*
 * Core route lookup: find the best match for dst in table `tableid',
 * cloning RTF_CLONING parents when `report' is set.  Returns a
 * referenced rtentry, or NULL on miss.  Generates RTM_ADD for fresh
 * clones and RTM_MISS/RTM_RESOLVE messages on failure paths.
 */
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, u_int tableid)
{
	struct radix_node_head	*rnh;
	struct rtentry		*rt;
	struct radix_node	*rn;
	struct rtentry		*newrt = 0;
	struct rt_addrinfo	 info;
	int			 s = splsoftnet(), err = 0, msgtype = RTM_MISS;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	rnh = rt_gettable(dst->sa_family, tableid);
	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
	    ((rn->rn_flags & RNF_ROOT) == 0)) {
		newrt = rt = (struct rtentry *)rn;
		if (report && (rt->rt_flags & RTF_CLONING)) {
			err = rtrequest1(RTM_RESOLVE, &info, RTP_DEFAULT,
			    &newrt, tableid);
			if (err) {
				/* Cloning failed: return the parent instead. */
				newrt = rt;
				rt->rt_refcnt++;
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/* Route needs external resolution (userland). */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
			/* Inform listeners of the new route */
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = rt_key(rt);
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if (rt->rt_ifp != NULL) {
				info.rti_info[RTAX_IFP] =
				    TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
				info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
			}
			rt_missmsg(RTM_ADD, &info, rt->rt_flags,
			    rt->rt_ifp, 0, tableid);
		} else
			rt->rt_refcnt++;
	} else {
		if (dst->sa_family != PF_KEY)
			rtstat.rts_unreach++;
	/*
	 * IP encapsulation does lots of lookups where we don't need nor want
	 * the RTM_MISSes that would be generated.  It causes RTM_MISS storms
	 * sent upward breaking user-level routing queries.
	 */
miss:
		if (report && dst->sa_family != PF_KEY) {
			bzero((caddr_t)&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			rt_missmsg(msgtype, &info, 0, NULL, err, tableid);
		}
	}
	splx(s);
	return (newrt);
}
421 
/*
 * Drop one reference on rt; when the count reaches zero and the route
 * is down and detached from the radix tree, release all its resources
 * (timers, ifaddr ref, label, MPLS info, key storage, the entry itself).
 */
void
rtfree(struct rtentry *rt)
{
	struct ifaddr	*ifa;

	if (rt == NULL)
		panic("rtfree");

	rt->rt_refcnt--;

	if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
		if (rt->rt_refcnt == 0 && (rt->rt_nodes->rn_flags & RNF_ACTIVE))
			return; /* route still active but currently down */
		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic("rtfree 2");
		/* Entry was counted in rttrash when it left the tree. */
		rttrash--;
		if (rt->rt_refcnt < 0) {
			printf("rtfree: %p not freed (neg refs)\n", rt);
			return;
		}
		rt_timer_remove_all(rt);
		ifa = rt->rt_ifa;
		if (ifa)
			IFAFREE(ifa);
		rtlabel_unref(rt->rt_labelid);
#ifdef MPLS
		/* rt_llinfo holds a malloc'd struct rt_mpls for MPLS routes. */
		if (rt->rt_flags & RTF_MPLS)
			free(rt->rt_llinfo, M_TEMP);
#endif
		Free(rt_key(rt));
		pool_put(&rtentry_pool, rt);
	}
}
455 
/*
 * Drop a reference on an interface address.  By historic convention a
 * refcnt of 0 means "last reference": free immediately; otherwise just
 * decrement (so the counter counts *extra* references).
 */
void
ifafree(struct ifaddr *ifa)
{
	if (ifa == NULL)
		panic("ifafree");
	if (ifa->ifa_refcnt == 0)
		free(ifa, M_IFADDR);
	else
		ifa->ifa_refcnt--;
}
466 
467 /*
468  * Force a routing table entry to the specified
469  * destination to go through the given gateway.
470  * Normally called as a result of a routing redirect
471  * message from the network layer.
472  *
473  * N.B.: must be called at splsoftnet
474  */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct sockaddr *src,
    struct rtentry **rtp, u_int rdomain)
{
	struct rtentry		*rt;
	int			 error = 0;
	u_int32_t		*stat = NULL;
	struct rt_addrinfo	 info;
	struct ifaddr		*ifa;
	struct ifnet		*ifp = NULL;

	splsoftassert(IPL_SOFTNET);

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet(gateway, rdomain)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	ifp = ifa->ifa_ifp;
	rt = rtalloc1(dst, 0, rdomain);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
#define	equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (!(flags & RTF_DONE) && rt &&
	     (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway, rdomain) != NULL)
		error = EHOSTUNREACH;
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if ((rt == NULL) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			if (rt)
				rtfree(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, RTP_DEFAULT, &rt,
			    rdomain);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			stat = &rtstat.rts_newgateway;
			rt_setgate(rt, rt_key(rt), gateway, rdomain);
		}
	} else
		error = EHOSTUNREACH;
done:
	/* Hand the route to the caller (who then owns the ref) or drop it. */
	if (rt) {
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	/* Always notify routing-socket listeners of the redirect. */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, ifp, error, rdomain);
}
577 
578 /*
579  * Delete a route and generate a message
580  */
int
rtdeletemsg(struct rtentry *rt, u_int tableid)
{
	int			error;
	struct rt_addrinfo	info;
	struct ifnet		*ifp;

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_flags = rt->rt_flags;
	/* Remember ifp before the delete; rt may be the last reference. */
	ifp = rt->rt_ifp;
	error = rtrequest1(RTM_DELETE, &info, rt->rt_priority, &rt, tableid);

	rt_missmsg(RTM_DELETE, &info, info.rti_flags, ifp, error, tableid);

	/* Adjust the refcount */
	if (error == 0 && rt->rt_refcnt <= 0) {
		rt->rt_refcnt++;
		rtfree(rt);
	}
	return (error);
}
610 
/*
 * rnh_walktree() callback: delete rn if it is a clone of `arg'.
 * NOTE(review): the table id is hardcoded to 0 here even though clones
 * may live in other tables — confirm against later revisions.
 */
int
rtflushclone1(struct radix_node *rn, void *arg)
{
	struct rtentry	*rt, *parent;

	rt = (struct rtentry *)rn;
	parent = (struct rtentry *)arg;
	if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent == parent)
		rtdeletemsg(rt, 0);
	return 0;
}
622 
/*
 * Remove all routes cloned from `parent' by walking the whole radix
 * tree with rtflushclone1().  parent must be an RTF_CLONING route.
 */
void
rtflushclone(struct radix_node_head *rnh, struct rtentry *parent)
{

#ifdef DIAGNOSTIC
	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
		panic("rtflushclone: called with a non-cloning route");
	if (!rnh->rnh_walktree)
		panic("rtflushclone: no rnh_walktree");
#endif
	rnh->rnh_walktree(rnh, rtflushclone1, (void *)parent);
}
635 
636 int
637 rtioctl(u_long req, caddr_t data, struct proc *p)
638 {
639 	return (EOPNOTSUPP);
640 }
641 
/*
 * Find the interface address to associate with a route to dst via
 * gateway.  Tries, in order: dst itself for p2p host routes, the
 * gateway as a local address, the gateway's network, and finally a
 * recursive route lookup on the gateway.  Returns NULL on failure.
 */
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
    u_int rtableid)
{
	struct ifaddr	*ifa;

#ifdef IPSEC
	/*
	 * If the destination is a PF_KEY address, we'll look
	 * for the existence of a encap interface number or address
	 * in the options list of the gateway. By default, we'll return
	 * enc0.
	 */
	if (dst && (dst->sa_family == PF_KEY))
		return (encap_findgwifa(gateway));
#endif

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		ifa = NULL;
		if (flags & RTF_HOST)
			ifa = ifa_ifwithdstaddr(dst, rtableid);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr(gateway, rtableid);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gateway, rtableid);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet(gateway, rtableid);
	if (ifa == NULL) {
		/* Last resort: route to the gateway itself. */
		struct rtentry	*rt = rtalloc1(gateway, 0, rtable_l2(rtableid));
		if (rt == NULL)
			return (NULL);
		rt->rt_refcnt--;
		/* The gateway must be local if the same address family. */
		if ((rt->rt_flags & RTF_GATEWAY) &&
		    rt_key(rt)->sa_family == dst->sa_family)
			return (0);
		if ((ifa = rt->rt_ifa) == NULL)
			return (NULL);
	}
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* Prefer an address on the same interface matching dst's AF. */
		struct ifaddr	*oifa = ifa;
		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (ifa == NULL)
			ifa = oifa;
	}
	return (ifa);
}
702 
703 #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
704 
/*
 * Resolve info->rti_ifa (and rti_ifp) for a route request from the
 * sockaddrs supplied in info, if the caller did not provide them.
 * Returns 0 on success or ENETUNREACH if no ifaddr can be found.
 */
int
rt_getifa(struct rt_addrinfo *info, u_int rtid)
{
	struct ifaddr	*ifa;
	int		 error = 0;

	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (info->rti_ifp == NULL && info->rti_info[RTAX_IFP] != NULL
	    && info->rti_info[RTAX_IFP]->sa_family == AF_LINK &&
	    (ifa = ifa_ifwithnet((struct sockaddr *)info->rti_info[RTAX_IFP],
	    rtid)) != NULL)
		info->rti_ifp = ifa->ifa_ifp;

	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);

	if (info->rti_ifa == NULL) {
		struct sockaddr	*sa;

		/* Pick the most specific sockaddr available as a hint. */
		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
				sa = info->rti_info[RTAX_DST];

		if (sa != NULL && info->rti_ifp != NULL)
			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
		else if (info->rti_info[RTAX_DST] != NULL &&
		    info->rti_info[RTAX_GATEWAY] != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_GATEWAY],
			    rtid);
		else if (sa != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    sa, sa, rtid);
	}
	if ((ifa = info->rti_ifa) != NULL) {
		if (info->rti_ifp == NULL)
			info->rti_ifp = ifa->ifa_ifp;
	} else
		error = ENETUNREACH;
	return (error);
}
750 
/*
 * Central routing-table mutation routine.  Handles RTM_DELETE,
 * RTM_RESOLVE (cloning a new child from a CLONING parent) and RTM_ADD
 * for table `tableid' at priority `prio'.  On success *ret_nrt (if
 * non-NULL) receives a referenced rtentry.  Returns 0 or an errno.
 */
int
rtrequest1(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	int			 s = splsoftnet(); int error = 0;
	struct rtentry		*rt, *crt;
	struct radix_node	*rn;
	struct radix_node_head	*rnh;
	struct ifaddr		*ifa;
	struct sockaddr		*ndst;
	struct sockaddr_rtlabel	*sa_rl, sa_rl2;
#ifdef MPLS
	struct sockaddr_mpls	*sa_mpls;
#endif
#define senderr(x) { error = x ; goto bad; }

	if ((rnh = rt_gettable(info->rti_info[RTAX_DST]->sa_family, tableid)) ==
	    NULL)
		senderr(EAFNOSUPPORT);
	/* Host routes have no netmask by definition. */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;
#ifndef SMALL_KERNEL
		/*
		 * if we got multipath routes, we require users to specify
		 * a matching RTAX_GATEWAY.
		 */
		if (rn_mpath_capable(rnh)) {
			rt = rt_mpath_matchgate(rt,
			    info->rti_info[RTAX_GATEWAY], prio);
			rn = (struct radix_node *)rt;
			if (!rt)
				senderr(ESRCH);
		}
#endif
		if ((rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh, rn)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;

		/* clean up any cloned children */
		if ((rt->rt_flags & RTF_CLONING) != 0)
			rtflushclone(rnh, rt);

		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");

		/* Release the cached gateway route, if any. */
		if (rt->rt_gwroute) {
			rt = rt->rt_gwroute; RTFREE(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = NULL;
		}

		/* Drop the back reference on the cloning parent. */
		if (rt->rt_parent) {
			rt->rt_parent->rt_refcnt--;
			rt->rt_parent = NULL;
		}

#ifndef SMALL_KERNEL
		/* Clear RTF_MPATH when only one sibling remains. */
		if (rn_mpath_capable(rnh)) {
			if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], rnh)) != NULL &&
			    rn_mpath_next(rn, 0) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
		}
#endif

		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
		/* Entry is out of the tree but not yet freed. */
		rttrash++;

		if (ret_nrt)
			*ret_nrt = rt;
		else if (rt->rt_refcnt <= 0) {
			rt->rt_refcnt++;
			rtfree(rt);
		}
		break;

	case RTM_RESOLVE:
		/* *ret_nrt is the cloning parent on entry. */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			senderr(EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			senderr(EINVAL);
		ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags & ~(RTF_CLONING | RTF_STATIC);
		info->rti_flags |= RTF_CLONED;
		info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		if ((info->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
			info->rti_flags |= RTF_HOST;
		info->rti_info[RTAX_LABEL] =
		    rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
		goto makeroute;

	case RTM_ADD:
		if (info->rti_ifa == 0 && (error = rt_getifa(info, tableid)))
			senderr(error);
		ifa = info->rti_ifa;
makeroute:
		rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
		if (rt == NULL)
			senderr(ENOBUFS);

		rt->rt_flags = info->rti_flags;

		if (prio == 0)
			prio = ifa->ifa_ifp->if_priority + RTP_STATIC;
		rt->rt_priority = prio;	/* init routing priority */
		/* Mark the route down if the interface/link is down. */
		if ((LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) ||
		    ifa->ifa_ifp->if_link_state == LINK_STATE_UNKNOWN) &&
		    ifa->ifa_ifp->if_flags & IFF_UP)
			rt->rt_flags |= RTF_UP;
		else {
			rt->rt_flags &= ~RTF_UP;
			rt->rt_priority |= RTP_DOWN;
		}
		LIST_INIT(&rt->rt_timer);
		/* Allocates the combined key+gateway storage. */
		if (rt_setgate(rt, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_GATEWAY], tableid)) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}
		ndst = rt_key(rt);
		if (info->rti_info[RTAX_NETMASK] != NULL) {
			rt_maskedcopy(info->rti_info[RTAX_DST], ndst,
			    info->rti_info[RTAX_NETMASK]);
		} else
			Bcopy(info->rti_info[RTAX_DST], ndst,
			    info->rti_info[RTAX_DST]->sa_len);
#ifndef SMALL_KERNEL
		/* do not permit exactly the same dst/mask/gw pair */
		if (rn_mpath_capable(rnh) &&
		    rt_mpath_conflict(rnh, rt, info->rti_info[RTAX_NETMASK],
		    info->rti_flags & RTF_MPATH)) {
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}
#endif

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

#ifdef MPLS
		/* We have to allocate additional space for MPLS infos */
		if (info->rti_info[RTAX_SRC] != NULL ||
		    info->rti_info[RTAX_DST]->sa_family == AF_MPLS) {
			struct rt_mpls *rt_mpls;

			sa_mpls = (struct sockaddr_mpls *)
			    info->rti_info[RTAX_SRC];

			rt->rt_llinfo = (caddr_t)malloc(sizeof(struct rt_mpls),
			    M_TEMP, M_NOWAIT|M_ZERO);

			if (rt->rt_llinfo == NULL) {
				if (rt->rt_gwroute)
					rtfree(rt->rt_gwroute);
				Free(rt_key(rt));
				pool_put(&rtentry_pool, rt);
				senderr(ENOMEM);
			}

			rt_mpls = (struct rt_mpls *)rt->rt_llinfo;

			if (sa_mpls != NULL)
				rt_mpls->mpls_label = sa_mpls->smpls_label;

			rt_mpls->mpls_operation = info->rti_mpls;

			/* XXX: set experimental bits */

			rt->rt_flags |= RTF_MPLS;
		}
#endif

		ifa->ifa_refcnt++;
		rt->rt_ifa = ifa;
		rt->rt_ifp = ifa->ifa_ifp;
		if (req == RTM_RESOLVE) {
			/*
			 * Copy both metrics and a back pointer to the cloned
			 * route's parent.
			 */
			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
			rt->rt_priority = (*ret_nrt)->rt_priority;
			rt->rt_parent = *ret_nrt;	 /* Back ptr. to parent. */
			rt->rt_parent->rt_refcnt++;
		}
		rn = rnh->rnh_addaddr((caddr_t)ndst,
		    (caddr_t)info->rti_info[RTAX_NETMASK], rnh, rt->rt_nodes,
		    rt->rt_priority);
		if (rn == NULL && (crt = rtalloc1(ndst, 0, tableid)) != NULL) {
			/* overwrite cloned route */
			if ((crt->rt_flags & RTF_CLONED) != 0) {
				rtdeletemsg(crt, tableid);
				rn = rnh->rnh_addaddr((caddr_t)ndst,
				    (caddr_t)info->rti_info[RTAX_NETMASK],
				    rnh, rt->rt_nodes, rt->rt_priority);
			}
			RTFREE(crt);
		}
		if (rn == 0) {
			/* Insert failed: unwind everything built above. */
			IFAFREE(ifa);
			if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent)
				rtfree(rt->rt_parent);
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}

#ifndef SMALL_KERNEL
		/* Maintain RTF_MPATH on the set of equal-priority siblings. */
		if (rn_mpath_capable(rnh) &&
		    (rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh)) != NULL &&
		    (rn = rn_mpath_prio(rn, prio)) != NULL) {
			if (rn_mpath_next(rn, 0) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
			else
				((struct rtentry *)rn)->rt_flags |= RTF_MPATH;
		}
#endif

		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt->rt_refcnt++;
		}
		if ((rt->rt_flags & RTF_CLONING) != 0) {
			/* clean up any cloned children */
			rtflushclone(rnh, rt);
		}

		if_group_routechange(info->rti_info[RTAX_DST],
			info->rti_info[RTAX_NETMASK]);
		break;
	}
bad:
	splx(s);
	return (error);
}
1005 
/*
 * Install `gate' as the gateway of `rt0', (re)allocating the combined
 * key+gateway buffer when the new gateway does not fit, and refresh
 * the cached gwroute for RTF_GATEWAY routes.
 * Returns 0 on success, 1 on allocation failure.
 */
int
rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate,
    u_int tableid)
{
	caddr_t	new, old;
	int	dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
	struct rtentry	*rt = rt0;

	/*
	 * The destination key and the gateway share one contiguous
	 * allocation: key first, gateway at offset dlen.  Reallocate
	 * only when there is no gateway yet or the new one is larger.
	 */
	if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
		old = (caddr_t)rt_key(rt);
		R_Malloc(new, caddr_t, dlen + glen);
		if (new == NULL)
			return 1;
		rt->rt_nodes->rn_key = new;
	} else {
		/* New gateway fits; reuse the existing buffer in place. */
		new = rt->rt_nodes->rn_key;
		old = NULL;
	}
	Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
	if (old) {
		/* Fresh buffer: copy the key over, then free the old one. */
		Bcopy(dst, new, dlen);
		Free(old);
	}
	/* Drop the cached route to the previous gateway, if any. */
	if (rt->rt_gwroute != NULL) {
		rt = rt->rt_gwroute;
		RTFREE(rt);
		rt = rt0;
		rt->rt_gwroute = NULL;
	}
	if (rt->rt_flags & RTF_GATEWAY) {
		/* XXX is this actually valid to cross tables here? */
		rt->rt_gwroute = rtalloc1(gate, 1, rtable_l2(tableid));
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is 0 or greater
		 * than the MTU of gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 *
		 * NOTE(review): the condition below also requires a
		 * non-zero current MTU, unlike the comment above says --
		 * confirm which is intended before changing either.
		 */
		if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
		    rt->rt_rmx.rmx_mtu &&
		    rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
			rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
		}
	}
	return (0);
}
1053 
1054 void
1055 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
1056     struct sockaddr *netmask)
1057 {
1058 	u_char	*cp1 = (u_char *)src;
1059 	u_char	*cp2 = (u_char *)dst;
1060 	u_char	*cp3 = (u_char *)netmask;
1061 	u_char	*cplim = cp2 + *cp3;
1062 	u_char	*cplim2 = cp2 + *cp1;
1063 
1064 	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
1065 	cp3 += 2;
1066 	if (cplim > cplim2)
1067 		cplim = cplim2;
1068 	while (cp2 < cplim)
1069 		*cp2++ = *cp1++ & *cp3++;
1070 	if (cp2 < cplim2)
1071 		bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
1072 }
1073 
1074 /*
1075  * Set up a routing table entry, normally
1076  * for an interface.
1077  */
/*
 * Add (RTM_ADD) or delete (RTM_DELETE) the route for interface address
 * `ifa' in the interface's rdomain, emitting a routing-socket address
 * message on success.  Returns 0 or an errno.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry		*rt;
	struct sockaddr		*dst, *deldst;
	struct mbuf		*m = NULL;
	struct rtentry		*nrt = NULL;
	int			 error;
	struct rt_addrinfo	 info;
	struct sockaddr_rtlabel	 sa_rl;
	u_short			 rtableid = ifa->ifa_ifp->if_rdomain;

	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (cmd == RTM_DELETE) {
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			/*
			 * Network route: look up by the masked destination,
			 * built in a temporary mbuf.
			 */
			m = m_get(M_DONTWAIT, MT_SONAME);
			if (m == NULL)
				return (ENOBUFS);
			deldst = mtod(m, struct sockaddr *);
			rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
			dst = deldst;
		}
		if ((rt = rtalloc1(dst, 0, rtableid)) != NULL) {
			rt->rt_refcnt--;
			/* try to find the right route */
			while (rt && rt->rt_ifa != ifa)
				rt = (struct rtentry *)
				    ((struct radix_node *)rt)->rn_dupedkey;
			if (!rt) {
				/* No route for this ifa: bail out. */
				if (m != NULL)
					(void) m_free(m);
				return (flags & RTF_HOST ? EHOSTUNREACH
							: ENETUNREACH);
			}
		}
	}
	bzero(&info, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags | ifa->ifa_flags;
	info.rti_info[RTAX_DST] = dst;
	if (cmd == RTM_ADD)
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] =
	    rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl);

	/*
	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
	 * variable) when RTF_HOST is 1.  still not sure if i can safely
	 * change it to meet bsdi4 behavior.
	 */
	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	error = rtrequest1(cmd, &info, RTP_CONNECTED, &nrt, rtableid);
	if (cmd == RTM_DELETE) {
		if (error == 0 && (rt = nrt) != NULL) {
			rt_newaddrmsg(cmd, ifa, error, nrt);
			/* Deleted route: drop the last reference. */
			if (rt->rt_refcnt <= 0) {
				rt->rt_refcnt++;
				rtfree(rt);
			}
		}
		if (m != NULL)
			(void) m_free(m);
	}
	if (cmd == RTM_ADD && error == 0 && (rt = nrt) != NULL) {
		/* Drop the reference rtrequest1() took on our behalf. */
		rt->rt_refcnt--;
		if (rt->rt_ifa != ifa) {
			/*
			 * The new route ended up on a different ifa
			 * (e.g. an existing duplicate); repoint it at
			 * ours, replaying the rtrequest hooks.
			 */
			printf("rtinit: wrong ifa (%p) was (%p)\n",
			    ifa, rt->rt_ifa);
			if (rt->rt_ifa->ifa_rtrequest)
				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, NULL);
			IFAFREE(rt->rt_ifa);
			rt->rt_ifa = ifa;
			rt->rt_ifp = ifa->ifa_ifp;
			ifa->ifa_refcnt++;
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_ADD, rt, NULL);
		}
		rt_newaddrmsg(cmd, ifa, error, nrt);
	}
	return (error);
}
1160 
1161 /*
1162  * Route timer routines.  These routes allow functions to be called
1163  * for various routes at any time.  This is useful in supporting
1164  * path MTU discovery and redirect route deletion.
1165  *
1166  * This is similar to some BSDI internal functions, but it provides
1167  * for multiple queues for efficiency's sake...
1168  */
1169 
1170 LIST_HEAD(, rttimer_queue)	rttimer_queue_head;
1171 static int			rt_init_done = 0;
1172 
/*
 * Run the action attached to an expiring rttimer: invoke its callback
 * if one was registered, otherwise delete the associated route.
 */
#define RTTIMER_CALLOUT(r)	{				\
	if (r->rtt_func != NULL) {				\
		(*r->rtt_func)(r->rtt_rt, r);			\
	} else {						\
		struct rt_addrinfo info;			\
		bzero(&info, sizeof(info));			\
		info.rti_info[RTAX_DST] = rt_key(r->rtt_rt);	\
		rtrequest1(RTM_DELETE, &info,			\
		    r->rtt_rt->rt_priority, NULL, 0 /* XXX */);	\
	}							\
}
1184 
1185 /*
1186  * Some subtle order problems with domain initialization mean that
1187  * we cannot count on this being run from rt_init before various
1188  * protocol initializations are done.  Therefore, we make sure
1189  * that this is run when the first queue is added...
1190  */
1191 
/*
 * One-time setup of the route-timer machinery: the rttimer pool, the
 * queue list and the periodic tick.  Called lazily from
 * rt_timer_queue_create() because domain init order is unreliable
 * (see the comment above).  Panics if called twice.
 */
void
rt_timer_init()
{
	static struct timeout	rt_timer_timeout;

	if (rt_init_done)
		panic("rt_timer_init: already initialized");

	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
	    NULL);

	LIST_INIT(&rttimer_queue_head);
	/* Arm the self-rearming one-second tick (see rt_timer_timer()). */
	timeout_set(&rt_timer_timeout, rt_timer_timer, &rt_timer_timeout);
	timeout_add_sec(&rt_timer_timeout, 1);
	rt_init_done = 1;
}
1208 
1209 struct rttimer_queue *
1210 rt_timer_queue_create(u_int timeout)
1211 {
1212 	struct rttimer_queue	*rtq;
1213 
1214 	if (rt_init_done == 0)
1215 		rt_timer_init();
1216 
1217 	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1218 	if (rtq == NULL)
1219 		return (NULL);
1220 	Bzero(rtq, sizeof *rtq);
1221 
1222 	rtq->rtq_timeout = timeout;
1223 	rtq->rtq_count = 0;
1224 	TAILQ_INIT(&rtq->rtq_head);
1225 	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1226 
1227 	return (rtq);
1228 }
1229 
/*
 * Change a queue's timeout.  This also affects timers already on the
 * queue, since expiry is computed from rtq_timeout at scan time in
 * rt_timer_timer().
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{
	rtq->rtq_timeout = timeout;
}
1235 
/*
 * Drain a timer queue and unregister it.  If `destroy' is set, each
 * pending timer's action is run (RTTIMER_CALLOUT) before it is freed.
 * The queue structure itself is left for the caller to free.
 */
void
rt_timer_queue_destroy(struct rttimer_queue *rtq, int destroy)
{
	struct rttimer	*r;

	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		/* Unlink from both the per-route list and this queue. */
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		if (destroy)
			RTTIMER_CALLOUT(r);
		pool_put(&rttimer_pool, r);
		if (rtq->rtq_count > 0)
			rtq->rtq_count--;
		else
			printf("rt_timer_queue_destroy: rtq_count reached 0\n");
	}

	LIST_REMOVE(rtq, rtq_link);

	/*
	 * Caller is responsible for freeing the rttimer_queue structure.
	 */
}
1259 
/* Return the number of timers currently pending on `rtq'. */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return (rtq->rtq_count);
}
1265 
1266 void
1267 rt_timer_remove_all(struct rtentry *rt)
1268 {
1269 	struct rttimer	*r;
1270 
1271 	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1272 		LIST_REMOVE(r, rtt_link);
1273 		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1274 		if (r->rtt_queue->rtq_count > 0)
1275 			r->rtt_queue->rtq_count--;
1276 		else
1277 			printf("rt_timer_remove_all: rtq_count reached 0\n");
1278 		pool_put(&rttimer_pool, r);
1279 	}
1280 }
1281 
/*
 * Arm a timer on `rt' that fires `func' once `queue's timeout has
 * elapsed.  At most one timer per action is kept on a route: an
 * existing timer with the same function is replaced.  Returns 0 on
 * success or ENOBUFS if no timer could be allocated.
 */
int
rt_timer_add(struct rtentry *rt, void (*func)(struct rtentry *,
    struct rttimer *), struct rttimer_queue *queue)
{
	struct rttimer	*r;
	long		 current_time;

	/*
	 * NOTE(review): rtt_time uses the monotonic time_uptime while
	 * the user-visible rmx_expire uses wall-clock time_second --
	 * presumably intentional, confirm before changing either.
	 */
	current_time = time_uptime;
	rt->rt_rmx.rmx_expire = time_second + queue->rtq_timeout;

	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	for (r = LIST_FIRST(&rt->rt_timer); r != NULL;
	     r = LIST_NEXT(r, rtt_link)) {
		if (r->rtt_func == func) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
			if (r->rtt_queue->rtq_count > 0)
				r->rtt_queue->rtq_count--;
			else
				printf("rt_timer_add: rtq_count reached 0\n");
			pool_put(&rttimer_pool, r);
			break;  /* only one per list, so we can quit... */
		}
	}

	r = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO);
	if (r == NULL)
		return (ENOBUFS);

	/* Link the new timer onto both the route and the queue. */
	r->rtt_rt = rt;
	r->rtt_time = current_time;
	r->rtt_func = func;
	r->rtt_queue = queue;
	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
	r->rtt_queue->rtq_count++;

	return (0);
}
1324 
1325 struct radix_node_head *
1326 rt_gettable(sa_family_t af, u_int id)
1327 {
1328 	if (id > rtbl_id_max)
1329 		return (NULL);
1330 	return (rt_tables[id] ? rt_tables[id][af2rtafidx[af]] : NULL);
1331 }
1332 
1333 struct radix_node *
1334 rt_lookup(struct sockaddr *dst, struct sockaddr *mask, u_int tableid)
1335 {
1336 	struct radix_node_head	*rnh;
1337 
1338 	if ((rnh = rt_gettable(dst->sa_family, tableid)) == NULL)
1339 		return (NULL);
1340 
1341 	return (rnh->rnh_lookup(dst, mask, rnh));
1342 }
1343 
1344 /* ARGSUSED */
/*
 * Periodic tick: scan every timer queue, run and free the timers
 * whose queue timeout has elapsed, then re-arm for one second later.
 */
void
rt_timer_timer(void *arg)
{
	struct timeout		*to = (struct timeout *)arg;
	struct rttimer_queue	*rtq;
	struct rttimer		*r;
	long			 current_time;
	int			 s;

	current_time = time_uptime;

	s = splsoftnet();
	for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL;
	     rtq = LIST_NEXT(rtq, rtq_link)) {
		/*
		 * Timers are appended in arrival order, so the head is
		 * always the oldest; stop at the first unexpired one.
		 */
		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
		    (r->rtt_time + rtq->rtq_timeout) < current_time) {
			LIST_REMOVE(r, rtt_link);
			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
			RTTIMER_CALLOUT(r);
			pool_put(&rttimer_pool, r);
			if (rtq->rtq_count > 0)
				rtq->rtq_count--;
			else
				printf("rt_timer_timer: rtq_count reached 0\n");
		}
	}
	splx(s);

	timeout_add_sec(to, 1);
}
1375 
/*
 * Look up (or create) the numeric id for route label `name' and take
 * a reference on it.  Returns 0 for the empty name, on allocation
 * failure, or when all LABELID_MAX ids are taken.
 */
u_int16_t
rtlabel_name2id(char *name)
{
	struct rt_label		*label, *p = NULL;
	u_int16_t		 new_id = 1;

	if (!name[0])
		return (0);

	/* Existing label: just bump the reference count. */
	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (strcmp(name, label->rtl_name) == 0) {
			label->rtl_ref++;
			return (label->rtl_id);
		}

	/*
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find. if there is none or the list
	 * is empty, append a new entry at the end.
	 */

	/*
	 * Entries are kept in ascending id order (see the INSERT_BEFORE
	 * below); walk until the ids stop being consecutive.
	 */
	if (!TAILQ_EMPTY(&rt_labels))
		for (p = TAILQ_FIRST(&rt_labels); p != NULL &&
		    p->rtl_id == new_id; p = TAILQ_NEXT(p, rtl_entry))
			new_id = p->rtl_id + 1;

	if (new_id > LABELID_MAX)
		return (0);

	label = malloc(sizeof(*label), M_TEMP, M_NOWAIT|M_ZERO);
	if (label == NULL)
		return (0);
	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
	label->rtl_id = new_id;
	label->rtl_ref++;

	if (p != NULL)	/* insert new entry before p */
		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
	else		/* either list empty or no free slot in between */
		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);

	return (label->rtl_id);
}
1419 
1420 const char *
1421 rtlabel_id2name(u_int16_t id)
1422 {
1423 	struct rt_label	*label;
1424 
1425 	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1426 		if (label->rtl_id == id)
1427 			return (label->rtl_name);
1428 
1429 	return (NULL);
1430 }
1431 
1432 struct sockaddr *
1433 rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
1434 {
1435 	const char	*label;
1436 
1437 	if (labelid == 0 || (label = rtlabel_id2name(labelid)) == NULL)
1438 		return (NULL);
1439 
1440 	bzero(sa_rl, sizeof(*sa_rl));
1441 	sa_rl->sr_len = sizeof(*sa_rl);
1442 	sa_rl->sr_family = AF_UNSPEC;
1443 	strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));
1444 
1445 	return ((struct sockaddr *)sa_rl);
1446 }
1447 
1448 void
1449 rtlabel_unref(u_int16_t id)
1450 {
1451 	struct rt_label	*p, *next;
1452 
1453 	if (id == 0)
1454 		return;
1455 
1456 	for (p = TAILQ_FIRST(&rt_labels); p != NULL; p = next) {
1457 		next = TAILQ_NEXT(p, rtl_entry);
1458 		if (id == p->rtl_id) {
1459 			if (--p->rtl_ref == 0) {
1460 				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
1461 				free(p, M_TEMP);
1462 			}
1463 			break;
1464 		}
1465 	}
1466 }
1467 
/*
 * Delete every route pointing at `ifp', across all routing tables
 * and address families (typically on interface detach).
 */
void
rt_if_remove(struct ifnet *ifp)
{
	int			 i;
	u_int			 tid;
	struct radix_node_head	*rnh;

	for (tid = 0; tid <= rtbl_id_max; tid++) {
		for (i = 1; i <= AF_MAX; i++) {
			/* Restart the walk whenever it is cancelled. */
			if ((rnh = rt_gettable(i, tid)) != NULL)
				while ((*rnh->rnh_walktree)(rnh,
				    rt_if_remove_rtdelete, ifp) == EAGAIN)
					;	/* nothing */
		}
	}
}
1484 
1485 /*
1486  * Note that deleting a RTF_CLONING route can trigger the
1487  * deletion of more entries, so we need to cancel the walk
1488  * and return EAGAIN.  The caller should restart the walk
1489  * as long as EAGAIN is returned.
1490  */
1491 int
1492 rt_if_remove_rtdelete(struct radix_node *rn, void *vifp)
1493 {
1494 	struct ifnet	*ifp = vifp;
1495 	struct rtentry	*rt = (struct rtentry *)rn;
1496 
1497 	if (rt->rt_ifp == ifp) {
1498 		int	cloning = (rt->rt_flags & RTF_CLONING);
1499 
1500 		if (rtdeletemsg(rt, ifp->if_rdomain /* XXX wrong */) == 0 && cloning)
1501 			return (EAGAIN);
1502 	}
1503 
1504 	/*
1505 	 * XXX There should be no need to check for rt_ifa belonging to this
1506 	 * interface, because then rt_ifp is set, right?
1507 	 */
1508 
1509 	return (0);
1510 }
1511 
1512 #ifndef SMALL_KERNEL
1513 void
1514 rt_if_track(struct ifnet *ifp)
1515 {
1516 	struct radix_node_head *rnh;
1517 	int i;
1518 	u_int tid;
1519 
1520 	if (rt_tables == NULL)
1521 		return;
1522 
1523 	for (tid = 0; tid <= rtbl_id_max; tid++) {
1524 		for (i = 1; i <= AF_MAX; i++) {
1525 			if ((rnh = rt_gettable(i, tid)) != NULL) {
1526 				if (!rn_mpath_capable(rnh))
1527 					continue;
1528 				while ((*rnh->rnh_walktree)(rnh,
1529 				    rt_if_linkstate_change, ifp) == EAGAIN)
1530 					;	/* nothing */
1531 			}
1532 		}
1533 	}
1534 }
1535 
/*
 * rnh_walktree() callback: sync a route's RTF_UP flag and multipath
 * priority with the link state of `ifp'.  Routes on a usable link are
 * brought up at their base priority; otherwise they are demoted with
 * RTP_DOWN.  Always returns 0 (never cancels the walk).
 */
int
rt_if_linkstate_change(struct radix_node *rn, void *arg)
{
	struct ifnet *ifp = arg;
	struct rtentry *rt = (struct rtentry *)rn;

	if (rt->rt_ifp == ifp) {
		/* UNKNOWN is treated as usable, like an up link. */
		if ((LINK_STATE_IS_UP(ifp->if_link_state) ||
		    ifp->if_link_state == LINK_STATE_UNKNOWN) &&
		    ifp->if_flags & IFF_UP) {
			if (!(rt->rt_flags & RTF_UP)) {
				/* bring route up */
				rt->rt_flags |= RTF_UP;
				rn_mpath_reprio(rn, rt->rt_priority & RTP_MASK);
			}
		} else {
			if (rt->rt_flags & RTF_UP) {
				/* take route down */
				rt->rt_flags &= ~RTF_UP;
				rn_mpath_reprio(rn, rt->rt_priority | RTP_DOWN);
			}
		}
		if_group_routechange(rt_key(rt), rt_mask(rt));
	}

	return (0);
}
1563 #endif
1564