xref: /openbsd-src/sys/net/route.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: route.c,v 1.132 2011/07/22 13:05:29 henning Exp $	*/
2 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)route.c	8.2 (Berkeley) 11/15/93
62  */
63 
64 /*
65  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
66  *
67  * NRL grants permission for redistribution and use in source and binary
68  * forms, with or without modification, of the software and documentation
69  * created at NRL provided that the following conditions are met:
70  *
71  * 1. Redistributions of source code must retain the above copyright
72  *    notice, this list of conditions and the following disclaimer.
73  * 2. Redistributions in binary form must reproduce the above copyright
74  *    notice, this list of conditions and the following disclaimer in the
75  *    documentation and/or other materials provided with the distribution.
76  * 3. All advertising materials mentioning features or use of this software
77  *    must display the following acknowledgements:
78  * 	This product includes software developed by the University of
79  * 	California, Berkeley and its contributors.
80  * 	This product includes software developed at the Information
81  * 	Technology Division, US Naval Research Laboratory.
82  * 4. Neither the name of the NRL nor the names of its contributors
83  *    may be used to endorse or promote products derived from this software
84  *    without specific prior written permission.
85  *
86  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
90  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97  *
98  * The views and conclusions contained in the software and documentation
99  * are those of the authors and should not be interpreted as representing
100  * official policies, either expressed or implied, of the US Naval
101  * Research Laboratory (NRL).
102  */
103 
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/proc.h>
107 #include <sys/mbuf.h>
108 #include <sys/socket.h>
109 #include <sys/socketvar.h>
110 #include <sys/domain.h>
111 #include <sys/protosw.h>
112 #include <sys/ioctl.h>
113 #include <sys/kernel.h>
114 #include <sys/queue.h>
115 #include <sys/pool.h>
116 
117 #include <net/if.h>
118 #include <net/route.h>
119 #include <net/raw_cb.h>
120 
121 #include <netinet/in.h>
122 #include <netinet/in_var.h>
123 
124 #ifdef MPLS
125 #include <netmpls/mpls.h>
126 #endif
127 
128 #ifdef IPSEC
129 #include <netinet/ip_ipsp.h>
130 #include <net/if_enc.h>
131 
132 struct ifaddr	*encap_findgwifa(struct sockaddr *, u_int);
133 #endif
134 
#define	SA(p) ((struct sockaddr *)(p))	/* convenience sockaddr cast */

struct	route_cb	   route_cb;
struct	rtstat		   rtstat;	/* routing statistics counters */
struct	radix_node_head	***rt_tables;	/* [table id][AF index] -> radix head */
u_int8_t		   af2rtafidx[AF_MAX+1];	/* AF -> rt_tables column */
u_int8_t		   rtafidx_max;	/* one past last used AF column */
u_int			   rtbl_id_max = 0;	/* highest allocated table id */
u_int			  *rt_tab2dom;	/* rt table to domain lookup table */

int			rttrash;	/* routes not in table but not freed */

struct pool		rtentry_pool;	/* pool for rtentry structures */
struct pool		rttimer_pool;	/* pool for rttimer structures */

/* file-local helper prototypes */
int	rtable_init(struct radix_node_head ***, u_int);
int	rtflushclone1(struct radix_node *, void *, u_int);
void	rtflushclone(struct radix_node_head *, struct rtentry *);
int	rt_if_remove_rtdelete(struct radix_node *, void *, u_int);

#define	LABELID_MAX	50000	/* upper bound on assignable label ids */

/*
 * One named route label; routes reference labels by rtl_id and the
 * rtl_ref count tracks how many routes currently use the label.
 */
struct rt_label {
	TAILQ_ENTRY(rt_label)	rtl_entry;	/* rt_labels list linkage */
	char			rtl_name[RTLABEL_LEN];	/* label string */
	u_int16_t		rtl_id;		/* id stored in rt_labelid */
	int			rtl_ref;	/* routes referencing label */
};

TAILQ_HEAD(rt_labels, rt_label)	rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels);
165 
#ifdef IPSEC
/*
 * Return the first address of the encapsulation interface for the
 * given routing domain, or NULL if no such interface exists.
 */
struct ifaddr *
encap_findgwifa(struct sockaddr *gw, u_int rdomain)
{
	struct ifnet	*encif;

	encif = enc_getif(rdomain, 0);
	if (encif == NULL)
		return (NULL);

	return (TAILQ_FIRST(&encif->if_addrlist));
}
#endif
178 
/*
 * Allocate and populate one routing table: an array of per-AF radix
 * tree heads, indexed via af2rtafidx[].  Returns 0 on success or
 * ENOMEM if the head array cannot be allocated.
 */
int
rtable_init(struct radix_node_head ***table, u_int id)
{
	void		**p;
	struct domain	 *dom;
	u_int8_t	  i;

	/* index 0 is reserved (NULL), hence rtafidx_max + 1 slots */
	if ((p = malloc(sizeof(void *) * (rtafidx_max + 1), M_RTABLE,
	    M_NOWAIT|M_ZERO)) == NULL)
		return (ENOMEM);

	/* 2nd pass: attach */
	for (dom = domains; dom != NULL; dom = dom->dom_next)
		if (dom->dom_rtattach)
			dom->dom_rtattach(&p[af2rtafidx[dom->dom_family]],
			    dom->dom_rtoffset);

	*table = (struct radix_node_head **)p;

	/* record the owning table id in every attached radix head */
	for (i = 0; i < rtafidx_max; i++) {
		if ((*table)[i] != NULL)
			(*table)[i]->rnh_rtableid = id;
	}

	return (0);
}
205 
/*
 * One-time initialization of the routing subsystem: set up the
 * rtentry pool, the radix machinery, the AF -> table-column map,
 * and create routing table 0 (the main table).
 */
void
route_init(void)
{
	struct domain	 *dom;

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL);
	rn_init();	/* initialize all zeroes, all ones, mask table */

	bzero(af2rtafidx, sizeof(af2rtafidx));
	rtafidx_max = 1;	/* must have NULL at index 0, so start at 1 */

	/* find out how many tables to allocate */
	for (dom = domains; dom != NULL; dom = dom->dom_next)
		if (dom->dom_rtattach)
			af2rtafidx[dom->dom_family] = rtafidx_max++;

	/* table 0 must always exist */
	if (rtable_add(0) != 0)
		panic("route_init rtable_add");
}
226 
/*
 * Create routing table `id', growing the rt_tables / rt_tab2dom
 * arrays if needed.  Returns 0 on success, EINVAL for an id above
 * RT_TABLEID_MAX, EEXIST if the table already exists, or ENOMEM.
 */
int
rtable_add(u_int id)	/* must be called at splsoftnet */
{
	void	*p, *q;

	if (id > RT_TABLEID_MAX)
		return (EINVAL);

	/* id == 0 also grows: first call allocates the initial arrays */
	if (id == 0 || id > rtbl_id_max) {
		size_t	newlen = sizeof(void *) * (id+1);
		size_t	newlen2 = sizeof(u_int) * (id+1);

		if ((p = malloc(newlen, M_RTABLE, M_NOWAIT|M_ZERO)) == NULL)
			return (ENOMEM);
		if ((q = malloc(newlen2, M_RTABLE, M_NOWAIT|M_ZERO)) == NULL) {
			free(p, M_RTABLE);
			return (ENOMEM);
		}
		/* carry existing entries over to the larger arrays */
		if (rt_tables) {
			bcopy(rt_tables, p, sizeof(void *) * (rtbl_id_max+1));
			bcopy(rt_tab2dom, q, sizeof(u_int) * (rtbl_id_max+1));
			free(rt_tables, M_RTABLE);
			free(rt_tab2dom, M_RTABLE);
		}
		rt_tables = p;
		rt_tab2dom = q;
		rtbl_id_max = id;
	}

	if (rt_tables[id] != NULL)	/* already exists */
		return (EEXIST);

	rt_tab2dom[id] = 0;	/* use main table/domain by default */
	return (rtable_init(&rt_tables[id], id));
}
262 
263 u_int
264 rtable_l2(u_int id)
265 {
266 	if (id > rtbl_id_max)
267 		return (0);
268 	return (rt_tab2dom[id]);
269 }
270 
271 void
272 rtable_l2set(u_int id, u_int parent)
273 {
274 	if (!rtable_exists(id) || !rtable_exists(parent))
275 		return;
276 	rt_tab2dom[id] = parent;
277 }
278 
279 int
280 rtable_exists(u_int id)	/* verify table with that ID exists */
281 {
282 	if (id > RT_TABLEID_MAX)
283 		return (0);
284 
285 	if (id > rtbl_id_max)
286 		return (0);
287 
288 	if (rt_tables[id] == NULL)
289 		return (0);
290 
291 	return (1);
292 }
293 
294 /*
295  * Packet routing routines.
296  */
297 void
298 rtalloc_noclone(struct route *ro)
299 {
300 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
301 		return;				 /* XXX */
302 	ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT | RT_NOCLONING,
303 	    ro->ro_tableid);
304 }
305 
306 void
307 rtalloc(struct route *ro)
308 {
309 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
310 		return;				 /* XXX */
311 	ro->ro_rt = rtalloc1(&ro->ro_dst, RT_REPORT, ro->ro_tableid);
312 }
313 
/*
 * Core route lookup.  Returns a referenced rtentry for `dst' in
 * table `tableid', cloning an RTF_CLONING route unless RT_NOCLONING
 * is set.  On failure (or RTF_XRESOLVE) an RTM_MISS/RTM_RESOLVE
 * message is sent to listeners when RT_REPORT is requested.
 */
struct rtentry *
rtalloc1(struct sockaddr *dst, int flags, u_int tableid)
{
	struct radix_node_head	*rnh;
	struct rtentry		*rt;
	struct radix_node	*rn;
	struct rtentry		*newrt = 0;
	struct rt_addrinfo	 info;
	int			 s = splsoftnet(), err = 0, msgtype = RTM_MISS;

	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	rnh = rt_gettable(dst->sa_family, tableid);
	/* RNF_ROOT nodes are internal radix markers, not real routes */
	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
	    ((rn->rn_flags & RNF_ROOT) == 0)) {
		newrt = rt = (struct rtentry *)rn;
		/* clone only when reporting was asked and cloning allowed */
		if ((rt->rt_flags & RTF_CLONING) &&
		    ISSET(flags,  RT_REPORT | RT_NOCLONING) == RT_REPORT) {
			err = rtrequest1(RTM_RESOLVE, &info, RTP_DEFAULT,
			    &newrt, tableid);
			if (err) {
				/* cloning failed: hand back the parent */
				newrt = rt;
				rt->rt_refcnt++;
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/* external resolver must finish the job */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
			/* Inform listeners of the new route */
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = rt_key(rt);
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if (rt->rt_ifp != NULL) {
				info.rti_info[RTAX_IFP] =
				    TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
				info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
			}
			rt_missmsg(RTM_ADD, &info, rt->rt_flags,
			    rt->rt_ifp, 0, tableid);
		} else
			rt->rt_refcnt++;	/* plain hit: take a reference */
	} else {
		if (dst->sa_family != PF_KEY)
			rtstat.rts_unreach++;
	/*
	 * IP encapsulation does lots of lookups where we don't need nor want
	 * the RTM_MISSes that would be generated.  It causes RTM_MISS storms
	 * sent upward breaking user-level routing queries.
	 */
miss:
		if (ISSET(flags, RT_REPORT) && dst->sa_family != PF_KEY) {
			bzero((caddr_t)&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			rt_missmsg(msgtype, &info, 0, NULL, err, tableid);
		}
	}
	splx(s);
	return (newrt);
}
376 
/*
 * Drop one reference on a route; when the last reference goes and
 * the route is no longer RTF_UP, release all attached resources
 * (timers, ifa, label, MPLS info, key storage, the entry itself).
 */
void
rtfree(struct rtentry *rt)
{
	struct ifaddr	*ifa;

	if (rt == NULL)
		panic("rtfree");

	rt->rt_refcnt--;

	if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
		if (rt->rt_refcnt == 0 && (rt->rt_nodes->rn_flags & RNF_ACTIVE))
			return; /* route still active but currently down */
		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic("rtfree 2");
		rttrash--;	/* leaving the "deleted but referenced" set */
		if (rt->rt_refcnt < 0) {
			/* refcount underflow: report and leak on purpose */
			printf("rtfree: %p not freed (neg refs)\n", rt);
			return;
		}
		rt_timer_remove_all(rt);
		ifa = rt->rt_ifa;
		if (ifa)
			IFAFREE(ifa);
		rtlabel_unref(rt->rt_labelid);
#ifdef MPLS
		if (rt->rt_flags & RTF_MPLS)
			free(rt->rt_llinfo, M_TEMP);
#endif
		Free(rt_key(rt));
		pool_put(&rtentry_pool, rt);
	}
}
410 
411 void
412 ifafree(struct ifaddr *ifa)
413 {
414 	if (ifa == NULL)
415 		panic("ifafree");
416 	if (ifa->ifa_refcnt == 0)
417 		free(ifa, M_IFADDR);
418 	else
419 		ifa->ifa_refcnt--;
420 }
421 
422 /*
423  * Force a routing table entry to the specified
424  * destination to go through the given gateway.
425  * Normally called as a result of a routing redirect
426  * message from the network layer.
427  *
428  * N.B.: must be called at splsoftnet
429  */
/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * N.B.: must be called at splsoftnet
 */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct sockaddr *src,
    struct rtentry **rtp, u_int rdomain)
{
	struct rtentry		*rt;
	int			 error = 0;
	u_int32_t		*stat = NULL;
	struct rt_addrinfo	 info;
	struct ifaddr		*ifa;
	struct ifnet		*ifp = NULL;

	splsoftassert(IPL_SOFTNET);

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet(gateway, rdomain)) == NULL) {
		error = ENETUNREACH;
		goto out;
	}
	ifp = ifa->ifa_ifp;
	rt = rtalloc1(dst, 0, rdomain);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
#define	equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (!(flags & RTF_DONE) && rt &&
	     (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway, rdomain) != NULL)
		error = EHOSTUNREACH;	/* redirect points at ourselves */
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if ((rt == NULL) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			if (rt)
				rtfree(rt);
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_info[RTAX_NETMASK] = netmask;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest1(RTM_ADD, &info, RTP_DEFAULT, &rt,
			    rdomain);
			if (rt != NULL)
				flags = rt->rt_flags;
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			stat = &rtstat.rts_newgateway;
			rt_setgate(rt, rt_key(rt), gateway, rdomain);
		}
	} else
		error = EHOSTUNREACH;
done:
	/* hand the route to the caller on success, else drop our ref */
	if (rt) {
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	/* always notify listeners of the (attempted) redirect */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, ifp, error, rdomain);
}
532 
533 /*
534  * Delete a route and generate a message
535  */
536 int
537 rtdeletemsg(struct rtentry *rt, u_int tableid)
538 {
539 	int			error;
540 	struct rt_addrinfo	info;
541 	struct ifnet		*ifp;
542 
543 	/*
544 	 * Request the new route so that the entry is not actually
545 	 * deleted.  That will allow the information being reported to
546 	 * be accurate (and consistent with route_output()).
547 	 */
548 	bzero((caddr_t)&info, sizeof(info));
549 	info.rti_info[RTAX_DST] = rt_key(rt);
550 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
551 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
552 	info.rti_flags = rt->rt_flags;
553 	ifp = rt->rt_ifp;
554 	error = rtrequest1(RTM_DELETE, &info, rt->rt_priority, &rt, tableid);
555 
556 	rt_missmsg(RTM_DELETE, &info, info.rti_flags, ifp, error, tableid);
557 
558 	/* Adjust the refcount */
559 	if (error == 0 && rt->rt_refcnt <= 0) {
560 		rt->rt_refcnt++;
561 		rtfree(rt);
562 	}
563 	return (error);
564 }
565 
566 int
567 rtflushclone1(struct radix_node *rn, void *arg, u_int id)
568 {
569 	struct rtentry	*rt, *parent;
570 
571 	rt = (struct rtentry *)rn;
572 	parent = (struct rtentry *)arg;
573 	if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent == parent)
574 		rtdeletemsg(rt, id);
575 	return 0;
576 }
577 
578 void
579 rtflushclone(struct radix_node_head *rnh, struct rtentry *parent)
580 {
581 
582 #ifdef DIAGNOSTIC
583 	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
584 		panic("rtflushclone: called with a non-cloning route");
585 	if (!rnh->rnh_walktree)
586 		panic("rtflushclone: no rnh_walktree");
587 #endif
588 	rnh->rnh_walktree(rnh, rtflushclone1, (void *)parent);
589 }
590 
/*
 * Routing ioctl entry point: route manipulation is done via the
 * routing socket instead, so every request is rejected.
 */
int
rtioctl(u_long req, caddr_t data, struct proc *p)
{
	return (EOPNOTSUPP);
}
596 
/*
 * Find the interface address to attach to a route with the given
 * flags, destination and gateway, trying progressively less exact
 * matches and finally a recursive route lookup on the gateway.
 * Returns NULL when no usable address can be found.
 */
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
    u_int rtableid)
{
	struct ifaddr	*ifa;

#ifdef IPSEC
	/*
	 * If the destination is a PF_KEY address, we'll look
	 * for the existence of a encap interface number or address
	 * in the options list of the gateway. By default, we'll return
	 * enc0.
	 */
	if (dst && (dst->sa_family == PF_KEY))
		return (encap_findgwifa(gateway, rtableid));
#endif

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		ifa = NULL;
		if (flags & RTF_HOST)
			ifa = ifa_ifwithdstaddr(dst, rtableid);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr(gateway, rtableid);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gateway, rtableid);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet(gateway, rtableid);
	if (ifa == NULL) {
		/* last resort: route lookup on the gateway itself */
		struct rtentry	*rt = rtalloc1(gateway, 0, rtable_l2(rtableid));
		if (rt == NULL)
			return (NULL);
		rt->rt_refcnt--;	/* drop the lookup's reference */
		/* The gateway must be local if the same address family. */
		if ((rt->rt_flags & RTF_GATEWAY) &&
		    rt_key(rt)->sa_family == dst->sa_family)
			return (0);
		if ((ifa = rt->rt_ifa) == NULL)
			return (NULL);
	}
	/* prefer an address of dst's family on the same interface */
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr	*oifa = ifa;
		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (ifa == NULL)
			ifa = oifa;
	}
	return (ifa);
}
657 
658 #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
659 
/*
 * Resolve info->rti_ifa (and rti_ifp) from the addresses supplied
 * in a routing request, trying IFA, then GATEWAY, then DST.
 * Returns 0 on success or ENETUNREACH when no ifa can be found.
 */
int
rt_getifa(struct rt_addrinfo *info, u_int rtid)
{
	struct ifaddr	*ifa;
	int		 error = 0;

	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (info->rti_ifp == NULL && info->rti_info[RTAX_IFP] != NULL
	    && info->rti_info[RTAX_IFP]->sa_family == AF_LINK &&
	    (ifa = ifa_ifwithnet((struct sockaddr *)info->rti_info[RTAX_IFP],
	    rtid)) != NULL)
		info->rti_ifp = ifa->ifa_ifp;

	/* exact IFA match first */
	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);

	if (info->rti_ifa == NULL) {
		struct sockaddr	*sa;

		/* pick the most specific address available as a hint */
		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
				sa = info->rti_info[RTAX_DST];

		if (sa != NULL && info->rti_ifp != NULL)
			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
		else if (info->rti_info[RTAX_DST] != NULL &&
		    info->rti_info[RTAX_GATEWAY] != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_GATEWAY],
			    rtid);
		else if (sa != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    sa, sa, rtid);
	}
	if ((ifa = info->rti_ifa) != NULL) {
		/* derive the interface from the chosen address */
		if (info->rti_ifp == NULL)
			info->rti_ifp = ifa->ifa_ifp;
	} else
		error = ENETUNREACH;
	return (error);
}
705 
/*
 * Central route manipulation: handles RTM_DELETE, RTM_RESOLVE
 * (cloning) and RTM_ADD against table `tableid'.  On success with
 * ret_nrt != NULL the resulting route is returned referenced.
 * Returns 0 or an errno (ESRCH, EINVAL, EEXIST, ENOBUFS, ...).
 */
int
rtrequest1(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	int			 s = splsoftnet(); int error = 0;
	struct rtentry		*rt, *crt;
	struct radix_node	*rn;
	struct radix_node_head	*rnh;
	struct ifaddr		*ifa;
	struct sockaddr		*ndst;
	struct sockaddr_rtlabel	*sa_rl, sa_rl2;
#ifdef MPLS
	struct sockaddr_mpls	*sa_mpls;
#endif
#define senderr(x) { error = x ; goto bad; }

	if ((rnh = rt_gettable(info->rti_info[RTAX_DST]->sa_family, tableid)) ==
	    NULL)
		senderr(EAFNOSUPPORT);
	/* host routes carry no netmask */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;
#ifndef SMALL_KERNEL
		/*
		 * if we got multipath routes, we require users to specify
		 * a matching RTAX_GATEWAY.
		 */
		if (rn_mpath_capable(rnh)) {
			rt = rt_mpath_matchgate(rt,
			    info->rti_info[RTAX_GATEWAY], prio);
			rn = (struct radix_node *)rt;
			if (!rt)
				senderr(ESRCH);
		}
#endif
		/* unlink the entry from the radix tree */
		if ((rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh, rn)) == NULL)
			senderr(ESRCH);
		rt = (struct rtentry *)rn;

		/* clean up any cloned children */
		if ((rt->rt_flags & RTF_CLONING) != 0)
			rtflushclone(rnh, rt);

		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");

		/* release the cached gateway route, if any */
		if (rt->rt_gwroute) {
			rt = rt->rt_gwroute; RTFREE(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = NULL;
		}

		/* detach from the cloning parent */
		if (rt->rt_parent) {
			rt->rt_parent->rt_refcnt--;
			rt->rt_parent = NULL;
		}

#ifndef SMALL_KERNEL
		/* a single survivor is no longer a multipath route */
		if (rn_mpath_capable(rnh)) {
			if ((rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], rnh)) != NULL &&
			    rn_mpath_next(rn, 0) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
		}
#endif

		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
		rttrash++;	/* deleted but possibly still referenced */

		/* hand the dead route to the caller, or free it now */
		if (ret_nrt)
			*ret_nrt = rt;
		else if (rt->rt_refcnt <= 0) {
			rt->rt_refcnt++;
			rtfree(rt);
		}
		break;

	case RTM_RESOLVE:
		/* *ret_nrt is the cloning (parent) route on entry */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			senderr(EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			senderr(EINVAL);
		if (rt->rt_ifa->ifa_ifp) {
			info->rti_ifa = rt->rt_ifa;
		} else {
			/*
			 * The interface address at the cloning route
			 * is not longer referenced by an interface.
			 * Try to find a similar active address and use
			 * it for the cloned route.  The cloning route
			 * will get the new address and interface later.
			 */
			info->rti_ifa = NULL;
			info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
		}
		info->rti_ifp = rt->rt_ifp;
		info->rti_flags = rt->rt_flags & ~(RTF_CLONING | RTF_STATIC);
		info->rti_flags |= RTF_CLONED;
		info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		/* no genmask means the clone is a host route */
		if ((info->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
			info->rti_flags |= RTF_HOST;
		info->rti_info[RTAX_LABEL] =
		    rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
		/* FALLTHROUGH */

	case RTM_ADD:
		if (info->rti_ifa == NULL && (error = rt_getifa(info, tableid)))
			senderr(error);
		ifa = info->rti_ifa;
		rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
		if (rt == NULL)
			senderr(ENOBUFS);

		rt->rt_flags = info->rti_flags;

		if (prio == 0)
			prio = ifa->ifa_ifp->if_priority + RTP_STATIC;
		rt->rt_priority = prio;	/* init routing priority */
		LIST_INIT(&rt->rt_timer);
		/* allocates and fills the dst+gateway key storage */
		if (rt_setgate(rt, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_GATEWAY], tableid)) {
			pool_put(&rtentry_pool, rt);
			senderr(ENOBUFS);
		}
		ndst = rt_key(rt);
		/* store the destination masked to the route's netmask */
		if (info->rti_info[RTAX_NETMASK] != NULL) {
			rt_maskedcopy(info->rti_info[RTAX_DST], ndst,
			    info->rti_info[RTAX_NETMASK]);
		} else
			Bcopy(info->rti_info[RTAX_DST], ndst,
			    info->rti_info[RTAX_DST]->sa_len);
#ifndef SMALL_KERNEL
		if (rn_mpath_capable(rnh)) {
			/* do not permit exactly the same dst/mask/gw pair */
			if (rt_mpath_conflict(rnh, rt,
			    info->rti_info[RTAX_NETMASK],
			    info->rti_flags & RTF_MPATH)) {
				if (rt->rt_gwroute)
					rtfree(rt->rt_gwroute);
				Free(rt_key(rt));
				pool_put(&rtentry_pool, rt);
				senderr(EEXIST);
			}
			/* check the link state since the table supports it */
			if (LINK_STATE_IS_UP(ifa->ifa_ifp->if_link_state) &&
			    ifa->ifa_ifp->if_flags & IFF_UP)
				rt->rt_flags |= RTF_UP;
			else {
				rt->rt_flags &= ~RTF_UP;
				rt->rt_priority |= RTP_DOWN;
			}
		}
#endif

		/* attach the requested route label, if any */
		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

#ifdef MPLS
		/* We have to allocate additional space for MPLS infos */
		if (info->rti_flags & RTF_MPLS &&
		    (info->rti_info[RTAX_SRC] != NULL ||
		    info->rti_info[RTAX_DST]->sa_family == AF_MPLS)) {
			struct rt_mpls *rt_mpls;

			sa_mpls = (struct sockaddr_mpls *)
			    info->rti_info[RTAX_SRC];

			rt->rt_llinfo = (caddr_t)malloc(sizeof(struct rt_mpls),
			    M_TEMP, M_NOWAIT|M_ZERO);

			if (rt->rt_llinfo == NULL) {
				if (rt->rt_gwroute)
					rtfree(rt->rt_gwroute);
				Free(rt_key(rt));
				pool_put(&rtentry_pool, rt);
				senderr(ENOMEM);
			}

			rt_mpls = (struct rt_mpls *)rt->rt_llinfo;

			if (sa_mpls != NULL)
				rt_mpls->mpls_label = sa_mpls->smpls_label;

			rt_mpls->mpls_operation = info->rti_mpls;

			/* XXX: set experimental bits */

			rt->rt_flags |= RTF_MPLS;
		} else
			rt->rt_flags &= ~RTF_MPLS;
#endif

		ifa->ifa_refcnt++;
		rt->rt_ifa = ifa;
		rt->rt_ifp = ifa->ifa_ifp;
		if (req == RTM_RESOLVE) {
			/*
			 * If the ifa of the cloning route was stale, a
			 * successful lookup for an ifa with the same address
			 * has been made.  Use this ifa also for the cloning
			 * route.
			 */
			if ((*ret_nrt)->rt_ifa->ifa_ifp == NULL) {
				printf("rtrequest1 RTM_RESOLVE: wrong ifa (%p) "
				    "was (%p)\n", ifa, (*ret_nrt)->rt_ifa);
				if ((*ret_nrt)->rt_ifa->ifa_rtrequest)
					(*ret_nrt)->rt_ifa->ifa_rtrequest(
					    RTM_DELETE, *ret_nrt, NULL);
				IFAFREE((*ret_nrt)->rt_ifa);
				(*ret_nrt)->rt_ifa = ifa;
				(*ret_nrt)->rt_ifp = ifa->ifa_ifp;
				ifa->ifa_refcnt++;
				if (ifa->ifa_rtrequest)
					ifa->ifa_rtrequest(RTM_ADD, *ret_nrt,
					    NULL);
			}
			/*
			 * Copy both metrics and a back pointer to the cloned
			 * route's parent.
			 */
			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
			rt->rt_priority = (*ret_nrt)->rt_priority;
			rt->rt_parent = *ret_nrt;	 /* Back ptr. to parent. */
			rt->rt_parent->rt_refcnt++;
		}
		/* insert into the radix tree */
		rn = rnh->rnh_addaddr((caddr_t)ndst,
		    (caddr_t)info->rti_info[RTAX_NETMASK], rnh, rt->rt_nodes,
		    rt->rt_priority);
		if (rn == NULL && (crt = rtalloc1(ndst, 0, tableid)) != NULL) {
			/* overwrite cloned route */
			if ((crt->rt_flags & RTF_CLONED) != 0) {
				rtdeletemsg(crt, tableid);
				rn = rnh->rnh_addaddr((caddr_t)ndst,
				    (caddr_t)info->rti_info[RTAX_NETMASK],
				    rnh, rt->rt_nodes, rt->rt_priority);
			}
			RTFREE(crt);
		}
		if (rn == 0) {
			/* insertion failed: unwind everything built above */
			IFAFREE(ifa);
			if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent)
				rtfree(rt->rt_parent);
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			pool_put(&rtentry_pool, rt);
			senderr(EEXIST);
		}

#ifndef SMALL_KERNEL
		/* maintain RTF_MPATH on all routes sharing this prefix/prio */
		if (rn_mpath_capable(rnh) &&
		    (rn = rnh->rnh_lookup(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], rnh)) != NULL &&
		    (rn = rn_mpath_prio(rn, prio)) != NULL) {
			if (rn_mpath_next(rn, 0) == NULL)
				((struct rtentry *)rn)->rt_flags &= ~RTF_MPATH;
			else
				((struct rtentry *)rn)->rt_flags |= RTF_MPATH;
		}
#endif

		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt->rt_refcnt++;
		}
		if ((rt->rt_flags & RTF_CLONING) != 0) {
			/* clean up any cloned children */
			rtflushclone(rnh, rt);
		}

		if_group_routechange(info->rti_info[RTAX_DST],
			info->rti_info[RTAX_NETMASK]);
		break;
	}
bad:
	splx(s);
	return (error);
}
996 
/*
 * Install `gate' as the gateway of `rt0'.  The radix key and the
 * gateway live in a single allocation laid out as [dst | gateway];
 * when there is no gateway yet, or the new one needs more room, that
 * combined buffer is reallocated.  Any cached route to the old gateway
 * is dropped and, for RTF_GATEWAY entries, the new gateway is resolved
 * again; a smaller gateway MTU is inherited unless the MTU is locked.
 * Returns 0 on success, 1 if the allocation fails.
 */
int
rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate,
    u_int tableid)
{
	caddr_t	new, old;
	int	dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
	struct rtentry	*rt = rt0;

	if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
		/* No gateway yet, or it grew: allocate room for dst + gate. */
		old = (caddr_t)rt_key(rt);
		R_Malloc(new, caddr_t, dlen + glen);
		if (new == NULL)
			return 1;
		rt->rt_nodes->rn_key = new;
	} else {
		/* New gateway fits in place; reuse the existing buffer. */
		new = rt->rt_nodes->rn_key;
		old = NULL;
	}
	/* Gateway sits at offset dlen, right after the destination key. */
	Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
	if (old) {
		/* Fresh buffer: copy the key in before freeing the old one. */
		Bcopy(dst, new, dlen);
		Free(old);
	}
	/* The cached route to the previous gateway is now stale. */
	if (rt->rt_gwroute != NULL) {
		rt = rt->rt_gwroute;
		RTFREE(rt);
		rt = rt0;
		rt->rt_gwroute = NULL;
	}
	if (rt->rt_flags & RTF_GATEWAY) {
		/* XXX is this actually valid to cross tables here? */
		rt->rt_gwroute = rtalloc1(gate, RT_REPORT, rtable_l2(tableid));
		/*
		 * If we switched gateways, grab the MTU from the new
		 * gateway route if the current MTU is 0 or greater
		 * than the MTU of gateway.
		 * Note that, if the MTU of gateway is 0, we will reset the
		 * MTU of the route to run PMTUD again from scratch. XXX
		 */
		if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
		    rt->rt_rmx.rmx_mtu &&
		    rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
			rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
		}
	}
	return (0);
}
1044 
1045 void
1046 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
1047     struct sockaddr *netmask)
1048 {
1049 	u_char	*cp1 = (u_char *)src;
1050 	u_char	*cp2 = (u_char *)dst;
1051 	u_char	*cp3 = (u_char *)netmask;
1052 	u_char	*cplim = cp2 + *cp3;
1053 	u_char	*cplim2 = cp2 + *cp1;
1054 
1055 	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
1056 	cp3 += 2;
1057 	if (cplim > cplim2)
1058 		cplim = cplim2;
1059 	while (cp2 < cplim)
1060 		*cp2++ = *cp1++ & *cp3++;
1061 	if (cp2 < cplim2)
1062 		bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
1063 }
1064 
1065 /*
1066  * Set up a routing table entry, normally
1067  * for an interface.
1068  */
/*
 * Add or delete (`cmd') an interface route for address `ifa' in the
 * interface's routing domain.  Returns 0 or an errno.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry		*rt;
	struct sockaddr		*dst, *deldst;
	struct mbuf		*m = NULL;
	struct rtentry		*nrt = NULL;
	int			 error;
	struct rt_addrinfo	 info;
	struct sockaddr_rtlabel	 sa_rl;
	u_short			 rtableid = ifa->ifa_ifp->if_rdomain;

	/* Host routes key on the peer address, network routes on ours. */
	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	if (cmd == RTM_DELETE) {
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			/* The table stores masked keys; mask dst to match. */
			m = m_get(M_DONTWAIT, MT_SONAME);
			if (m == NULL)
				return (ENOBUFS);
			deldst = mtod(m, struct sockaddr *);
			rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
			dst = deldst;
		}
		if ((rt = rtalloc1(dst, 0, rtableid)) != NULL) {
			/* Drop the reference rtalloc1() took. */
			rt->rt_refcnt--;
			/* try to find the right route */
			while (rt && rt->rt_ifa != ifa)
				rt = (struct rtentry *)
				    ((struct radix_node *)rt)->rn_dupedkey;
			if (!rt) {
				if (m != NULL)
					(void) m_free(m);
				return (flags & RTF_HOST ? EHOSTUNREACH
							: ENETUNREACH);
			}
		}
	}
	bzero(&info, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags | ifa->ifa_flags;
	info.rti_info[RTAX_DST] = dst;
	if (cmd == RTM_ADD)
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	/* Tag the route with the interface's route label, if any. */
	info.rti_info[RTAX_LABEL] =
	    rtlabel_id2sa(ifa->ifa_ifp->if_rtlabelid, &sa_rl);

	/*
	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
	 * variable) when RTF_HOST is 1.  still not sure if i can safely
	 * change it to meet bsdi4 behavior.
	 */
	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	error = rtrequest1(cmd, &info, RTP_CONNECTED, &nrt, rtableid);
	if (cmd == RTM_DELETE) {
		if (error == 0 && (rt = nrt) != NULL) {
			/* Announce the deletion to routing-socket listeners. */
			rt_newaddrmsg(cmd, ifa, error, nrt);
			if (rt->rt_refcnt <= 0) {
				/* Last reference gone: release the entry. */
				rt->rt_refcnt++;
				rtfree(rt);
			}
		}
		if (m != NULL)
			(void) m_free(m);
	}
	if (cmd == RTM_ADD && error == 0 && (rt = nrt) != NULL) {
		/* Drop the reference rtrequest1() handed back via nrt. */
		rt->rt_refcnt--;
		if (rt->rt_ifa != ifa) {
			/* Route ended up on another ifa; repoint it to ours. */
			printf("rtinit: wrong ifa (%p) was (%p)\n",
			    ifa, rt->rt_ifa);
			if (rt->rt_ifa->ifa_rtrequest)
				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, NULL);
			IFAFREE(rt->rt_ifa);
			rt->rt_ifa = ifa;
			rt->rt_ifp = ifa->ifa_ifp;
			ifa->ifa_refcnt++;
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_ADD, rt, NULL);
		}
		rt_newaddrmsg(cmd, ifa, error, nrt);
	}
	return (error);
}
1151 
1152 /*
1153  * Route timer routines.  These routes allow functions to be called
1154  * for various routes at any time.  This is useful in supporting
1155  * path MTU discovery and redirect route deletion.
1156  *
1157  * This is similar to some BSDI internal functions, but it provides
1158  * for multiple queues for efficiency's sake...
1159  */
1160 
/* All active timer queues, so the periodic scan can visit each one. */
LIST_HEAD(, rttimer_queue)	rttimer_queue_head;
/* Nonzero once rt_timer_init() has set up the pool and timeout. */
static int			rt_init_done = 0;

/*
 * Run an expiring timer's action: invoke its callback if one was
 * registered, otherwise delete the route the timer is attached to.
 */
#define RTTIMER_CALLOUT(r)	{				\
	if (r->rtt_func != NULL) {				\
		(*r->rtt_func)(r->rtt_rt, r);			\
	} else {						\
		struct rt_addrinfo info;			\
		bzero(&info, sizeof(info));			\
		info.rti_info[RTAX_DST] = rt_key(r->rtt_rt);	\
		rtrequest1(RTM_DELETE, &info,			\
		    r->rtt_rt->rt_priority, NULL, r->rtt_tableid);	\
	}							\
}
1175 
1176 /*
1177  * Some subtle order problems with domain initialization mean that
1178  * we cannot count on this being run from rt_init before various
1179  * protocol initializations are done.  Therefore, we make sure
1180  * that this is run when the first queue is added...
1181  */
1182 
/*
 * One-time setup of the route timer machinery: the rttimer pool, the
 * list of timer queues, and the periodic timeout that expires entries.
 * Called lazily from rt_timer_queue_create() because domain init order
 * does not guarantee this runs before protocol initialization.
 */
void
rt_timer_init()
{
	static struct timeout	rt_timer_timeout;

	if (rt_init_done)
		panic("rt_timer_init: already initialized");

	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
	    NULL);

	LIST_INIT(&rttimer_queue_head);
	timeout_set(&rt_timer_timeout, rt_timer_timer, &rt_timer_timeout);
	/* First expiry scan in one second; rt_timer_timer() rearms itself. */
	timeout_add_sec(&rt_timer_timeout, 1);
	rt_init_done = 1;
}
1199 
/*
 * Create a timer queue whose entries expire `timeout' seconds after
 * they are added.  Returns NULL if the queue cannot be allocated.
 */
struct rttimer_queue *
rt_timer_queue_create(u_int timeout)
{
	struct rttimer_queue	*rtq;

	/* Lazily initialize the timer machinery on first use. */
	if (rt_init_done == 0)
		rt_timer_init();

	if ((rtq = malloc(sizeof(*rtq), M_RTABLE, M_NOWAIT|M_ZERO)) == NULL)
		return (NULL);

	rtq->rtq_timeout = timeout;
	rtq->rtq_count = 0;
	TAILQ_INIT(&rtq->rtq_head);
	/* Register the queue so rt_timer_timer() scans it. */
	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);

	return (rtq);
}
1218 
/*
 * Change a queue's expiry interval.  Entries already on the queue are
 * not re-sorted; they simply expire against the new timeout.
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
{
	rtq->rtq_timeout = timeout;
}
1224 
1225 void
1226 rt_timer_queue_destroy(struct rttimer_queue *rtq, int destroy)
1227 {
1228 	struct rttimer	*r;
1229 
1230 	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1231 		LIST_REMOVE(r, rtt_link);
1232 		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1233 		if (destroy)
1234 			RTTIMER_CALLOUT(r);
1235 		pool_put(&rttimer_pool, r);
1236 		if (rtq->rtq_count > 0)
1237 			rtq->rtq_count--;
1238 		else
1239 			printf("rt_timer_queue_destroy: rtq_count reached 0\n");
1240 	}
1241 
1242 	LIST_REMOVE(rtq, rtq_link);
1243 	free(rtq, M_RTABLE);
1244 }
1245 
/* Return the number of timer entries currently on the queue. */
unsigned long
rt_timer_count(struct rttimer_queue *rtq)
{
	return (rtq->rtq_count);
}
1251 
1252 void
1253 rt_timer_remove_all(struct rtentry *rt)
1254 {
1255 	struct rttimer	*r;
1256 
1257 	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1258 		LIST_REMOVE(r, rtt_link);
1259 		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1260 		if (r->rtt_queue->rtq_count > 0)
1261 			r->rtt_queue->rtq_count--;
1262 		else
1263 			printf("rt_timer_remove_all: rtq_count reached 0\n");
1264 		pool_put(&rttimer_pool, r);
1265 	}
1266 }
1267 
1268 int
1269 rt_timer_add(struct rtentry *rt, void (*func)(struct rtentry *,
1270     struct rttimer *), struct rttimer_queue *queue, u_int rtableid)
1271 {
1272 	struct rttimer	*r;
1273 	long		 current_time;
1274 
1275 	current_time = time_uptime;
1276 	rt->rt_rmx.rmx_expire = time_second + queue->rtq_timeout;
1277 
1278 	/*
1279 	 * If there's already a timer with this action, destroy it before
1280 	 * we add a new one.
1281 	 */
1282 	for (r = LIST_FIRST(&rt->rt_timer); r != NULL;
1283 	     r = LIST_NEXT(r, rtt_link)) {
1284 		if (r->rtt_func == func) {
1285 			LIST_REMOVE(r, rtt_link);
1286 			TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1287 			if (r->rtt_queue->rtq_count > 0)
1288 				r->rtt_queue->rtq_count--;
1289 			else
1290 				printf("rt_timer_add: rtq_count reached 0\n");
1291 			pool_put(&rttimer_pool, r);
1292 			break;  /* only one per list, so we can quit... */
1293 		}
1294 	}
1295 
1296 	r = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO);
1297 	if (r == NULL)
1298 		return (ENOBUFS);
1299 
1300 	r->rtt_rt = rt;
1301 	r->rtt_time = current_time;
1302 	r->rtt_func = func;
1303 	r->rtt_queue = queue;
1304 	r->rtt_tableid = rtableid;
1305 	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1306 	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1307 	r->rtt_queue->rtq_count++;
1308 
1309 	return (0);
1310 }
1311 
/*
 * Return the radix head for address family `af' in routing table `id',
 * or NULL if that table does not exist.
 */
struct radix_node_head *
rt_gettable(sa_family_t af, u_int id)
{
	if (id > rtbl_id_max)
		return (NULL);
	return (rt_tables[id] ? rt_tables[id][af2rtafidx[af]] : NULL);
}
1319 
/*
 * Look up dst/mask in routing table `tableid'.  Returns the matching
 * radix node, or NULL if the family has no table or nothing matches.
 */
struct radix_node *
rt_lookup(struct sockaddr *dst, struct sockaddr *mask, u_int tableid)
{
	struct radix_node_head	*rnh;

	if ((rnh = rt_gettable(dst->sa_family, tableid)) == NULL)
		return (NULL);

	return (rnh->rnh_lookup(dst, mask, rnh));
}
1330 
1331 /* ARGSUSED */
1332 void
1333 rt_timer_timer(void *arg)
1334 {
1335 	struct timeout		*to = (struct timeout *)arg;
1336 	struct rttimer_queue	*rtq;
1337 	struct rttimer		*r;
1338 	long			 current_time;
1339 	int			 s;
1340 
1341 	current_time = time_uptime;
1342 
1343 	s = splsoftnet();
1344 	for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL;
1345 	     rtq = LIST_NEXT(rtq, rtq_link)) {
1346 		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1347 		    (r->rtt_time + rtq->rtq_timeout) < current_time) {
1348 			LIST_REMOVE(r, rtt_link);
1349 			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1350 			RTTIMER_CALLOUT(r);
1351 			pool_put(&rttimer_pool, r);
1352 			if (rtq->rtq_count > 0)
1353 				rtq->rtq_count--;
1354 			else
1355 				printf("rt_timer_timer: rtq_count reached 0\n");
1356 		}
1357 	}
1358 	splx(s);
1359 
1360 	timeout_add_sec(to, 1);
1361 }
1362 
/*
 * Return the id for route label `name', creating the label (with one
 * reference, or one more if it already exists) as needed.  Returns 0
 * for an empty name, on allocation failure, or if all ids are in use.
 */
u_int16_t
rtlabel_name2id(char *name)
{
	struct rt_label		*label, *p = NULL;
	u_int16_t		 new_id = 1;

	if (!name[0])
		return (0);

	/* Existing label: take another reference and reuse its id. */
	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (strcmp(name, label->rtl_name) == 0) {
			label->rtl_ref++;
			return (label->rtl_id);
		}

	/*
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find. if there is none or the list
	 * is empty, append a new entry at the end.
	 */

	/* The list is kept sorted by id; stop at the first gap. */
	if (!TAILQ_EMPTY(&rt_labels))
		for (p = TAILQ_FIRST(&rt_labels); p != NULL &&
		    p->rtl_id == new_id; p = TAILQ_NEXT(p, rtl_entry))
			new_id = p->rtl_id + 1;

	if (new_id > LABELID_MAX)
		return (0);

	label = malloc(sizeof(*label), M_TEMP, M_NOWAIT|M_ZERO)
	if (label == NULL)
		return (0);
	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
	label->rtl_id = new_id;
	label->rtl_ref++;

	if (p != NULL)	/* insert new entry before p */
		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
	else		/* either list empty or no free slot in between */
		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);

	return (label->rtl_id);
}
1406 
1407 const char *
1408 rtlabel_id2name(u_int16_t id)
1409 {
1410 	struct rt_label	*label;
1411 
1412 	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1413 		if (label->rtl_id == id)
1414 			return (label->rtl_name);
1415 
1416 	return (NULL);
1417 }
1418 
1419 struct sockaddr *
1420 rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
1421 {
1422 	const char	*label;
1423 
1424 	if (labelid == 0 || (label = rtlabel_id2name(labelid)) == NULL)
1425 		return (NULL);
1426 
1427 	bzero(sa_rl, sizeof(*sa_rl));
1428 	sa_rl->sr_len = sizeof(*sa_rl);
1429 	sa_rl->sr_family = AF_UNSPEC;
1430 	strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));
1431 
1432 	return ((struct sockaddr *)sa_rl);
1433 }
1434 
1435 void
1436 rtlabel_unref(u_int16_t id)
1437 {
1438 	struct rt_label	*p, *next;
1439 
1440 	if (id == 0)
1441 		return;
1442 
1443 	for (p = TAILQ_FIRST(&rt_labels); p != NULL; p = next) {
1444 		next = TAILQ_NEXT(p, rtl_entry);
1445 		if (id == p->rtl_id) {
1446 			if (--p->rtl_ref == 0) {
1447 				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
1448 				free(p, M_TEMP);
1449 			}
1450 			break;
1451 		}
1452 	}
1453 }
1454 
/*
 * Delete every route, in every routing table and address family, that
 * points at interface `ifp'.  Each walk is restarted while the delete
 * callback returns EAGAIN (deleting a cloning route can delete more
 * entries and invalidate the walk).
 */
void
rt_if_remove(struct ifnet *ifp)
{
	int			 i;
	u_int			 tid;
	struct radix_node_head	*rnh;

	for (tid = 0; tid <= rtbl_id_max; tid++) {
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_gettable(i, tid)) != NULL)
				while ((*rnh->rnh_walktree)(rnh,
				    rt_if_remove_rtdelete, ifp) == EAGAIN)
					;	/* nothing */
		}
	}
}
1471 
1472 /*
1473  * Note that deleting a RTF_CLONING route can trigger the
1474  * deletion of more entries, so we need to cancel the walk
1475  * and return EAGAIN.  The caller should restart the walk
1476  * as long as EAGAIN is returned.
1477  */
1478 int
1479 rt_if_remove_rtdelete(struct radix_node *rn, void *vifp, u_int id)
1480 {
1481 	struct ifnet	*ifp = vifp;
1482 	struct rtentry	*rt = (struct rtentry *)rn;
1483 
1484 	if (rt->rt_ifp == ifp) {
1485 		int	cloning = (rt->rt_flags & RTF_CLONING);
1486 
1487 		if (rtdeletemsg(rt, id) == 0 && cloning)
1488 			return (EAGAIN);
1489 	}
1490 
1491 	/*
1492 	 * XXX There should be no need to check for rt_ifa belonging to this
1493 	 * interface, because then rt_ifp is set, right?
1494 	 */
1495 
1496 	return (0);
1497 }
1498 
#ifndef SMALL_KERNEL
/*
 * Propagate a link-state change on `ifp' to the routing tables: walk
 * every multipath-capable table and let rt_if_linkstate_change() adjust
 * the affected routes.  Walks are restarted while EAGAIN is returned.
 */
void
rt_if_track(struct ifnet *ifp)
{
	struct radix_node_head *rnh;
	int i;
	u_int tid;

	if (rt_tables == NULL)
		return;

	for (tid = 0; tid <= rtbl_id_max; tid++) {
		for (i = 1; i <= AF_MAX; i++) {
			if ((rnh = rt_gettable(i, tid)) != NULL) {
				/* Only multipath tables track link state. */
				if (!rn_mpath_capable(rnh))
					continue;
				while ((*rnh->rnh_walktree)(rnh,
				    rt_if_linkstate_change, ifp) == EAGAIN)
					;	/* nothing */
			}
		}
	}
}
1522 
/*
 * Per-route walker for rt_if_track(): mirror the link state of `ifp'
 * on its routes by toggling RTF_UP and re-prioritizing the route among
 * its multipath siblings (RTP_DOWN demotes routes on a down link).
 */
int
rt_if_linkstate_change(struct radix_node *rn, void *arg, u_int id)
{
	struct ifnet *ifp = arg;
	struct rtentry *rt = (struct rtentry *)rn;

	if (rt->rt_ifp == ifp) {
		if (LINK_STATE_IS_UP(ifp->if_link_state) &&
		    ifp->if_flags & IFF_UP) {
			if (!(rt->rt_flags & RTF_UP)) {
				/* bring route up */
				rt->rt_flags |= RTF_UP;
				rn_mpath_reprio(rn, rt->rt_priority & RTP_MASK);
			}
		} else {
			if (rt->rt_flags & RTF_UP) {
				/* take route down */
				rt->rt_flags &= ~RTF_UP;
				rn_mpath_reprio(rn, rt->rt_priority | RTP_DOWN);
			}
		}
		if_group_routechange(rt_key(rt), rt_mask(rt));
	}

	return (0);
}
#endif
1550