xref: /openbsd-src/sys/net/route.c (revision 8500990981f885cbe5e6a4958549cacc238b5ae6)
1 /*	$OpenBSD: route.c,v 1.38 2003/08/27 00:01:38 itojun Exp $	*/
2 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)route.c	8.2 (Berkeley) 11/15/93
62  */
63 
64 /*
65  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
66  *
67  * NRL grants permission for redistribution and use in source and binary
68  * forms, with or without modification, of the software and documentation
69  * created at NRL provided that the following conditions are met:
70  *
71  * 1. Redistributions of source code must retain the above copyright
72  *    notice, this list of conditions and the following disclaimer.
73  * 2. Redistributions in binary form must reproduce the above copyright
74  *    notice, this list of conditions and the following disclaimer in the
75  *    documentation and/or other materials provided with the distribution.
76  * 3. All advertising materials mentioning features or use of this software
77  *    must display the following acknowledgements:
78  * 	This product includes software developed by the University of
79  * 	California, Berkeley and its contributors.
80  * 	This product includes software developed at the Information
81  * 	Technology Division, US Naval Research Laboratory.
82  * 4. Neither the name of the NRL nor the names of its contributors
83  *    may be used to endorse or promote products derived from this software
84  *    without specific prior written permission.
85  *
86  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
90  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97  *
98  * The views and conclusions contained in the software and documentation
99  * are those of the authors and should not be interpreted as representing
100  * official policies, either expressed or implied, of the US Naval
101  * Research Laboratory (NRL).
102  */
103 
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/proc.h>
107 #include <sys/mbuf.h>
108 #include <sys/socket.h>
109 #include <sys/socketvar.h>
110 #include <sys/domain.h>
111 #include <sys/protosw.h>
112 #include <sys/ioctl.h>
113 #include <sys/kernel.h>
114 
115 #include <net/if.h>
116 #include <net/route.h>
117 #include <net/raw_cb.h>
118 
119 #include <netinet/in.h>
120 #include <netinet/in_var.h>
121 
122 #ifdef NS
123 #include <netns/ns.h>
124 #endif
125 
126 #ifdef IPSEC
127 #include <netinet/ip_ipsp.h>
128 
129 extern struct ifnet encif;
130 #endif
131 
132 #define	SA(p) ((struct sockaddr *)(p))
133 
134 struct	route_cb route_cb;
135 struct	rtstat  rtstat;
136 struct	radix_node_head *rt_tables[AF_MAX+1];
137 
138 int	rttrash;		/* routes not in table but not freed */
139 struct	sockaddr wildcard;	/* zero valued cookie for wildcard searches */
140 
141 static int okaytoclone(u_int, int);
142 static int rtdeletemsg(struct rtentry *);
143 static int rtflushclone1(struct radix_node *, void *);
144 static void rtflushclone(struct radix_node_head *, struct rtentry *);
145 
146 #ifdef IPSEC
147 
148 static struct ifaddr *
149 encap_findgwifa(struct sockaddr *gw)
150 {
151 	return (TAILQ_FIRST(&encif.if_addrlist));
152 }
153 
154 #endif
155 
156 void
157 rtable_init(table)
158 	void **table;
159 {
160 	struct domain *dom;
161 	for (dom = domains; dom != NULL; dom = dom->dom_next)
162 		if (dom->dom_rtattach)
163 			dom->dom_rtattach(&table[dom->dom_family],
164 			    dom->dom_rtoffset);
165 }
166 
167 void
168 route_init()
169 {
170 	rn_init();	/* initialize all zeroes, all ones, mask table */
171 	rtable_init((void **)rt_tables);
172 }
173 
174 void
175 rtalloc_noclone(ro, howstrict)
176 	register struct route *ro;
177 	int howstrict;
178 {
179 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
180 		return;		/* XXX */
181 	ro->ro_rt = rtalloc2(&ro->ro_dst, 1, howstrict);
182 }
183 
184 static int
185 okaytoclone(flags, howstrict)
186 	u_int flags;
187 	int howstrict;
188 {
189 	if (howstrict == ALL_CLONING)
190 		return (1);
191 	if (howstrict == ONNET_CLONING && !(flags & RTF_GATEWAY))
192 		return (1);
193 	return (0);
194 }
195 
196 struct rtentry *
197 rtalloc2(dst, report,howstrict)
198 	register struct sockaddr *dst;
199 	int report,howstrict;
200 {
201 	register struct radix_node_head *rnh = rt_tables[dst->sa_family];
202 	register struct rtentry *rt;
203 	register struct radix_node *rn;
204 	struct rtentry *newrt = 0;
205 	struct rt_addrinfo info;
206 	int  s = splnet(), err = 0, msgtype = RTM_MISS;
207 
208 	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
209 	    ((rn->rn_flags & RNF_ROOT) == 0)) {
210 		newrt = rt = (struct rtentry *)rn;
211 		if (report && (rt->rt_flags & RTF_CLONING) &&
212 		    okaytoclone(rt->rt_flags, howstrict)) {
213 			err = rtrequest(RTM_RESOLVE, dst, SA(0), SA(0), 0,
214 			    &newrt);
215 			if (err) {
216 				newrt = rt;
217 				rt->rt_refcnt++;
218 				goto miss;
219 			}
220 			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
221 				msgtype = RTM_RESOLVE;
222 				goto miss;
223 			}
224 		} else
225 			rt->rt_refcnt++;
226 	} else {
227 		rtstat.rts_unreach++;
228 miss:		if (report) {
229 			bzero((caddr_t)&info, sizeof(info));
230 			info.rti_info[RTAX_DST] = dst;
231 			rt_missmsg(msgtype, &info, 0, err);
232 		}
233 	}
234 	splx(s);
235 	return (newrt);
236 }
237 
238 /*
239  * Packet routing routines.
240  */
241 void
242 rtalloc(ro)
243 	register struct route *ro;
244 {
245 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
246 		return;				 /* XXX */
247 	ro->ro_rt = rtalloc1(&ro->ro_dst, 1);
248 }
249 
250 struct rtentry *
251 rtalloc1(dst, report)
252 	register struct sockaddr *dst;
253 	int report;
254 {
255 	register struct radix_node_head *rnh = rt_tables[dst->sa_family];
256 	register struct rtentry *rt;
257 	register struct radix_node *rn;
258 	struct rtentry *newrt = 0;
259 	struct rt_addrinfo info;
260 	int  s = splsoftnet(), err = 0, msgtype = RTM_MISS;
261 
262 	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
263 	    ((rn->rn_flags & RNF_ROOT) == 0)) {
264 		newrt = rt = (struct rtentry *)rn;
265 		if (report && (rt->rt_flags & RTF_CLONING)) {
266 			err = rtrequest(RTM_RESOLVE, dst, SA(NULL),
267 			    SA(NULL), 0, &newrt);
268 			if (err) {
269 				newrt = rt;
270 				rt->rt_refcnt++;
271 				goto miss;
272 			}
273 			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
274 				msgtype = RTM_RESOLVE;
275 				goto miss;
276 			}
277 			/* Inform listeners of the new route */
278 			bzero(&info, sizeof(info));
279 			info.rti_info[RTAX_DST] = rt_key(rt);
280 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
281 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
282 			if (rt->rt_ifp != NULL) {
283 				info.rti_info[RTAX_IFP] =
284 				    TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
285 				info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
286 			}
287 			rt_missmsg(RTM_ADD, &info, rt->rt_flags, 0);
288 		} else
289 			rt->rt_refcnt++;
290 	} else {
291 		if (dst->sa_family != PF_KEY)
292 		        rtstat.rts_unreach++;
293 	/*
294 	 * IP encapsulation does lots of lookups where we don't need nor want
295 	 * the RTM_MISSes that would be generated.  It causes RTM_MISS storms
296 	 * sent upward breaking user-level routing queries.
297 	 */
298 	miss:	if (report && dst->sa_family != PF_KEY) {
299 			bzero((caddr_t)&info, sizeof(info));
300 			info.rti_info[RTAX_DST] = dst;
301 			rt_missmsg(msgtype, &info, 0, err);
302 		}
303 	}
304 	splx(s);
305 	return (newrt);
306 }
307 
308 void
309 rtfree(rt)
310 	register struct rtentry *rt;
311 {
312 	register struct ifaddr *ifa;
313 
314 	if (rt == NULL)
315 		panic("rtfree");
316 	rt->rt_refcnt--;
317 	if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
318 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
319 			panic ("rtfree 2");
320 		rttrash--;
321 		if (rt->rt_refcnt < 0) {
322 			printf("rtfree: %p not freed (neg refs)\n", rt);
323 			return;
324 		}
325 		rt_timer_remove_all(rt);
326 		ifa = rt->rt_ifa;
327 		if (ifa)
328 			IFAFREE(ifa);
329 		Free(rt_key(rt));
330 		Free(rt);
331 	}
332 }
333 
334 void
335 ifafree(ifa)
336 	register struct ifaddr *ifa;
337 {
338 	if (ifa == NULL)
339 		panic("ifafree");
340 	if (ifa->ifa_refcnt == 0)
341 		free(ifa, M_IFADDR);
342 	else
343 		ifa->ifa_refcnt--;
344 }
345 
346 /*
347  * Force a routing table entry to the specified
348  * destination to go through the given gateway.
349  * Normally called as a result of a routing redirect
350  * message from the network layer.
351  *
352  * N.B.: must be called at splsoftnet
353  */
354 void
355 rtredirect(dst, gateway, netmask, flags, src, rtp)
356 	struct sockaddr *dst, *gateway, *netmask, *src;
357 	int flags;
358 	struct rtentry **rtp;
359 {
360 	struct rtentry *rt;
361 	int error = 0;
362 	u_int32_t *stat = NULL;
363 	struct rt_addrinfo info;
364 	struct ifaddr *ifa;
365 
366 	splassert(IPL_SOFTNET);
367 
368 	/* verify the gateway is directly reachable */
369 	if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
370 		error = ENETUNREACH;
371 		goto out;
372 	}
373 	rt = rtalloc1(dst, 0);
374 	/*
375 	 * If the redirect isn't from our current router for this dst,
376 	 * it's either old or wrong.  If it redirects us to ourselves,
377 	 * we have a routing loop, perhaps as a result of an interface
378 	 * going down recently.
379 	 */
380 #define	equal(a1, a2) \
381 	((a1)->sa_len == (a2)->sa_len && \
382 	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
383 	if (!(flags & RTF_DONE) && rt &&
384 	     (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
385 		error = EINVAL;
386 	else if (ifa_ifwithaddr(gateway) != NULL)
387 		error = EHOSTUNREACH;
388 	if (error)
389 		goto done;
390 	/*
391 	 * Create a new entry if we just got back a wildcard entry
392 	 * or the lookup failed.  This is necessary for hosts
393 	 * which use routing redirects generated by smart gateways
394 	 * to dynamically build the routing tables.
395 	 */
396 	if ((rt == NULL) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
397 		goto create;
398 	/*
399 	 * Don't listen to the redirect if it's
400 	 * for a route to an interface.
401 	 */
402 	if (rt->rt_flags & RTF_GATEWAY) {
403 		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
404 			/*
405 			 * Changing from route to net => route to host.
406 			 * Create new route, rather than smashing route to net.
407 			 */
408 		create:
409 			if (rt)
410 				rtfree(rt);
411 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
412 			info.rti_info[RTAX_DST] = dst;
413 			info.rti_info[RTAX_GATEWAY] = gateway;
414 			info.rti_info[RTAX_NETMASK] = netmask;
415 			info.rti_ifa = ifa;
416 			info.rti_flags = flags;
417 			rt = NULL;
418 			error = rtrequest1(RTM_ADD, &info, &rt);
419 			if (rt != NULL)
420 				flags = rt->rt_flags;
421 			stat = &rtstat.rts_dynamic;
422 		} else {
423 			/*
424 			 * Smash the current notion of the gateway to
425 			 * this destination.  Should check about netmask!!!
426 			 */
427 			rt->rt_flags |= RTF_MODIFIED;
428 			flags |= RTF_MODIFIED;
429 			stat = &rtstat.rts_newgateway;
430 			rt_setgate(rt, rt_key(rt), gateway);
431 		}
432 	} else
433 		error = EHOSTUNREACH;
434 done:
435 	if (rt) {
436 		if (rtp && !error)
437 			*rtp = rt;
438 		else
439 			rtfree(rt);
440 	}
441 out:
442 	if (error)
443 		rtstat.rts_badredirect++;
444 	else if (stat != NULL)
445 		(*stat)++;
446 	bzero((caddr_t)&info, sizeof(info));
447 	info.rti_info[RTAX_DST] = dst;
448 	info.rti_info[RTAX_GATEWAY] = gateway;
449 	info.rti_info[RTAX_NETMASK] = netmask;
450 	info.rti_info[RTAX_AUTHOR] = src;
451 	rt_missmsg(RTM_REDIRECT, &info, flags, error);
452 }
453 
454 /*
455  * Delete a route and generate a message
456  */
457 static int
458 rtdeletemsg(rt)
459 	struct rtentry *rt;
460 {
461 	int error;
462 	struct rt_addrinfo info;
463 
464 	/*
465 	 * Request the new route so that the entry is not actually
466 	 * deleted.  That will allow the information being reported to
467 	 * be accurate (and consistent with route_output()).
468 	 */
469 	bzero((caddr_t)&info, sizeof(info));
470 	info.rti_info[RTAX_DST] = rt_key(rt);
471 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
472 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
473 	info.rti_flags = rt->rt_flags;
474 	error = rtrequest1(RTM_DELETE, &info, &rt);
475 
476 	rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
477 
478 	/* Adjust the refcount */
479 	if (error == 0 && rt->rt_refcnt <= 0) {
480 		rt->rt_refcnt++;
481 		rtfree(rt);
482 	}
483 	return (error);
484 }
485 
486 static int
487 rtflushclone1(rn, arg)
488 	struct radix_node *rn;
489 	void *arg;
490 {
491 	struct rtentry *rt, *parent;
492 
493 	rt = (struct rtentry *)rn;
494 	parent = (struct rtentry *)arg;
495 	if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent == parent)
496 		rtdeletemsg(rt);
497 	return 0;
498 }
499 
500 static void
501 rtflushclone(rnh, parent)
502 	struct radix_node_head *rnh;
503 	struct rtentry *parent;
504 {
505 
506 #ifdef DIAGNOSTIC
507 	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
508 		panic("rtflushclone: called with a non-cloning route");
509 	if (!rnh->rnh_walktree)
510 		panic("rtflushclone: no rnh_walktree");
511 #endif
512 	rnh->rnh_walktree(rnh, rtflushclone1, (void *)parent);
513 }
514 
515 /*
516 * Routing table ioctl interface.
517 */
518 int
519 rtioctl(req, data, p)
520 	u_long req;
521 	caddr_t data;
522 	struct proc *p;
523 {
524 	return (EOPNOTSUPP);
525 }
526 
527 struct ifaddr *
528 ifa_ifwithroute(flags, dst, gateway)
529 	int flags;
530 	struct sockaddr	*dst, *gateway;
531 {
532 	register struct ifaddr *ifa;
533 
534 #ifdef IPSEC
535 	/*
536 	 * If the destination is a PF_KEY address, we'll look
537 	 * for the existence of a encap interface number or address
538 	 * in the options list of the gateway. By default, we'll return
539 	 * enc0.
540 	 */
541 	if (dst && (dst->sa_family == PF_KEY))
542 		return encap_findgwifa(gateway);
543 #endif
544 
545 	if ((flags & RTF_GATEWAY) == 0) {
546 		/*
547 		 * If we are adding a route to an interface,
548 		 * and the interface is a pt to pt link
549 		 * we should search for the destination
550 		 * as our clue to the interface.  Otherwise
551 		 * we can use the local address.
552 		 */
553 		ifa = NULL;
554 		if (flags & RTF_HOST)
555 			ifa = ifa_ifwithdstaddr(dst);
556 		if (ifa == NULL)
557 			ifa = ifa_ifwithaddr(gateway);
558 	} else {
559 		/*
560 		 * If we are adding a route to a remote net
561 		 * or host, the gateway may still be on the
562 		 * other end of a pt to pt link.
563 		 */
564 		ifa = ifa_ifwithdstaddr(gateway);
565 	}
566 	if (ifa == NULL)
567 		ifa = ifa_ifwithnet(gateway);
568 	if (ifa == NULL) {
569 		struct rtentry *rt = rtalloc1(gateway, 0);
570 		if (rt == NULL)
571 			return (NULL);
572 		rt->rt_refcnt--;
573 		/* The gateway must be local if the same address family. */
574 		if ((rt->rt_flags & RTF_GATEWAY) &&
575 		    rt_key(rt)->sa_family == dst->sa_family)
576 			return (0);
577 		if ((ifa = rt->rt_ifa) == NULL)
578 			return (NULL);
579 	}
580 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
581 		struct ifaddr *oifa = ifa;
582 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
583 		if (ifa == NULL)
584 			ifa = oifa;
585 	}
586 	return (ifa);
587 }
588 
/*
 * Round a length up to the next multiple of sizeof(long); lengths of
 * zero or less yield sizeof(long).  The argument is evaluated more than
 * once, so it must be free of side effects.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
590 
591 int
592 rtrequest(req, dst, gateway, netmask, flags, ret_nrt)
593 	int req, flags;
594 	struct sockaddr *dst, *gateway, *netmask;
595 	struct rtentry **ret_nrt;
596 {
597 	struct rt_addrinfo info;
598 
599 	bzero(&info, sizeof(info));
600 	info.rti_flags = flags;
601 	info.rti_info[RTAX_DST] = dst;
602 	info.rti_info[RTAX_GATEWAY] = gateway;
603 	info.rti_info[RTAX_NETMASK] = netmask;
604 	return (rtrequest1(req, &info, ret_nrt));
605 }
606 
/*
 * XXX: the functions that follow refer to members of their
 * "struct rt_addrinfo *info" argument through these macros, which make
 * them look like ordinary local variables.  They stay in effect until
 * the matching #undef lines, and any function using them must name its
 * argument "info".
 */
#define dst	(info->rti_info[RTAX_DST])
#define gateway	(info->rti_info[RTAX_GATEWAY])
#define netmask	(info->rti_info[RTAX_NETMASK])
#define ifaaddr	(info->rti_info[RTAX_IFA])
#define ifpaddr	(info->rti_info[RTAX_IFP])
#define flags	(info->rti_flags)
617 
618 int
619 rt_getifa(info)
620 	struct rt_addrinfo *info;
621 {
622 	struct ifaddr *ifa;
623 	int error = 0;
624 
625 	/*
626 	 * ifp may be specified by sockaddr_dl when protocol address
627 	 * is ambiguous
628 	 */
629 	if (info->rti_ifp == NULL && ifpaddr != NULL
630 	    && ifpaddr->sa_family == AF_LINK &&
631 	    (ifa = ifa_ifwithnet((struct sockaddr *)ifpaddr)) != NULL)
632 		info->rti_ifp = ifa->ifa_ifp;
633 	if (info->rti_ifa == NULL && ifaaddr != NULL)
634 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
635 	if (info->rti_ifa == NULL) {
636 		struct sockaddr *sa;
637 
638 		sa = ifaaddr != NULL ? ifaaddr :
639 		    (gateway != NULL ? gateway : dst);
640 		if (sa != NULL && info->rti_ifp != NULL)
641 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
642 		else if (dst != NULL && gateway != NULL)
643 			info->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
644 		else if (sa != NULL)
645 			info->rti_ifa = ifa_ifwithroute(flags, sa, sa);
646 	}
647 	if ((ifa = info->rti_ifa) != NULL) {
648 		if (info->rti_ifp == NULL)
649 			info->rti_ifp = ifa->ifa_ifp;
650 	} else
651 		error = ENETUNREACH;
652 	return (error);
653 }
654 
655 int
656 rtrequest1(req, info, ret_nrt)
657 	int req;
658 	struct rt_addrinfo *info;
659 	struct rtentry **ret_nrt;
660 {
661 	int s = splsoftnet(); int error = 0;
662 	register struct rtentry *rt, *crt;
663 	register struct radix_node *rn;
664 	register struct radix_node_head *rnh;
665 	struct ifaddr *ifa;
666 	struct sockaddr *ndst;
667 #define senderr(x) { error = x ; goto bad; }
668 
669 	if ((rnh = rt_tables[dst->sa_family]) == 0)
670 		senderr(EAFNOSUPPORT);
671 	if (flags & RTF_HOST)
672 		netmask = 0;
673 	switch (req) {
674 	case RTM_DELETE:
675 		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL)
676 			senderr(ESRCH);
677 		rt = (struct rtentry *)rn;
678 		if ((rt->rt_flags & RTF_CLONING) != 0) {
679 			/* clean up any cloned children */
680 			rtflushclone(rnh, rt);
681 		}
682 		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
683 			panic ("rtrequest delete");
684 		rt = (struct rtentry *)rn;
685 		if (rt->rt_gwroute) {
686 			rt = rt->rt_gwroute; RTFREE(rt);
687 			(rt = (struct rtentry *)rn)->rt_gwroute = NULL;
688 		}
689 		if (rt->rt_parent) {
690 			rt->rt_parent->rt_refcnt--;
691 			rt->rt_parent = NULL;
692 		}
693 		rt->rt_flags &= ~RTF_UP;
694 		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
695 			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
696 		rttrash++;
697 		if (ret_nrt)
698 			*ret_nrt = rt;
699 		else if (rt->rt_refcnt <= 0) {
700 			rt->rt_refcnt++;
701 			rtfree(rt);
702 		}
703 		break;
704 
705 	case RTM_RESOLVE:
706 		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
707 			senderr(EINVAL);
708 		if ((rt->rt_flags & RTF_CLONING) == 0)
709 			senderr(EINVAL);
710 		ifa = rt->rt_ifa;
711 		flags = rt->rt_flags & ~(RTF_CLONING | RTF_STATIC);
712 		flags |= RTF_CLONED;
713 		gateway = rt->rt_gateway;
714 		if ((netmask = rt->rt_genmask) == NULL)
715 			flags |= RTF_HOST;
716 		goto makeroute;
717 
718 	case RTM_ADD:
719 		if (info->rti_ifa == 0 && (error = rt_getifa(info)))
720 			senderr(error);
721 		ifa = info->rti_ifa;
722 	makeroute:
723 		R_Malloc(rt, struct rtentry *, sizeof(*rt));
724 		if (rt == NULL)
725 			senderr(ENOBUFS);
726 		Bzero(rt, sizeof(*rt));
727 		rt->rt_flags = RTF_UP | flags;
728 		LIST_INIT(&rt->rt_timer);
729 		if (rt_setgate(rt, dst, gateway)) {
730 			Free(rt);
731 			senderr(ENOBUFS);
732 		}
733 		ndst = rt_key(rt);
734 		if (netmask) {
735 			rt_maskedcopy(dst, ndst, netmask);
736 		} else
737 			Bcopy(dst, ndst, dst->sa_len);
738 		ifa->ifa_refcnt++;
739 		rt->rt_ifa = ifa;
740 		rt->rt_ifp = ifa->ifa_ifp;
741 		if (req == RTM_RESOLVE) {
742 			/*
743 			 * Copy both metrics and a back pointer to the cloned
744 			 * route's parent.
745 			 */
746 			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
747 			rt->rt_parent = *ret_nrt;	 /* Back ptr. to parent. */
748 			rt->rt_parent->rt_refcnt++;
749 		}
750 		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
751 		    rnh, rt->rt_nodes);
752 		if (rn == NULL && (crt = rtalloc1(ndst, 0)) != NULL) {
753 			/* overwrite cloned route */
754 			if ((crt->rt_flags & RTF_CLONED) != 0) {
755 				rtdeletemsg(crt);
756 				rn = rnh->rnh_addaddr((caddr_t)ndst,
757 				    (caddr_t)netmask, rnh, rt->rt_nodes);
758 			}
759 			RTFREE(crt);
760 		}
761 		if (rn == 0) {
762 			IFAFREE(ifa);
763 			if ((rt->rt_flags & RTF_CLONED) != 0 && rt->rt_parent)
764 				rtfree(rt->rt_parent);
765 			if (rt->rt_gwroute)
766 				rtfree(rt->rt_gwroute);
767 			Free(rt_key(rt));
768 			Free(rt);
769 			senderr(EEXIST);
770 		}
771 		if (ifa->ifa_rtrequest)
772 			ifa->ifa_rtrequest(req, rt, info);
773 		if (ret_nrt) {
774 			*ret_nrt = rt;
775 			rt->rt_refcnt++;
776 		}
777 		if ((rt->rt_flags & RTF_CLONING) != 0) {
778 			/* clean up any cloned children */
779 			rtflushclone(rnh, rt);
780 		}
781 		break;
782 	}
783 bad:
784 	splx(s);
785 	return (error);
786 }
787 
/* End of the functions that use the rt_addrinfo pseudo-variables. */
#undef flags
#undef ifpaddr
#undef ifaaddr
#undef netmask
#undef gateway
#undef dst
794 
795 int
796 rt_setgate(rt0, dst, gate)
797 	struct rtentry *rt0;
798 	struct sockaddr *dst, *gate;
799 {
800 	caddr_t new, old;
801 	int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
802 	register struct rtentry *rt = rt0;
803 
804 	if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
805 		old = (caddr_t)rt_key(rt);
806 		R_Malloc(new, caddr_t, dlen + glen);
807 		if (new == NULL)
808 			return 1;
809 		rt->rt_nodes->rn_key = new;
810 	} else {
811 		new = rt->rt_nodes->rn_key;
812 		old = NULL;
813 	}
814 	Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
815 	if (old) {
816 		Bcopy(dst, new, dlen);
817 		Free(old);
818 	}
819 	if (rt->rt_gwroute != NULL) {
820 		rt = rt->rt_gwroute;
821 		RTFREE(rt);
822 		rt = rt0;
823 		rt->rt_gwroute = NULL;
824 	}
825 	if (rt->rt_flags & RTF_GATEWAY) {
826 		rt->rt_gwroute = rtalloc1(gate, 1);
827 		/*
828 		 * If we switched gateways, grab the MTU from the new
829 		 * gateway route if the current MTU is 0 or greater
830 		 * than the MTU of gateway.
831 		 * Note that, if the MTU of gateway is 0, we will reset the
832 		 * MTU of the route to run PMTUD again from scratch. XXX
833 		 */
834 		if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
835 		    rt->rt_rmx.rmx_mtu &&
836 		    rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
837 			rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
838 		}
839 	}
840 	return (0);
841 }
842 
/*
 * Copy src to dst with netmask applied.
 *
 * All three arguments are treated as raw byte strings whose first byte
 * is the length.  The first two bytes of src (sa_len and sa_family) are
 * copied unchanged; the following bytes are ANDed with the matching
 * mask bytes, up to the shorter of the src and mask lengths; whatever
 * remains of dst up to the src length is zeroed.
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	unsigned char *from = (unsigned char *)src;
	unsigned char *to = (unsigned char *)dst;
	unsigned char *mask = (unsigned char *)netmask;
	unsigned int masklen = mask[0];
	unsigned int srclen = from[0];
	unsigned int i;

	/* copies sa_len & sa_family */
	to[0] = from[0];
	to[1] = from[1];

	/* Never mask past the end of the source address. */
	if (masklen > srclen)
		masklen = srclen;
	for (i = 2; i < masklen; i++)
		to[i] = from[i] & mask[i];

	/* Bytes not covered by the mask become zero. */
	if (i < srclen)
		bzero(to + i, srclen - i);
}
862 
863 /*
864  * Set up a routing table entry, normally
865  * for an interface.
866  */
867 int
868 rtinit(ifa, cmd, flags)
869 	register struct ifaddr *ifa;
870 	int cmd, flags;
871 {
872 	register struct rtentry *rt;
873 	register struct sockaddr *dst;
874 	register struct sockaddr *deldst;
875 	struct mbuf *m = NULL;
876 	struct rtentry *nrt = NULL;
877 	int error;
878 	struct rt_addrinfo info;
879 
880 	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
881 	if (cmd == RTM_DELETE) {
882 		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
883 			m = m_get(M_DONTWAIT, MT_SONAME);
884 			if (m == NULL)
885 				return (ENOBUFS);
886 			deldst = mtod(m, struct sockaddr *);
887 			rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
888 			dst = deldst;
889 		}
890 		if ((rt = rtalloc1(dst, 0)) != NULL) {
891 			rt->rt_refcnt--;
892 			if (rt->rt_ifa != ifa) {
893 				if (m != NULL)
894 					(void) m_free(m);
895 				return (flags & RTF_HOST ? EHOSTUNREACH
896 							: ENETUNREACH);
897 			}
898 		}
899 	}
900 	bzero(&info, sizeof(info));
901 	info.rti_ifa = ifa;
902 	info.rti_flags = flags | ifa->ifa_flags;
903 	info.rti_info[RTAX_DST] = dst;
904 	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
905 	/*
906 	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
907 	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
908 	 * variable) when RTF_HOST is 1.  still not sure if i can safely
909 	 * change it to meet bsdi4 behavior.
910 	 */
911 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
912 	error = rtrequest1(cmd, &info, &nrt);
913 	if (cmd == RTM_DELETE && error == 0 && (rt = nrt) != NULL) {
914 		rt_newaddrmsg(cmd, ifa, error, nrt);
915 		if (rt->rt_refcnt <= 0) {
916 			rt->rt_refcnt++;
917 			rtfree(rt);
918 		}
919 	}
920 	if (cmd == RTM_ADD && error == 0 && (rt = nrt) != NULL) {
921 		rt->rt_refcnt--;
922 		if (rt->rt_ifa != ifa) {
923 			printf("rtinit: wrong ifa (%p) was (%p)\n",
924 			       ifa, rt->rt_ifa);
925 			if (rt->rt_ifa->ifa_rtrequest)
926 				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, NULL);
927 			IFAFREE(rt->rt_ifa);
928 			rt->rt_ifa = ifa;
929 			rt->rt_ifp = ifa->ifa_ifp;
930 			ifa->ifa_refcnt++;
931 			if (ifa->ifa_rtrequest)
932 				ifa->ifa_rtrequest(RTM_ADD, rt, NULL);
933 		}
934 		rt_newaddrmsg(cmd, ifa, error, nrt);
935 	}
936 	return (error);
937 }
938 
939 /*
940  * Route timer routines.  These routes allow functions to be called
941  * for various routes at any time.  This is useful in supporting
942  * path MTU discovery and redirect route deletion.
943  *
944  * This is similar to some BSDI internal functions, but it provides
945  * for multiple queues for efficiency's sake...
946  */
947 
948 LIST_HEAD(, rttimer_queue) rttimer_queue_head;
949 static int rt_init_done = 0;
950 
951 #define RTTIMER_CALLOUT(r)	{				\
952 	if (r->rtt_func != NULL) {				\
953 		(*r->rtt_func)(r->rtt_rt, r);			\
954 	} else {						\
955 		rtrequest((int) RTM_DELETE,			\
956 			  (struct sockaddr *)rt_key(r->rtt_rt),	\
957 			  0, 0, 0, 0);				\
958 	}							\
959 }
960 
961 /*
962  * Some subtle order problems with domain initialization mean that
963  * we cannot count on this being run from rt_init before various
964  * protocol initializations are done.  Therefore, we make sure
965  * that this is run when the first queue is added...
966  */
967 
968 void
969 rt_timer_init()
970 {
971 	static struct timeout rt_timer_timeout;
972 
973 	assert(rt_init_done == 0);
974 
975 #if 0
976 	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
977 	    NULL);
978 #endif
979 
980 	LIST_INIT(&rttimer_queue_head);
981 	timeout_set(&rt_timer_timeout, rt_timer_timer, &rt_timer_timeout);
982 	timeout_add(&rt_timer_timeout, hz);	/* every second */
983 	rt_init_done = 1;
984 }
985 
986 struct rttimer_queue *
987 rt_timer_queue_create(timeout)
988 	u_int	timeout;
989 {
990 	struct rttimer_queue *rtq;
991 
992 	if (rt_init_done == 0)
993 		rt_timer_init();
994 
995 	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
996 	if (rtq == NULL)
997 		return (NULL);
998 	Bzero(rtq, sizeof *rtq);
999 
1000 	rtq->rtq_timeout = timeout;
1001 	rtq->rtq_count = 0;
1002 	TAILQ_INIT(&rtq->rtq_head);
1003 	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1004 
1005 	return (rtq);
1006 }
1007 
1008 void
1009 rt_timer_queue_change(rtq, timeout)
1010 	struct rttimer_queue *rtq;
1011 	long timeout;
1012 {
1013 
1014 	rtq->rtq_timeout = timeout;
1015 }
1016 
1017 void
1018 rt_timer_queue_destroy(rtq, destroy)
1019 	struct rttimer_queue *rtq;
1020 	int destroy;
1021 {
1022 	struct rttimer *r;
1023 
1024 	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1025 		LIST_REMOVE(r, rtt_link);
1026 		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1027 		if (destroy)
1028 			RTTIMER_CALLOUT(r);
1029 #if 0
1030 		pool_put(&rttimer_pool, r);
1031 #else
1032 		free(r, M_RTABLE);
1033 #endif
1034 		if (rtq->rtq_count > 0)
1035 			rtq->rtq_count--;
1036 		else
1037 			printf("rt_timer_queue_destroy: rtq_count reached 0\n");
1038 	}
1039 
1040 	LIST_REMOVE(rtq, rtq_link);
1041 
1042 	/*
1043 	 * Caller is responsible for freeing the rttimer_queue structure.
1044 	 */
1045 }
1046 
1047 unsigned long
1048 rt_timer_count(rtq)
1049 	struct rttimer_queue *rtq;
1050 {
1051 
1052 	return (rtq->rtq_count);
1053 }
1054 
1055 void
1056 rt_timer_remove_all(rt)
1057 	struct rtentry *rt;
1058 {
1059 	struct rttimer *r;
1060 
1061 	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1062 		LIST_REMOVE(r, rtt_link);
1063 		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1064 		if (r->rtt_queue->rtq_count > 0)
1065 			r->rtt_queue->rtq_count--;
1066 		else
1067 			printf("rt_timer_remove_all: rtq_count reached 0\n");
1068 #if 0
1069 		pool_put(&rttimer_pool, r);
1070 #else
1071 		free(r, M_RTABLE);
1072 #endif
1073 	}
1074 }
1075 
1076 int
1077 rt_timer_add(rt, func, queue)
1078 	struct rtentry *rt;
1079 	void(*func)(struct rtentry *, struct rttimer *);
1080 	struct rttimer_queue *queue;
1081 {
1082 	struct rttimer *r;
1083 	long current_time;
1084 
1085 	current_time = mono_time.tv_sec;
1086 
1087 	/*
1088 	 * If there's already a timer with this action, destroy it before
1089 	 * we add a new one.
1090 	 */
1091 	for (r = LIST_FIRST(&rt->rt_timer); r != NULL;
1092 	     r = LIST_NEXT(r, rtt_link)) {
1093 		if (r->rtt_func == func) {
1094 			LIST_REMOVE(r, rtt_link);
1095 			TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1096 			if (r->rtt_queue->rtq_count > 0)
1097 				r->rtt_queue->rtq_count--;
1098 			else
1099 				printf("rt_timer_add: rtq_count reached 0\n");
1100 #if 0
1101 			pool_put(&rttimer_pool, r);
1102 #else
1103 			free(r, M_RTABLE);
1104 #endif
1105 			break;  /* only one per list, so we can quit... */
1106 		}
1107 	}
1108 
1109 #if 0
1110 	r = pool_get(&rttimer_pool, PR_NOWAIT);
1111 #else
1112 	r = (struct rttimer *)malloc(sizeof(*r), M_RTABLE, M_NOWAIT);
1113 #endif
1114 	if (r == NULL)
1115 		return (ENOBUFS);
1116 	Bzero(r, sizeof(*r));
1117 
1118 	r->rtt_rt = rt;
1119 	r->rtt_time = current_time;
1120 	r->rtt_func = func;
1121 	r->rtt_queue = queue;
1122 	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1123 	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1124 	r->rtt_queue->rtq_count++;
1125 
1126 	return (0);
1127 }
1128 
1129 /* ARGSUSED */
1130 void
1131 rt_timer_timer(arg)
1132 	void *arg;
1133 {
1134 	struct timeout *to = (struct timeout *)arg;
1135 	struct rttimer_queue *rtq;
1136 	struct rttimer *r;
1137 	long current_time;
1138 	int s;
1139 
1140 	current_time = mono_time.tv_sec;
1141 
1142 	s = splsoftnet();
1143 	for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL;
1144 	     rtq = LIST_NEXT(rtq, rtq_link)) {
1145 		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1146 		    (r->rtt_time + rtq->rtq_timeout) < current_time) {
1147 			LIST_REMOVE(r, rtt_link);
1148 			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1149 			RTTIMER_CALLOUT(r);
1150 #if 0
1151 			pool_put(&rttimer_pool, r);
1152 #else
1153 			free(r, M_RTABLE);
1154 #endif
1155 			if (rtq->rtq_count > 0)
1156 				rtq->rtq_count--;
1157 			else
1158 				printf("rt_timer_timer: rtq_count reached 0\n");
1159 		}
1160 	}
1161 	splx(s);
1162 
1163 	timeout_add(to, hz);		/* every second */
1164 }
1165