xref: /openbsd-src/sys/net/route.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: route.c,v 1.25 2001/07/20 18:46:50 itojun Exp $	*/
2 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	@(#)route.c	8.2 (Berkeley) 11/15/93
66  */
67 
68 /*
69 %%% portions-copyright-nrl-95
70 Portions of this software are Copyright 1995-1998 by Randall Atkinson,
71 Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights
72 Reserved. All rights under this copyright have been assigned to the US
73 Naval Research Laboratory (NRL). The NRL Copyright Notice and License
74 Agreement Version 1.1 (January 17, 1995) applies to these portions of the
75 software.
76 You should have received a copy of the license with this software. If you
77 didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>.
78 */
79 
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/mbuf.h>
84 #include <sys/socket.h>
85 #include <sys/socketvar.h>
86 #include <sys/domain.h>
87 #include <sys/protosw.h>
88 #include <sys/ioctl.h>
89 #include <sys/kernel.h>
90 
91 #include <net/if.h>
92 #include <net/route.h>
93 #include <net/raw_cb.h>
94 
95 #include <netinet/in.h>
96 #include <netinet/in_var.h>
97 
98 #ifdef NS
99 #include <netns/ns.h>
100 #endif
101 
102 #ifdef IPSEC
103 #include <netinet/ip_ipsp.h>
104 
105 extern struct ifnet encif;
106 #endif
107 
/* Cast an arbitrary pointer expression to a generic socket address pointer. */
#define	SA(p) ((struct sockaddr *)(p))

int	rttrash;		/* routes not in table but not freed */
struct	sockaddr wildcard;	/* zero valued cookie for wildcard searches */

/* Cloning policy check used by rtalloc2(); defined further down. */
static int okaytoclone __P((u_int, int));
114 
115 #ifdef IPSEC
116 
117 static struct ifaddr *
118 encap_findgwifa(struct sockaddr *gw)
119 {
120 	return encif.if_addrlist.tqh_first;
121 }
122 
123 #endif
124 
125 void
126 rtable_init(table)
127 	void **table;
128 {
129 	struct domain *dom;
130 	for (dom = domains; dom; dom = dom->dom_next)
131 		if (dom->dom_rtattach)
132 			dom->dom_rtattach(&table[dom->dom_family],
133 			    dom->dom_rtoffset);
134 }
135 
136 void
137 route_init()
138 {
139 	rn_init();	/* initialize all zeroes, all ones, mask table */
140 	rtable_init((void **)rt_tables);
141 }
142 
143 void
144 rtalloc_noclone(ro, howstrict)
145 	register struct route *ro;
146 	int howstrict;
147 {
148 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
149 		return;		/* XXX */
150 	ro->ro_rt = rtalloc2(&ro->ro_dst, 1, howstrict);
151 }
152 
153 static int
154 okaytoclone(flags, howstrict)
155 	u_int flags;
156 	int howstrict;
157 {
158 	if (howstrict == ALL_CLONING)
159 		return 1;
160 	if (howstrict == ONNET_CLONING && !(flags & RTF_GATEWAY))
161 		return 1;
162 	return 0;
163 }
164 
/*
 * Look up dst in the radix tree of its address family and return the
 * matching route with an additional reference, or NULL when there is no
 * table for the family or only a root node matched.
 *
 * A matching RTF_CLONING route is resolved through
 * rtrequest(RTM_RESOLVE) only when report is non-zero and okaytoclone()
 * accepts the route's flags under the howstrict policy.  If resolving
 * fails, the cloning route itself is returned.
 *
 * When report is non-zero a routing message is sent through
 * rt_missmsg(): RTM_MISS when nothing matched or resolving failed,
 * RTM_RESOLVE when the resolved route carries RTF_XRESOLVE.
 */
struct rtentry *
rtalloc2(dst, report,howstrict)
	register struct sockaddr *dst;
	int report,howstrict;
{
	register struct radix_node_head *rnh = rt_tables[dst->sa_family];
	register struct rtentry *rt;
	register struct radix_node *rn;
	struct rtentry *newrt = 0;
	struct rt_addrinfo info;
	int  s = splnet(), err = 0, msgtype = RTM_MISS;

	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
	    ((rn->rn_flags & RNF_ROOT) == 0)) {
		newrt = rt = (struct rtentry *)rn;
		if (report && (rt->rt_flags & RTF_CLONING) &&
		    okaytoclone(rt->rt_flags, howstrict)) {
			/* newrt goes in as the parent and comes back as the clone */
			err = rtrequest(RTM_RESOLVE, dst, SA(0), SA(0), 0,
			    &newrt);
			if (err) {
				/* could not clone: hand back the parent instead */
				newrt = rt;
				rt->rt_refcnt++;
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/* report the clone with RTM_RESOLVE instead of RTM_MISS */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
		} else
			rt->rt_refcnt++;
	} else {
		rtstat.rts_unreach++;
		/* also entered by goto from the cloning branch above */
miss:		if (report) {
			bzero((caddr_t)&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			rt_missmsg(msgtype, &info, 0, err);
		}
	}
	splx(s);
	return (newrt);
}
206 
207 /*
208  * Packet routing routines.
209  */
210 void
211 rtalloc(ro)
212 	register struct route *ro;
213 {
214 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
215 		return;				 /* XXX */
216 	ro->ro_rt = rtalloc1(&ro->ro_dst, 1);
217 }
218 
/*
 * Look up dst in the radix tree of its address family and return the
 * matching route with an additional reference, or NULL when there is no
 * table for the family or only a root node matched.
 *
 * When report is non-zero and the match is an RTF_CLONING route, the
 * route is resolved through rtrequest(RTM_RESOLVE); on success an
 * RTM_ADD message describing the clone is sent, on failure the cloning
 * route itself is returned.  Misses are reported with RTM_MISS (or
 * RTM_RESOLVE for RTF_XRESOLVE clones) when report is non-zero, except
 * for PF_KEY destinations, which are neither reported nor counted.
 */
struct rtentry *
rtalloc1(dst, report)
	register struct sockaddr *dst;
	int report;
{
	register struct radix_node_head *rnh = rt_tables[dst->sa_family];
	register struct rtentry *rt;
	register struct radix_node *rn;
	struct rtentry *newrt = 0;
	struct rt_addrinfo info;
	int  s = splsoftnet(), err = 0, msgtype = RTM_MISS;

	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
	    ((rn->rn_flags & RNF_ROOT) == 0)) {
		newrt = rt = (struct rtentry *)rn;
		if (report && (rt->rt_flags & RTF_CLONING)) {
			/* newrt goes in as the parent and comes back as the clone */
			err = rtrequest(RTM_RESOLVE, dst, SA(NULL),
			    SA(NULL), 0, &newrt);
			if (err) {
				/* could not clone: hand back the parent instead */
				newrt = rt;
				rt->rt_refcnt++;
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/* report the clone with RTM_RESOLVE instead of RTM_MISS */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
			/* Inform listeners of the new route */
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = rt_key(rt);
			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
			if (rt->rt_ifp != NULL) {
				/* NOTE(review): assumes the interface address list is non-empty */
				info.rti_info[RTAX_IFP] =
				    rt->rt_ifp->if_addrlist.tqh_first->ifa_addr;
				info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
			}
			rt_missmsg(RTM_ADD, &info, rt->rt_flags, 0);
		} else
			rt->rt_refcnt++;
	} else {
		if (dst->sa_family != PF_KEY)
		        rtstat.rts_unreach++;
	/*
	 * IP encapsulation does lots of lookups where we don't need nor want
	 * the RTM_MISSes that would be generated.  It causes RTM_MISS storms
	 * sent upward breaking user-level routing queries.
	 */
	/* also entered by goto from the cloning branch above */
	miss:	if (report && dst->sa_family != PF_KEY) {
			bzero((caddr_t)&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			rt_missmsg(msgtype, &info, 0, err);
		}
	}
	splx(s);
	return (newrt);
}
276 
277 void
278 rtfree(rt)
279 	register struct rtentry *rt;
280 {
281 	register struct ifaddr *ifa;
282 
283 	if (rt == NULL)
284 		panic("rtfree");
285 	rt->rt_refcnt--;
286 	if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
287 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
288 			panic ("rtfree 2");
289 		rttrash--;
290 		if (rt->rt_refcnt < 0) {
291 			printf("rtfree: %p not freed (neg refs)\n", rt);
292 			return;
293 		}
294 		rt_timer_remove_all(rt);
295 		ifa = rt->rt_ifa;
296 		if (ifa)
297 			IFAFREE(ifa);
298 		Free(rt_key(rt));
299 		Free(rt);
300 	}
301 }
302 
303 void
304 ifafree(ifa)
305 	register struct ifaddr *ifa;
306 {
307 	if (ifa == NULL)
308 		panic("ifafree");
309 	if (ifa->ifa_refcnt == 0)
310 		free(ifa, M_IFADDR);
311 	else
312 		ifa->ifa_refcnt--;
313 }
314 
315 /*
316  * Force a routing table entry to the specified
317  * destination to go through the given gateway.
318  * Normally called as a result of a routing redirect
319  * message from the network layer.
320  *
321  * N.B.: must be called at splsoftnet
322  */
323 void
324 rtredirect(dst, gateway, netmask, flags, src, rtp)
325 	struct sockaddr *dst, *gateway, *netmask, *src;
326 	int flags;
327 	struct rtentry **rtp;
328 {
329 	struct rtentry *rt;
330 	int error = 0;
331 	u_int32_t *stat = NULL;
332 	struct rt_addrinfo info;
333 	struct ifaddr *ifa;
334 
335 	/* verify the gateway is directly reachable */
336 	if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
337 		error = ENETUNREACH;
338 		goto out;
339 	}
340 	rt = rtalloc1(dst, 0);
341 	/*
342 	 * If the redirect isn't from our current router for this dst,
343 	 * it's either old or wrong.  If it redirects us to ourselves,
344 	 * we have a routing loop, perhaps as a result of an interface
345 	 * going down recently.
346 	 */
347 #define	equal(a1, a2) \
348 	((a1)->sa_len == (a2)->sa_len && \
349 	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
350 	if (!(flags & RTF_DONE) && rt &&
351 	     (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
352 		error = EINVAL;
353 	else if (ifa_ifwithaddr(gateway) != NULL)
354 		error = EHOSTUNREACH;
355 	if (error)
356 		goto done;
357 	/*
358 	 * Create a new entry if we just got back a wildcard entry
359 	 * or the lookup failed.  This is necessary for hosts
360 	 * which use routing redirects generated by smart gateways
361 	 * to dynamically build the routing tables.
362 	 */
363 	if ((rt == NULL) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
364 		goto create;
365 	/*
366 	 * Don't listen to the redirect if it's
367 	 * for a route to an interface.
368 	 */
369 	if (rt->rt_flags & RTF_GATEWAY) {
370 		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
371 			/*
372 			 * Changing from route to net => route to host.
373 			 * Create new route, rather than smashing route to net.
374 			 */
375 		create:
376 			if (rt)
377 				rtfree(rt);
378 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
379 			info.rti_info[RTAX_DST] = dst;
380 			info.rti_info[RTAX_GATEWAY] = gateway;
381 			info.rti_info[RTAX_NETMASK] = netmask;
382 			info.rti_ifa = ifa;
383 			info.rti_flags = flags;
384 			rt = NULL;
385 			error = rtrequest1(RTM_ADD, &info, &rt);
386 			if (rt != NULL)
387 				flags = rt->rt_flags;
388 			stat = &rtstat.rts_dynamic;
389 		} else {
390 			/*
391 			 * Smash the current notion of the gateway to
392 			 * this destination.  Should check about netmask!!!
393 			 */
394 			rt->rt_flags |= RTF_MODIFIED;
395 			flags |= RTF_MODIFIED;
396 			stat = &rtstat.rts_newgateway;
397 			rt_setgate(rt, rt_key(rt), gateway);
398 		}
399 	} else
400 		error = EHOSTUNREACH;
401 done:
402 	if (rt) {
403 		if (rtp && !error)
404 			*rtp = rt;
405 		else
406 			rtfree(rt);
407 	}
408 out:
409 	if (error)
410 		rtstat.rts_badredirect++;
411 	else if (stat != NULL)
412 		(*stat)++;
413 	bzero((caddr_t)&info, sizeof(info));
414 	info.rti_info[RTAX_DST] = dst;
415 	info.rti_info[RTAX_GATEWAY] = gateway;
416 	info.rti_info[RTAX_NETMASK] = netmask;
417 	info.rti_info[RTAX_AUTHOR] = src;
418 	rt_missmsg(RTM_REDIRECT, &info, flags, error);
419 }
420 
421 /*
422 * Routing table ioctl interface.
423 */
424 int
425 rtioctl(req, data, p)
426 	u_long req;
427 	caddr_t data;
428 	struct proc *p;
429 {
430 	return (EOPNOTSUPP);
431 }
432 
433 struct ifaddr *
434 ifa_ifwithroute(flags, dst, gateway)
435 	int flags;
436 	struct sockaddr	*dst, *gateway;
437 {
438 	register struct ifaddr *ifa;
439 
440 #ifdef IPSEC
441 	/*
442 	 * If the destination is a PF_KEY address, we'll look
443 	 * for the existence of a encap interface number or address
444 	 * in the options list of the gateway. By default, we'll return
445 	 * enc0.
446 	 */
447 	if (dst && (dst->sa_family == PF_KEY))
448 		return encap_findgwifa(gateway);
449 #endif
450 
451 	if ((flags & RTF_GATEWAY) == 0) {
452 		/*
453 		 * If we are adding a route to an interface,
454 		 * and the interface is a pt to pt link
455 		 * we should search for the destination
456 		 * as our clue to the interface.  Otherwise
457 		 * we can use the local address.
458 		 */
459 		ifa = NULL;
460 		if (flags & RTF_HOST)
461 			ifa = ifa_ifwithdstaddr(dst);
462 		if (ifa == NULL)
463 			ifa = ifa_ifwithaddr(gateway);
464 	} else {
465 		/*
466 		 * If we are adding a route to a remote net
467 		 * or host, the gateway may still be on the
468 		 * other end of a pt to pt link.
469 		 */
470 		ifa = ifa_ifwithdstaddr(gateway);
471 	}
472 	if (ifa == NULL)
473 		ifa = ifa_ifwithnet(gateway);
474 	if (ifa == NULL) {
475 		struct rtentry *rt = rtalloc1(gateway, 0);
476 		if (rt == NULL)
477 			return (NULL);
478 		rt->rt_refcnt--;
479 		/* The gateway must be local if the same address family. */
480 		if ((rt->rt_flags & RTF_GATEWAY) &&
481 		    rt_key(rt)->sa_family == dst->sa_family)
482 			return (0);
483 		if ((ifa = rt->rt_ifa) == NULL)
484 			return (NULL);
485 	}
486 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
487 		struct ifaddr *oifa = ifa;
488 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
489 		if (ifa == NULL)
490 			ifa = oifa;
491 	}
492 	return (ifa);
493 }
494 
/*
 * Round a length up to a multiple of sizeof(long); a length that is not
 * greater than zero yields sizeof(long).  The argument is evaluated
 * more than once, so it must be free of side effects.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
496 
497 int
498 rtrequest(req, dst, gateway, netmask, flags, ret_nrt)
499 	int req, flags;
500 	struct sockaddr *dst, *gateway, *netmask;
501 	struct rtentry **ret_nrt;
502 {
503 	struct rt_addrinfo info;
504 
505 	bzero(&info, sizeof(info));
506 	info.rti_flags = flags;
507 	info.rti_info[RTAX_DST] = dst;
508 	info.rti_info[RTAX_GATEWAY] = gateway;
509 	info.rti_info[RTAX_NETMASK] = netmask;
510 	return rtrequest1(req, &info, ret_nrt);
511 }
512 
/*
 * These (questionable) definitions of apparent local variables stay in
 * effect until the matching #undef lines, which means they apply to
 * both rt_getifa() and rtrequest1().  Each name expands to a field of
 * the `info' argument of those functions.  XXXXXX!!!
 */
#define dst	info->rti_info[RTAX_DST]
#define gateway	info->rti_info[RTAX_GATEWAY]
#define netmask	info->rti_info[RTAX_NETMASK]
#define ifaaddr	info->rti_info[RTAX_IFA]
#define ifpaddr	info->rti_info[RTAX_IFP]
#define flags	info->rti_flags
523 
524 int
525 rt_getifa(info)
526 	struct rt_addrinfo *info;
527 {
528 	struct ifaddr *ifa;
529 	int error = 0;
530 
531 	/*
532 	 * ifp may be specified by sockaddr_dl when protocol address
533 	 * is ambiguous
534 	 */
535 	if (info->rti_ifp == NULL && ifpaddr != NULL
536 	    && ifpaddr->sa_family == AF_LINK &&
537 	    (ifa = ifa_ifwithnet((struct sockaddr *)ifpaddr)) != NULL)
538 		info->rti_ifp = ifa->ifa_ifp;
539 	if (info->rti_ifa == NULL && ifaaddr != NULL)
540 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
541 	if (info->rti_ifa == NULL) {
542 		struct sockaddr *sa;
543 
544 		sa = ifaaddr != NULL ? ifaaddr :
545 		    (gateway != NULL ? gateway : dst);
546 		if (sa != NULL && info->rti_ifp != NULL)
547 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
548 		else if (dst != NULL && gateway != NULL)
549 			info->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
550 		else if (sa != NULL)
551 			info->rti_ifa = ifa_ifwithroute(flags, sa, sa);
552 	}
553 	if ((ifa = info->rti_ifa) != NULL) {
554 		if (info->rti_ifp == NULL)
555 			info->rti_ifp = ifa->ifa_ifp;
556 	} else
557 		error = ENETUNREACH;
558 	return (error);
559 }
560 
/*
 * Carry out one routing table request (RTM_DELETE, RTM_RESOLVE or
 * RTM_ADD) described by info.  Note that dst, gateway, netmask and
 * flags are not locals here: they are macros for fields of info, so
 * assignments to them modify the caller's structure.
 *
 * ret_nrt: for RTM_RESOLVE it must point at the cloning (parent) route
 * on entry; on success of RTM_ADD/RTM_RESOLVE it receives the new route
 * with one reference added; for RTM_DELETE it receives the removed
 * route, which is otherwise freed here if unreferenced.
 *
 * Returns 0, or EAFNOSUPPORT (no table for the family), ESRCH (nothing
 * to delete), EINVAL (RTM_RESOLVE without a parent), ENOBUFS
 * (allocation failure), EEXIST (radix insert refused) or the error from
 * rt_getifa().  Any other req value falls through the switch and
 * returns 0 without doing anything.
 */
int
rtrequest1(req, info, ret_nrt)
	int req;
	struct rt_addrinfo *info;
	struct rtentry **ret_nrt;
{
	int s = splsoftnet(); int error = 0;
	register struct rtentry *rt;
	register struct radix_node *rn;
	register struct radix_node_head *rnh;
	struct ifaddr *ifa;
	struct sockaddr *ndst;
	/* record the error and leave through the common exit */
#define senderr(x) { error = x ; goto bad; }

	if ((rnh = rt_tables[dst->sa_family]) == 0)
		senderr(EAFNOSUPPORT);
	/* host routes carry no netmask */
	if (flags & RTF_HOST)
		netmask = 0;
	switch (req) {
	case RTM_DELETE:
		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL)
			senderr(ESRCH);
		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");
		rt = (struct rtentry *)rn;
		/* release the reference held on the route to the gateway */
		if (rt->rt_gwroute) {
			rt = rt->rt_gwroute; RTFREE(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = NULL;
		}
		rt->rt_flags &= ~RTF_UP;
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
		/* out of the table but not yet freed */
		rttrash++;
		if (ret_nrt)
			*ret_nrt = rt;
		else if (rt->rt_refcnt <= 0) {
			/* rtfree() decrements first, so compensate */
			rt->rt_refcnt++;
			rtfree(rt);
		}
		break;

	case RTM_RESOLVE:
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			senderr(EINVAL);
		/* inherit ifa, flags (minus cloning) and gateway from the parent */
		ifa = rt->rt_ifa;
		flags = rt->rt_flags & ~RTF_CLONING;
		gateway = rt->rt_gateway;
		/* without a genmask the clone becomes a host route */
		if ((netmask = rt->rt_genmask) == NULL)
			flags |= RTF_HOST;
		goto makeroute;

	case RTM_ADD:
		if (info->rti_ifa == 0 && (error = rt_getifa(info)))
			senderr(error);
		ifa = info->rti_ifa;
	makeroute:
		R_Malloc(rt, struct rtentry *, sizeof(*rt));
		if (rt == NULL)
			senderr(ENOBUFS);
		Bzero(rt, sizeof(*rt));
		rt->rt_flags = RTF_UP | flags;
		LIST_INIT(&rt->rt_timer);
		/* allocates the storage shared by key and gateway */
		if (rt_setgate(rt, dst, gateway)) {
			Free(rt);
			senderr(ENOBUFS);
		}
		/* fill in the key, masked when a netmask is present */
		ndst = rt_key(rt);
		if (netmask) {
			rt_maskedcopy(dst, ndst, netmask);
		} else
			Bcopy(dst, ndst, dst->sa_len);
		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
					rnh, rt->rt_nodes);
		if (rn == NULL) {
			/* insert refused: undo everything done so far */
			if (rt->rt_gwroute)
				rtfree(rt->rt_gwroute);
			Free(rt_key(rt));
			Free(rt);
			senderr(EEXIST);
		}
		/* the route now holds a reference on its interface address */
		ifa->ifa_refcnt++;
		rt->rt_ifa = ifa;
		rt->rt_ifp = ifa->ifa_ifp;
		if (req == RTM_RESOLVE) {
			/*
			 * Copy both metrics and a back pointer to the cloned
			 * route's parent.
			 */
			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
			rt->rt_parent = *ret_nrt;	 /* Back ptr. to parent. */
		} else if (!rt->rt_rmx.rmx_mtu &&
		    !(rt->rt_rmx.rmx_locks & RTV_MTU)) { /* XXX */
			/* no MTU yet: take the gateway route's, else the interface's */
			if (rt->rt_gwroute) {
				rt->rt_rmx.rmx_mtu =
				    rt->rt_gwroute->rt_rmx.rmx_mtu;
			} else {
				rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu;
			}
		}
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, info);
		if (ret_nrt) {
			*ret_nrt = rt;
			rt->rt_refcnt++;
		}
		break;
	}
bad:
	splx(s);
	return (error);
}
672 
/*
 * End of the scope of the info-> shorthand macros; from here on these
 * names are ordinary identifiers again.
 */
#undef dst
#undef gateway
#undef netmask
#undef ifaaddr
#undef ifpaddr
#undef flags
679 
680 int
681 rt_setgate(rt0, dst, gate)
682 	struct rtentry *rt0;
683 	struct sockaddr *dst, *gate;
684 {
685 	caddr_t new, old;
686 	int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
687 	register struct rtentry *rt = rt0;
688 
689 	if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
690 		old = (caddr_t)rt_key(rt);
691 		R_Malloc(new, caddr_t, dlen + glen);
692 		if (new == NULL)
693 			return 1;
694 		rt->rt_nodes->rn_key = new;
695 	} else {
696 		new = rt->rt_nodes->rn_key;
697 		old = NULL;
698 	}
699 	Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);
700 	if (old) {
701 		Bcopy(dst, new, dlen);
702 		Free(old);
703 	}
704 	if (rt->rt_gwroute != NULL) {
705 		rt = rt->rt_gwroute;
706 		RTFREE(rt);
707 		rt = rt0;
708 		rt->rt_gwroute = NULL;
709 	}
710 	if (rt->rt_flags & RTF_GATEWAY) {
711 		rt->rt_gwroute = rtalloc1(gate, 1);
712 		/*
713 		 * If we switched gateways, grab the MTU from the new
714 		 * gateway route if the current MTU is 0 or greater
715 		 * than the MTU of gateway.
716 		 */
717 		if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
718 		    (rt->rt_rmx.rmx_mtu == 0 ||
719 		     rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu)) {
720 			rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
721 		}
722 	}
723 	return 0;
724 }
725 
/*
 * Copy src to dst with netmask applied.  All three are treated as raw
 * byte strings whose first byte is the length.  The first two bytes
 * (sa_len and sa_family) are copied unmasked; the following bytes are
 * ANDed with the mask for as long as both src and the mask have bytes;
 * whatever is left of dst, up to the length of src, is zeroed.
 */
void
rt_maskedcopy(src, dst, netmask)
	struct sockaddr *src, *dst, *netmask;
{
	unsigned char *in = (unsigned char *)src;
	unsigned char *out = (unsigned char *)dst;
	unsigned char *mask = (unsigned char *)netmask;
	unsigned char *out_end = out + in[0];	/* dst + length of src */
	unsigned char *and_end = out + mask[0];	/* dst + length of mask */

	/* Never mask past the end of the address itself. */
	if (and_end > out_end)
		and_end = out_end;

	/* copies sa_len & sa_family */
	out[0] = in[0];
	out[1] = in[1];
	in += 2;
	out += 2;
	mask += 2;

	for (; out < and_end; in++, out++, mask++)
		*out = *in & *mask;

	if (out < out_end)
		bzero(out, (unsigned)(out_end - out));
}
745 
746 /*
747  * Set up a routing table entry, normally
748  * for an interface.
749  */
750 int
751 rtinit(ifa, cmd, flags)
752 	register struct ifaddr *ifa;
753 	int cmd, flags;
754 {
755 	register struct rtentry *rt;
756 	register struct sockaddr *dst;
757 	register struct sockaddr *deldst;
758 	struct mbuf *m = NULL;
759 	struct rtentry *nrt = NULL;
760 	int error;
761 	struct rt_addrinfo info;
762 
763 	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
764 	if (cmd == RTM_DELETE) {
765 		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
766 			m = m_get(M_DONTWAIT, MT_SONAME);
767 			if (m == NULL)
768 				return(ENOBUFS);
769 			deldst = mtod(m, struct sockaddr *);
770 			rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
771 			dst = deldst;
772 		}
773 		if ((rt = rtalloc1(dst, 0)) != NULL) {
774 			rt->rt_refcnt--;
775 			if (rt->rt_ifa != ifa) {
776 				if (m != NULL)
777 					(void) m_free(m);
778 				return (flags & RTF_HOST ? EHOSTUNREACH
779 							: ENETUNREACH);
780 			}
781 		}
782 	}
783 	bzero(&info, sizeof(info));
784 	info.rti_ifa = ifa;
785 	info.rti_flags = flags | ifa->ifa_flags;
786 	info.rti_info[RTAX_DST] = dst;
787 	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
788 	/*
789 	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
790 	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
791 	 * variable) when RTF_HOST is 1.  still not sure if i can safely
792 	 * change it to meet bsdi4 behavior.
793 	 */
794 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
795 	error = rtrequest1(cmd, &info, &nrt);
796 	if (cmd == RTM_DELETE && error == 0 && (rt = nrt) != NULL) {
797 		rt_newaddrmsg(cmd, ifa, error, nrt);
798 		if (rt->rt_refcnt <= 0) {
799 			rt->rt_refcnt++;
800 			rtfree(rt);
801 		}
802 	}
803 	if (cmd == RTM_ADD && error == 0 && (rt = nrt) != NULL) {
804 		rt->rt_refcnt--;
805 		if (rt->rt_ifa != ifa) {
806 			printf("rtinit: wrong ifa (%p) was (%p)\n",
807 			       ifa, rt->rt_ifa);
808 			if (rt->rt_ifa->ifa_rtrequest)
809 				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, NULL);
810 			IFAFREE(rt->rt_ifa);
811 			rt->rt_ifa = ifa;
812 			rt->rt_ifp = ifa->ifa_ifp;
813 			rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu;	/*XXX*/
814 			ifa->ifa_refcnt++;
815 			if (ifa->ifa_rtrequest)
816 				ifa->ifa_rtrequest(RTM_ADD, rt, NULL);
817 		}
818 		rt_newaddrmsg(cmd, ifa, error, nrt);
819 	}
820 	return (error);
821 }
822 
/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */
831 
/* Every queue made by rt_timer_queue_create() is linked onto this list. */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Set to 1 by rt_timer_init() once the timer machinery is set up. */
static int rt_init_done = 0;
834 
/*
 * Fire route timer r: call its handler if it has one, otherwise delete
 * the route the timer is attached to.  Wrapped in do/while (0) so the
 * macro behaves as a single statement (safe before an `else'), with the
 * argument parenthesized; r is evaluated more than once.
 */
#define RTTIMER_CALLOUT(r)	do {					\
	if ((r)->rtt_func != NULL) {					\
		(*(r)->rtt_func)((r)->rtt_rt, (r));			\
	} else {							\
		rtrequest((int) RTM_DELETE,				\
			  (struct sockaddr *)rt_key((r)->rtt_rt),	\
			  NULL, NULL, 0, NULL);				\
	}								\
} while (0)
844 
845 /*
846  * Some subtle order problems with domain initialization mean that
847  * we cannot count on this being run from rt_init before various
848  * protocol initializations are done.  Therefore, we make sure
849  * that this is run when the first queue is added...
850  */
851 
852 void
853 rt_timer_init()
854 {
855 	static struct timeout rt_timer_timeout;
856 
857 	assert(rt_init_done == 0);
858 
859 #if 0
860 	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
861 	    0, NULL, NULL, M_RTABLE);
862 #endif
863 
864 	LIST_INIT(&rttimer_queue_head);
865 	timeout_set(&rt_timer_timeout, rt_timer_timer, &rt_timer_timeout);
866 	timeout_add(&rt_timer_timeout, hz);	/* every second */
867 	rt_init_done = 1;
868 }
869 
870 struct rttimer_queue *
871 rt_timer_queue_create(timeout)
872 	u_int	timeout;
873 {
874 	struct rttimer_queue *rtq;
875 
876 	if (rt_init_done == 0)
877 		rt_timer_init();
878 
879 	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
880 	if (rtq == NULL)
881 		return (NULL);
882 	Bzero(rtq, sizeof *rtq);
883 
884 	rtq->rtq_timeout = timeout;
885 	rtq->rtq_count = 0;
886 	TAILQ_INIT(&rtq->rtq_head);
887 	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
888 
889 	return (rtq);
890 }
891 
892 void
893 rt_timer_queue_change(rtq, timeout)
894 	struct rttimer_queue *rtq;
895 	long timeout;
896 {
897 
898 	rtq->rtq_timeout = timeout;
899 }
900 
901 void
902 rt_timer_queue_destroy(rtq, destroy)
903 	struct rttimer_queue *rtq;
904 	int destroy;
905 {
906 	struct rttimer *r;
907 
908 	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
909 		LIST_REMOVE(r, rtt_link);
910 		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
911 		if (destroy)
912 			RTTIMER_CALLOUT(r);
913 #if 0
914 		pool_put(&rttimer_pool, r);
915 #else
916 		free(r, M_RTABLE);
917 #endif
918 		if (rtq->rtq_count > 0)
919 			rtq->rtq_count--;
920 		else
921 			printf("rt_timer_queue_destroy: rtq_count reached 0\n");
922 	}
923 
924 	LIST_REMOVE(rtq, rtq_link);
925 
926 	/*
927 	 * Caller is responsible for freeing the rttimer_queue structure.
928 	 */
929 }
930 
931 unsigned long
932 rt_timer_count(rtq)
933 	struct rttimer_queue *rtq;
934 {
935 
936 	return rtq->rtq_count;
937 }
938 
939 void
940 rt_timer_remove_all(rt)
941 	struct rtentry *rt;
942 {
943 	struct rttimer *r;
944 
945 	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
946 		LIST_REMOVE(r, rtt_link);
947 		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
948 		if (r->rtt_queue->rtq_count > 0)
949 			r->rtt_queue->rtq_count--;
950 		else
951 			printf("rt_timer_remove_all: rtq_count reached 0\n");
952 #if 0
953 		pool_put(&rttimer_pool, r);
954 #else
955 		free(r, M_RTABLE);
956 #endif
957 	}
958 }
959 
960 int
961 rt_timer_add(rt, func, queue)
962 	struct rtentry *rt;
963 	void(*func) __P((struct rtentry *, struct rttimer *));
964 	struct rttimer_queue *queue;
965 {
966 	struct rttimer *r;
967 	long current_time;
968 	int s;
969 
970 	s = splclock();
971 	current_time = mono_time.tv_sec;
972 	splx(s);
973 
974 	/*
975 	 * If there's already a timer with this action, destroy it before
976 	 * we add a new one.
977 	 */
978 	for (r = LIST_FIRST(&rt->rt_timer); r != NULL;
979 	     r = LIST_NEXT(r, rtt_link)) {
980 		if (r->rtt_func == func) {
981 			LIST_REMOVE(r, rtt_link);
982 			TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
983 			if (r->rtt_queue->rtq_count > 0)
984 				r->rtt_queue->rtq_count--;
985 			else
986 				printf("rt_timer_add: rtq_count reached 0\n");
987 #if 0
988 			pool_put(&rttimer_pool, r);
989 #else
990 			free(r, M_RTABLE);
991 #endif
992 			break;  /* only one per list, so we can quit... */
993 		}
994 	}
995 
996 #if 0
997 	r = pool_get(&rttimer_pool, PR_NOWAIT);
998 #else
999 	r = (struct rttimer *)malloc(sizeof(*r), M_RTABLE, M_NOWAIT);
1000 #endif
1001 	if (r == NULL)
1002 		return (ENOBUFS);
1003 	Bzero(r, sizeof(*r));
1004 
1005 	r->rtt_rt = rt;
1006 	r->rtt_time = current_time;
1007 	r->rtt_func = func;
1008 	r->rtt_queue = queue;
1009 	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1010 	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1011 	r->rtt_queue->rtq_count++;
1012 
1013 	return (0);
1014 }
1015 
1016 /* ARGSUSED */
1017 void
1018 rt_timer_timer(arg)
1019 	void *arg;
1020 {
1021 	struct timeout *to = (struct timeout *)arg;
1022 	struct rttimer_queue *rtq;
1023 	struct rttimer *r;
1024 	long current_time;
1025 	int s;
1026 
1027 	s = splclock();
1028 	current_time = mono_time.tv_sec;
1029 	splx(s);
1030 
1031 	s = splsoftnet();
1032 	for (rtq = LIST_FIRST(&rttimer_queue_head); rtq != NULL;
1033 	     rtq = LIST_NEXT(rtq, rtq_link)) {
1034 		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1035 		    (r->rtt_time + rtq->rtq_timeout) < current_time) {
1036 			LIST_REMOVE(r, rtt_link);
1037 			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1038 			RTTIMER_CALLOUT(r);
1039 #if 0
1040 			pool_put(&rttimer_pool, r);
1041 #else
1042 			free(r, M_RTABLE);
1043 #endif
1044 			if (rtq->rtq_count > 0)
1045 				rtq->rtq_count--;
1046 			else
1047 				printf("rt_timer_timer: rtq_count reached 0\n");
1048 		}
1049 	}
1050 	splx(s);
1051 
1052 	timeout_add(to, hz);		/* every second */
1053 }
1054