xref: /dflybsd-src/sys/net/route.c (revision f7e25d559127833cbbb89b74fcf0cc036406459b)
1 /*
2  * Copyright (c) 1980, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)route.c	8.3 (Berkeley) 1/9/95
34  * $FreeBSD: src/sys/net/route.c,v 1.59.2.10 2003/01/17 08:04:00 ru Exp $
35  * $DragonFly: src/sys/net/route.c,v 1.10 2004/12/21 02:54:14 hsu Exp $
36  */
37 
38 #include "opt_inet.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 #include <sys/domain.h>
46 #include <sys/kernel.h>
47 
48 #include <net/if.h>
49 #include <net/route.h>
50 
51 #include <netinet/in.h>
52 #include <net/ip_mroute/ip_mroute.h>
53 
54 static struct rtstat rtstat;
55 struct radix_node_head *rt_tables[AF_MAX+1];
56 
57 static void	rt_maskedcopy (struct sockaddr *, struct sockaddr *,
58 			       struct sockaddr *);
59 static void	rtable_init (void **);
60 
61 static void
62 rtable_init(void **table)
63 {
64 	struct domain *dom;
65 
66 	for (dom = domains; dom; dom = dom->dom_next)
67 		if (dom->dom_rtattach)
68 			dom->dom_rtattach(&table[dom->dom_family],
69 			    dom->dom_rtoffset);
70 }
71 
72 void
73 route_init()
74 {
75 	rn_init();	/* initialize all zeroes, all ones, mask table */
76 	rtable_init((void **)rt_tables);
77 }
78 
79 /*
80  * Packet routing routines.
81  */
82 
83 /*
84  * Lookup and fill in the ro_rt rtentry field in a route structure given
85  * an address in the ro_dst field.
86  */
87 void
88 rtalloc(struct route *ro)
89 {
90 	rtalloc_ign(ro, 0UL);
91 }
92 
93 void
94 rtalloc_ign(struct route *ro, u_long ignore)
95 {
96 	int s;
97 
98 	if (ro->ro_rt != NULL) {
99 		if (ro->ro_rt->rt_ifp != NULL && ro->ro_rt->rt_flags & RTF_UP)
100 			return;
101 		s = splnet(); /* XXX probably always at splnet here already */
102 		rtfree(ro->ro_rt);
103 		ro->ro_rt = NULL;
104 		splx(s);
105 	}
106 	ro->ro_rt = rtlookup(&ro->ro_dst, 1, ignore);
107 }
108 
109 /*
110  * Look up the route that matches the given 'dst' address.
111  *
112  * Create a cloned route if the route is a cloning route
113  * and RTF_CLONING or RTF_PRCLONING are not being ignored.
114  *
115  * In either case, the returned route has its refcnt incremented.
116  */
117 struct rtentry *
118 rtlookup(struct sockaddr *dst, int report, u_long ignflags)
119 {
120 	struct radix_node_head *rnh = rt_tables[dst->sa_family];
121 	struct rtentry *rt;
122 	struct radix_node *rn;
123 	struct rt_addrinfo info;
124 	u_long nflags;
125 	int s, err, msgtype;
126 
127 	s = splnet();
128 	if (rnh != NULL && (rn = rnh->rnh_matchaddr((char *)dst, rnh))) {
129 		rt = (struct rtentry *)rn;
130 		nflags = rt->rt_flags & ~ignflags;
131 		if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
132 			struct rtentry *clonedroute;
133 
134 			clonedroute = rt;  /* value used in rtrequest()! */
135 			err = rtrequest(RTM_RESOLVE, dst, NULL, NULL, 0,
136 					&clonedroute);
137 			if (err != 0) {
138 				/* use original route on clone failure */
139 				rt->rt_refcnt++;
140 				goto reportmiss;
141 			} else {
142 				rt = clonedroute;	/* use cloned route */
143 			}
144 			if (clonedroute->rt_flags & RTF_XRESOLVE) {
145 				/*
146 				 * The new cloned route needs external
147 				 * resolution.
148 				 */
149 				msgtype = RTM_RESOLVE;
150 				goto reportmsg;
151 			}
152 			/* Inform listeners of the new route. */
153 			bzero(&info, sizeof(info));
154 			info.rti_info[RTAX_DST] = rt_key(clonedroute);
155 			info.rti_info[RTAX_NETMASK] = rt_mask(clonedroute);
156 			info.rti_info[RTAX_GATEWAY] = clonedroute->rt_gateway;
157 			if (clonedroute->rt_ifp != NULL) {
158 				info.rti_info[RTAX_IFP] =
159 				    TAILQ_FIRST(&clonedroute->rt_ifp
160 						->if_addrhead)->ifa_addr;
161 				info.rti_info[RTAX_IFA] =
162 				    clonedroute->rt_ifa->ifa_addr;
163 			}
164 			rt_missmsg(RTM_ADD, &info, clonedroute->rt_flags, 0);
165 		} else
166 			rt->rt_refcnt++;	/* most common case */
167 	} else {
168 		rt = NULL;
169 		rtstat.rts_unreach++;
170 		if (report) {
171 			err = 0;
172 reportmiss:
173 			msgtype = RTM_MISS;
174 reportmsg:
175 			bzero(&info, sizeof(info));
176 			info.rti_info[RTAX_DST] = dst;
177 			rt_missmsg(msgtype, &info, 0, err);
178 		}
179 	}
180 	splx(s);
181 	return (rt);
182 }
183 
184 void
185 rtfree(struct rtentry *rt)
186 {
187 	struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family];
188 
189 	--rt->rt_refcnt;
190 	if (rnh->rnh_close && rt->rt_refcnt == 0)
191 		rnh->rnh_close((struct radix_node *)rt, rnh);
192 	if (rt->rt_refcnt <= 0 && !(rt->rt_flags & RTF_UP)) {
193 		KASSERT(!(rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)),
194 			("rtfree: rn_flags 0x%x ", rt->rt_nodes->rn_flags));
195 		KASSERT(rt->rt_refcnt == 0,
196 			("rtfree: rt_refcnt %ld", rt->rt_refcnt));
197 		if (rt->rt_ifa != NULL)
198 			IFAFREE(rt->rt_ifa);
199 		if (rt->rt_parent != NULL)
200 			RTFREE(rt->rt_parent);
201 		Free(rt_key(rt));  /* Also frees gateway.  See rt_setgate(). */
202 		Free(rt);
203 	}
204 }
205 
/* True when the first a1->sa_len bytes of the two sockaddrs are identical. */
#define	sa_equal(a1, a2) (!bcmp((a1), (a2), (a1)->sa_len))
207 
208 /*
209  * Force a routing table entry to the specified
210  * destination to go through the given gateway.
211  * Normally called as a result of a routing redirect
212  * message from the network layer.
213  *
214  * N.B.: must be called at splnet
215  *
216  */
217 void
218 rtredirect(
219 	struct sockaddr *dst,
220 	struct sockaddr *gateway,
221 	struct sockaddr *netmask,
222 	int flags,
223 	struct sockaddr *src,
224 	struct rtentry **rtp)
225 {
226 	struct rtentry *rt;
227 	struct rt_addrinfo info;
228 	struct ifaddr *ifa;
229 	short *stat = NULL;
230 	int error;
231 
232 	/* verify the gateway is directly reachable */
233 	if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
234 		error = ENETUNREACH;
235 		goto out;
236 	}
237 
238 	/*
239 	 * If the redirect isn't from our current router for this dst,
240 	 * it's either old or wrong.  If it redirects us to ourselves,
241 	 * we have a routing loop, perhaps as a result of an interface
242 	 * going down recently.
243 	 */
244 	if (!(flags & RTF_DONE) &&
245 	    (rt = rtlookup(dst, 0, 0UL)) != NULL &&
246 	    (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) {
247 		error = EINVAL;
248 		goto done;
249 	} else if (ifa_ifwithaddr(gateway)) {
250 		error = EHOSTUNREACH;
251 		goto done;
252 	}
253 
254 	/*
255 	 * Create a new entry if we just got back a wildcard entry
256 	 * or the the lookup failed.  This is necessary for hosts
257 	 * which use routing redirects generated by smart gateways
258 	 * to dynamically build the routing tables.
259 	 */
260 	if (rt == NULL || (rt_mask(rt) != NULL && rt_mask(rt)->sa_len < 2))
261 		goto create;
262 
263 	/*
264 	 * Don't listen to the redirect if it's for a route to an interface.
265 	 */
266 	if (rt->rt_flags & RTF_GATEWAY) {
267 		if ((!(rt->rt_flags & RTF_HOST)) && (flags & RTF_HOST)) {
268 			/*
269 			 * Changing from route to net => route to host.
270 			 * Create new route, rather than smashing route to net.
271 			 */
272 create:
273 			if (rt != NULL)
274 				rtfree(rt);
275 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
276 			bzero(&info, sizeof(info));
277 			info.rti_info[RTAX_DST] = dst;
278 			info.rti_info[RTAX_GATEWAY] = gateway;
279 			info.rti_info[RTAX_NETMASK] = netmask;
280 			info.rti_ifa = ifa;
281 			info.rti_flags = flags;
282 			rt = NULL;
283 			error = rtrequest1(RTM_ADD, &info, &rt);
284 			if (rt != NULL)
285 				flags = rt->rt_flags;
286 			stat = &rtstat.rts_dynamic;
287 		} else {
288 			/*
289 			 * Smash the current notion of the gateway to
290 			 * this destination.  Should check about netmask!!!
291 			 */
292 			rt->rt_flags |= RTF_MODIFIED;
293 			flags |= RTF_MODIFIED;
294 			stat = &rtstat.rts_newgateway;
295 			/* Add the key and gateway (in one malloc'ed chunk). */
296 			rt_setgate(rt, rt_key(rt), gateway);
297 			error = 0;
298 		}
299 	} else {
300 		error = EHOSTUNREACH;
301 	}
302 
303 done:
304 	if (rt != NULL) {
305 		if (rtp != NULL && error == 0)
306 			*rtp = rt;
307 		else
308 			rtfree(rt);
309 	}
310 
311 out:
312 	if (error != 0)
313 		rtstat.rts_badredirect++;
314 	else if (stat != NULL)
315 		(*stat)++;
316 
317 	bzero(&info, sizeof(info));
318 	info.rti_info[RTAX_DST] = dst;
319 	info.rti_info[RTAX_GATEWAY] = gateway;
320 	info.rti_info[RTAX_NETMASK] = netmask;
321 	info.rti_info[RTAX_AUTHOR] = src;
322 	rt_missmsg(RTM_REDIRECT, &info, flags, error);
323 }
324 
325 /*
326 * Routing table ioctl interface.
327 */
328 int
329 rtioctl(u_long req, caddr_t data, struct thread *td)
330 {
331 #ifdef INET
332 	/* Multicast goop, grrr... */
333 	return mrt_ioctl ? mrt_ioctl(req, data) : EOPNOTSUPP;
334 #else
335 	return ENXIO;
336 #endif
337 }
338 
339 struct ifaddr *
340 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
341 {
342 	struct ifaddr *ifa;
343 
344 	if (!(flags & RTF_GATEWAY)) {
345 		/*
346 		 * If we are adding a route to an interface,
347 		 * and the interface is a point-to-point link,
348 		 * we should search for the destination
349 		 * as our clue to the interface.  Otherwise
350 		 * we can use the local address.
351 		 */
352 		ifa = NULL;
353 		if (flags & RTF_HOST) {
354 			ifa = ifa_ifwithdstaddr(dst);
355 		}
356 		if (ifa == NULL)
357 			ifa = ifa_ifwithaddr(gateway);
358 	} else {
359 		/*
360 		 * If we are adding a route to a remote net
361 		 * or host, the gateway may still be on the
362 		 * other end of a pt to pt link.
363 		 */
364 		ifa = ifa_ifwithdstaddr(gateway);
365 	}
366 	if (ifa == NULL)
367 		ifa = ifa_ifwithnet(gateway);
368 	if (ifa == NULL) {
369 		struct rtentry *rt = rtlookup(gateway, 0, 0UL);
370 
371 		if (rt == NULL)
372 			return (NULL);
373 		rt->rt_refcnt--;
374 		if ((ifa = rt->rt_ifa) == NULL)
375 			return (NULL);
376 	}
377 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
378 		struct ifaddr *oifa = ifa;
379 
380 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
381 		if (ifa == NULL)
382 			ifa = oifa;
383 	}
384 	return (ifa);
385 }
386 
/* Tree-walk callbacks that delete cloned routes; defined further down. */
static int rt_fixdelete(struct radix_node *rn, void *vp);
static int rt_fixchange(struct radix_node *rn, void *vp);

/* Argument bundle passed to rt_fixchange() through rnh_walktree_from(). */
struct rtfc_arg {
	struct rtentry *rt0;		/* route being added or changed */
	struct radix_node_head *rnh;	/* tree that is being walked */
};
394 
395 int
396 rt_getifa(struct rt_addrinfo *info)
397 {
398 	struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
399 	struct sockaddr *dst = info->rti_info[RTAX_DST];
400 	struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
401 	struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
402 	int flags = info->rti_flags;
403 	struct ifaddr *ifa;
404 	int error = 0;
405 
406 	/*
407 	 * ifp may be specified by sockaddr_dl
408 	 * when protocol address is ambiguous.
409 	 */
410 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
411 	    ifpaddr->sa_family == AF_LINK &&
412 	    (ifa = ifa_ifwithnet(ifpaddr)) != NULL)
413 		info->rti_ifp = ifa->ifa_ifp;
414 	if (info->rti_ifa == NULL && ifaaddr != NULL)
415 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
416 	if (info->rti_ifa == NULL) {
417 		struct sockaddr *sa;
418 
419 		sa = ifaaddr != NULL ? ifaaddr :
420 		    (gateway != NULL ? gateway : dst);
421 		if (sa != NULL && info->rti_ifp != NULL)
422 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
423 		else if (dst != NULL && gateway != NULL)
424 			info->rti_ifa = ifa_ifwithroute(flags, dst, gateway);
425 		else if (sa != NULL)
426 			info->rti_ifa = ifa_ifwithroute(flags, sa, sa);
427 	}
428 	if ((ifa = info->rti_ifa) != NULL) {
429 		if (info->rti_ifp == NULL)
430 			info->rti_ifp = ifa->ifa_ifp;
431 	} else
432 		error = ENETUNREACH;
433 	return (error);
434 }
435 
436 /*
437  * Do appropriate manipulations of a routing tree given
438  * all the bits of info needed
439  */
440 int
441 rtrequest(
442 	int req,
443 	struct sockaddr *dst,
444 	struct sockaddr *gateway,
445 	struct sockaddr *netmask,
446 	int flags,
447 	struct rtentry **ret_nrt)
448 {
449 	struct rt_addrinfo info;
450 
451 	bzero(&info, sizeof info);
452 	info.rti_flags = flags;
453 	info.rti_info[RTAX_DST] = dst;
454 	info.rti_info[RTAX_GATEWAY] = gateway;
455 	info.rti_info[RTAX_NETMASK] = netmask;
456 	return rtrequest1(req, &info, ret_nrt);
457 }
458 
459 int
460 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
461 {
462 	struct sockaddr *dst = info->rti_info[RTAX_DST];
463 	struct rtentry *rt;
464 	struct radix_node *rn;
465 	struct radix_node_head *rnh;
466 	struct ifaddr *ifa;
467 	struct sockaddr *ndst;
468 	int error = 0;
469 	int s;
470 
471 #define gotoerr(x) { error = x ; goto bad; }
472 
473 	s = splnet();
474 	/*
475 	 * Find the correct routing tree to use for this Address Family
476 	 */
477 	if ((rnh = rt_tables[dst->sa_family]) == NULL)
478 		gotoerr(EAFNOSUPPORT);
479 
480 	/*
481 	 * If we are adding a host route then we don't want to put
482 	 * a netmask in the tree, nor do we want to clone it.
483 	 */
484 	if (info->rti_flags & RTF_HOST) {
485 		info->rti_info[RTAX_NETMASK] = NULL;
486 		info->rti_flags &= ~(RTF_CLONING | RTF_PRCLONING);
487 	}
488 
489 	switch (req) {
490 	case RTM_DELETE:
491 		/* Remove the item from the tree. */
492 		rn = rnh->rnh_deladdr((char *)info->rti_info[RTAX_DST],
493 				      (char *)info->rti_info[RTAX_NETMASK],
494 				      rnh);
495 		if (rn == NULL)
496 			gotoerr(ESRCH);
497 		KASSERT(!(rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)),
498 			("rnh_deladdr returned flags 0x%x", rn->rn_flags));
499 		rt = (struct rtentry *)rn;
500 
501 		/* Free any routes cloned from this one. */
502 		if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
503 		    rt_mask(rt) != NULL) {
504 			rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
505 					       (char *)rt_mask(rt),
506 					       rt_fixdelete, rt);
507 		}
508 
509 		if (rt->rt_gwroute != NULL) {
510 			RTFREE(rt->rt_gwroute);
511 			rt->rt_gwroute = NULL;
512 		}
513 
514 		/*
515 		 * NB: RTF_UP must be set during the search above,
516 		 * because we might delete the last ref, causing
517 		 * rt to get freed prematurely.
518 		 */
519 		rt->rt_flags &= ~RTF_UP;
520 
521 		/* Give the protocol a chance to keep things in sync. */
522 		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
523 			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
524 
525 		/*
526 		 * If the caller wants it, then it can have it,
527 		 * but it's up to it to free the rtentry as we won't be
528 		 * doing it.
529 		 */
530 		if (ret_nrt != NULL) {
531 			*ret_nrt = rt;
532 		} else if (rt->rt_refcnt <= 0) {
533 			rt->rt_refcnt++;  /* refcnt > 0 required for rtfree() */
534 			rtfree(rt);
535 		}
536 		break;
537 
538 	case RTM_RESOLVE:
539 		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
540 			gotoerr(EINVAL);
541 		ifa = rt->rt_ifa;
542 		info->rti_flags = rt->rt_flags &
543 		    ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
544 		info->rti_flags |= RTF_WASCLONED;
545 		info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
546 		if ((info->rti_info[RTAX_NETMASK] = rt->rt_genmask) == NULL)
547 			info->rti_flags |= RTF_HOST;
548 		goto makeroute;
549 
550 	case RTM_ADD:
551 		KASSERT(!(info->rti_flags & RTF_GATEWAY) ||
552 			info->rti_info[RTAX_GATEWAY] != NULL,
553 		    ("rtrequest: GATEWAY but no gateway"));
554 
555 		if (info->rti_ifa == NULL && (error = rt_getifa(info)))
556 			gotoerr(error);
557 		ifa = info->rti_ifa;
558 makeroute:
559 		R_Malloc(rt, struct rtentry *, sizeof *rt);
560 		if (rt == NULL)
561 			gotoerr(ENOBUFS);
562 		bzero(rt, sizeof *rt);
563 		rt->rt_flags = RTF_UP | info->rti_flags;
564 		error = rt_setgate(rt, dst, info->rti_info[RTAX_GATEWAY]);
565 		if (error != 0) {
566 			Free(rt);
567 			gotoerr(error);
568 		}
569 
570 		ndst = rt_key(rt);
571 		if (info->rti_info[RTAX_NETMASK] != NULL)
572 			rt_maskedcopy(dst, ndst, info->rti_info[RTAX_NETMASK]);
573 		else
574 			bcopy(dst, ndst, dst->sa_len);
575 
576 		/*
577 		 * Note that we now have a reference to the ifa.
578 		 * This moved from below so that rnh->rnh_addaddr() can
579 		 * examine the ifa and  ifa->ifa_ifp if it so desires.
580 		 */
581 		IFAREF(ifa);
582 		rt->rt_ifa = ifa;
583 		rt->rt_ifp = ifa->ifa_ifp;
584 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
585 
586 		rn = rnh->rnh_addaddr((char *)ndst,
587 				      (char *)info->rti_info[RTAX_NETMASK],
588 				      rnh, rt->rt_nodes);
589 		if (rn == NULL) {
590 			struct rtentry *oldrt;
591 
592 			/*
593 			 * We already have one of these in the tree.
594 			 * We do a special hack: if the old route was
595 			 * cloned, then we blow it away and try
596 			 * re-inserting the new one.
597 			 */
598 			oldrt = rtlookup(ndst, 0, RTF_CLONING | RTF_PRCLONING);
599 			if (oldrt != NULL) {
600 				--oldrt->rt_refcnt;
601 				if (oldrt->rt_flags & RTF_WASCLONED) {
602 					rtrequest(RTM_DELETE, rt_key(oldrt),
603 						  oldrt->rt_gateway,
604 						  rt_mask(oldrt),
605 						  oldrt->rt_flags, NULL);
606 					rn = rnh->rnh_addaddr((char *)ndst,
607 						  (char *)
608 						  info->rti_info[RTAX_NETMASK],
609 						  rnh, rt->rt_nodes);
610 				}
611 			}
612 		}
613 
614 		/*
615 		 * If it still failed to go into the tree,
616 		 * then un-make it (this should be a function).
617 		 */
618 		if (rn == NULL) {
619 			if (rt->rt_gwroute != NULL)
620 				rtfree(rt->rt_gwroute);
621 			IFAFREE(ifa);
622 			Free(rt_key(rt));
623 			Free(rt);
624 			gotoerr(EEXIST);
625 		}
626 
627 		/*
628 		 * If we got here from RESOLVE, then we are cloning
629 		 * so clone the rest, and note that we
630 		 * are a clone (and increment the parent's references)
631 		 */
632 		if (req == RTM_RESOLVE) {
633 			rt->rt_rmx = (*ret_nrt)->rt_rmx;    /* copy metrics */
634 			rt->rt_rmx.rmx_pksent = 0;  /* reset packet counter */
635 			if ((*ret_nrt)->rt_flags &
636 				       (RTF_CLONING | RTF_PRCLONING)) {
637 				rt->rt_parent = *ret_nrt;
638 				(*ret_nrt)->rt_refcnt++;
639 			}
640 		}
641 
642 		/*
643 		 * if this protocol has something to add to this then
644 		 * allow it to do that as well.
645 		 */
646 		if (ifa->ifa_rtrequest != NULL)
647 			ifa->ifa_rtrequest(req, rt, info);
648 
649 		/*
650 		 * We repeat the same procedure from rt_setgate() here because
651 		 * it doesn't fire when we call it there because the node
652 		 * hasn't been added to the tree yet.
653 		 */
654 		if (req == RTM_ADD && !(rt->rt_flags & RTF_HOST) &&
655 		    rt_mask(rt) != NULL) {
656 			struct rtfc_arg arg = { rt, rnh };
657 
658 			rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
659 					       (char *)rt_mask(rt),
660 					       rt_fixchange, &arg);
661 		}
662 
663 		/*
664 		 * Return the resulting rtentry,
665 		 * increasing the number of references by one.
666 		 */
667 		if (ret_nrt != NULL) {
668 			rt->rt_refcnt++;
669 			*ret_nrt = rt;
670 		}
671 		break;
672 	default:
673 		error = EOPNOTSUPP;
674 	}
675 bad:
676 	splx(s);
677 	return (error);
678 }
679 
680 /*
681  * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
682  * (i.e., the routes related to it by the operation of cloning).  This
683  * routine is iterated over all potential former-child-routes by way of
684  * rnh->rnh_walktree_from() above, and those that actually are children of
685  * the late parent (passed in as VP here) are themselves deleted.
686  */
687 static int
688 rt_fixdelete(struct radix_node *rn, void *vp)
689 {
690 	struct rtentry *rt = (struct rtentry *)rn;
691 	struct rtentry *rt0 = vp;
692 
693 	if (rt->rt_parent == rt0 &&
694 	    !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
695 		return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
696 				 rt->rt_flags, NULL);
697 	}
698 	return 0;
699 }
700 
701 /*
702  * This routine is called from rt_setgate() to do the analogous thing for
703  * adds and changes.  There is the added complication in this case of a
704  * middle insert; i.e., insertion of a new network route between an older
705  * network route and (cloned) host routes.  For this reason, a simple check
706  * of rt->rt_parent is insufficient; each candidate route must be tested
707  * against the (mask, value) of the new route (passed as before in vp)
708  * to see if the new route matches it.
709  *
710  * XXX - it may be possible to do fixdelete() for changes and reserve this
711  * routine just for adds.  I'm not sure why I thought it was necessary to do
712  * changes this way.
713  */
714 #ifdef DEBUG
715 static int rtfcdebug = 0;
716 #endif
717 
718 static int
719 rt_fixchange(struct radix_node *rn, void *vp)
720 {
721 	struct rtentry *rt = (struct rtentry *)rn;
722 	struct rtfc_arg *ap = vp;
723 	struct rtentry *rt0 = ap->rt0;
724 	struct radix_node_head *rnh = ap->rnh;
725 	u_char *xk1, *xm1, *xk2, *xmp;
726 	int i, len, mlen;
727 
728 #ifdef DEBUG
729 	if (rtfcdebug)
730 		printf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0);
731 #endif
732 
733 	if (rt->rt_parent == NULL ||
734 	    (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
735 #ifdef DEBUG
736 		if (rtfcdebug) printf("no parent, pinned or cloning\n");
737 #endif
738 		return 0;
739 	}
740 
741 	if (rt->rt_parent == rt0) {
742 #ifdef DEBUG
743 		if (rtfcdebug) printf("parent match\n");
744 #endif
745 		return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
746 				 rt->rt_flags, NULL);
747 	}
748 
749 	/*
750 	 * There probably is a function somewhere which does this...
751 	 * if not, there should be.
752 	 */
753 	len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
754 
755 	xk1 = (u_char *)rt_key(rt0);
756 	xm1 = (u_char *)rt_mask(rt0);
757 	xk2 = (u_char *)rt_key(rt);
758 
759 	/* avoid applying a less specific route */
760 	xmp = (u_char *)rt_mask(rt->rt_parent);
761 	mlen = rt_key(rt->rt_parent)->sa_len;
762 	if (mlen > rt_key(rt0)->sa_len) {
763 #ifdef DEBUG
764 		if (rtfcdebug)
765 			printf("rt_fixchange: inserting a less "
766 			       "specific route\n");
767 #endif
768 		return 0;
769 	}
770 	for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
771 		if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
772 #ifdef DEBUG
773 			if (rtfcdebug)
774 				printf("rt_fixchange: inserting a less "
775 				       "specific route\n");
776 #endif
777 			return 0;
778 		}
779 	}
780 
781 	for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
782 		if ((xk2[i] & xm1[i]) != xk1[i]) {
783 #ifdef DEBUG
784 			if (rtfcdebug) printf("no match\n");
785 #endif
786 			return 0;
787 		}
788 	}
789 
790 	/*
791 	 * OK, this node is a clone, and matches the node currently being
792 	 * changed/added under the node's mask.  So, get rid of it.
793 	 */
794 #ifdef DEBUG
795 	if (rtfcdebug) printf("deleting\n");
796 #endif
797 	return rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
798 			 rt->rt_flags, NULL);
799 }
800 
/*
 * Round 'a' up to the next multiple of sizeof(long); values that are
 * not positive yield sizeof(long).  The argument is parenthesized at
 * every use so that expression arguments expand correctly; note that
 * it is still evaluated more than once.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
802 
803 int
804 rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate)
805 {
806 	char *space, *oldspace;
807 	int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
808 	struct rtentry *rt = rt0;
809 	struct radix_node_head *rnh = rt_tables[dst->sa_family];
810 
811 	/*
812 	 * A host route with the destination equal to the gateway
813 	 * will interfere with keeping LLINFO in the routing
814 	 * table, so disallow it.
815 	 */
816 	if (((rt0->rt_flags & (RTF_HOST | RTF_GATEWAY | RTF_LLINFO)) ==
817 			      (RTF_HOST | RTF_GATEWAY)) &&
818 	    dst->sa_len == gate->sa_len &&
819 	    sa_equal(dst, gate)) {
820 		/*
821 		 * The route might already exist if this is an RTM_CHANGE
822 		 * or a routing redirect, so try to delete it.
823 		 */
824 		if (rt_key(rt0) != NULL)
825 			rtrequest(RTM_DELETE, rt_key(rt0), rt0->rt_gateway,
826 				  rt_mask(rt0), rt0->rt_flags, NULL);
827 		return EADDRNOTAVAIL;
828 	}
829 
830 	/*
831 	 * Both dst and gateway are stored in the same malloc'ed chunk
832 	 * (If I ever get my hands on....)
833 	 * if we need to malloc a new chunk, then keep the old one around
834 	 * till we don't need it any more.
835 	 */
836 	if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
837 		oldspace = (char *)rt_key(rt);
838 		R_Malloc(space, char *, dlen + glen);
839 		if (space == NULL)
840 			return ENOBUFS;
841 		rt->rt_nodes->rn_key = space;
842 	} else {
843 		space = (char *)rt_key(rt);	/* Just use the old space. */
844 		oldspace = NULL;
845 	}
846 
847 	/* Set the gateway value. */
848 	rt->rt_gateway = (struct sockaddr *)(space + dlen);
849 	bcopy(gate, rt->rt_gateway, glen);
850 
851 	if (oldspace != NULL) {
852 		/*
853 		 * If we allocated a new chunk, preserve the original dst.
854 		 * This way, rt_setgate() really just sets the gate
855 		 * and leaves the dst field alone.
856 		 */
857 		bcopy(dst, space, dlen);
858 		Free(oldspace);
859 	}
860 
861 	/*
862 	 * If there is already a gwroute, it's now almost definitely wrong
863 	 * so drop it.
864 	 */
865 	if (rt->rt_gwroute != NULL) {
866 		RTFREE(rt->rt_gwroute);
867 		rt->rt_gwroute = NULL;
868 	}
869 	if (rt->rt_flags & RTF_GATEWAY) {
870 		/*
871 		 * Cloning loop avoidance: In the presence of
872 		 * protocol-cloning and bad configuration, it is
873 		 * possible to get stuck in bottomless mutual recursion
874 		 * (rtrequest rt_setgate rtlookup).  We avoid this
875 		 * by not allowing protocol-cloning to operate for
876 		 * gateways (which is probably the correct choice
877 		 * anyway), and avoid the resulting reference loops
878 		 * by disallowing any route to run through itself as
879 		 * a gateway.  This is obviously mandatory when we
880 		 * get rt->rt_output().
881 		 *
882 		 * This breaks TTCP!  XXX JH
883 		 */
884 		rt->rt_gwroute = rtlookup(gate, 1, RTF_PRCLONING);
885 		if (rt->rt_gwroute == rt) {
886 			rt->rt_gwroute = NULL;
887 			--rt->rt_refcnt;
888 			return EDQUOT; /* failure */
889 		}
890 	}
891 
892 	/*
893 	 * This isn't going to do anything useful for host routes, so
894 	 * don't bother.  Also make sure we have a reasonable mask
895 	 * (we don't yet have one during adds).
896 	 */
897 	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
898 		struct rtfc_arg arg = { rt, rnh };
899 
900 		rnh->rnh_walktree_from(rnh, (char *)rt_key(rt),
901 				       (char *)rt_mask(rt),
902 				       rt_fixchange, &arg);
903 	}
904 
905 	return 0;
906 }
907 
/*
 * Copy 'src' to 'dst' with 'netmask' applied.
 *
 * The first two bytes (sa_len and sa_family) are copied unchanged.  The
 * following bytes are ANDed with the corresponding netmask bytes, up to
 * the shorter of the two lengths taken from the first byte of 'netmask'
 * and of 'src'.  Whatever remains of dst up to src's length is zeroed.
 */
static void
rt_maskedcopy(
	struct sockaddr *src,
	struct sockaddr *dst,
	struct sockaddr *netmask)
{
	const unsigned char *from = (const unsigned char *)src;
	const unsigned char *mask = (const unsigned char *)netmask;
	unsigned char *out = (unsigned char *)dst;
	unsigned char *src_end = out + from[0];
	unsigned char *mask_end = out + mask[0];
	unsigned char *stop = (mask_end > src_end) ? src_end : mask_end;

	/* copies sa_len & sa_family */
	out[0] = from[0];
	out[1] = from[1];
	out += 2;
	from += 2;
	mask += 2;

	for (; out < stop; out++)
		*out = *from++ & *mask++;

	if (out < src_end)
		bzero(out, src_end - out);
}
929 
930 int
931 rt_llroute(struct sockaddr *dst, struct rtentry *rt0, struct rtentry **drt)
932 {
933 	struct rtentry *up_rt, *rt;
934 
935 	if (!(rt0->rt_flags & RTF_UP)) {
936 		up_rt = rtlookup(dst, 1, 0UL);
937 		if (up_rt == NULL)
938 			return (EHOSTUNREACH);
939 		up_rt->rt_refcnt--;
940 	} else
941 		up_rt = rt0;
942 	if (up_rt->rt_flags & RTF_GATEWAY) {
943 		if (up_rt->rt_gwroute == NULL) {
944 			up_rt->rt_gwroute = rtlookup(up_rt->rt_gateway, 1, 0UL);
945 			if (up_rt->rt_gwroute == NULL)
946 				return (EHOSTUNREACH);
947 		} else if (!(up_rt->rt_gwroute->rt_flags & RTF_UP)) {
948 			rtfree(up_rt->rt_gwroute);
949 			up_rt->rt_gwroute = rtlookup(up_rt->rt_gateway, 1, 0UL);
950 			if (up_rt->rt_gwroute == NULL)
951 				return (EHOSTUNREACH);
952 		}
953 		rt = up_rt->rt_gwroute;
954 	} else
955 		rt = up_rt;
956 	if (rt->rt_flags & RTF_REJECT &&
957 	    (rt->rt_rmx.rmx_expire == 0 ||		/* rt doesn't expire */
958 	     time_second < rt->rt_rmx.rmx_expire))	/* rt not expired */
959 		return (rt->rt_flags & RTF_HOST ?  EHOSTDOWN : EHOSTUNREACH);
960 	*drt = rt;
961 	return 0;
962 }
963 
964 /*
965  * Set up a routing table entry, normally
966  * for an interface.
967  */
968 int
969 rtinit(struct ifaddr *ifa, int cmd, int flags)
970 {
971 	struct sockaddr *dst, *deldst, *netmask;
972 	struct rtentry *rt;
973 	struct rtentry *nrt = NULL;
974 	struct mbuf *m = NULL;
975 	struct radix_node_head *rnh;
976 	struct radix_node *rn;
977 	struct rt_addrinfo info;
978 	int error;
979 
980 	if (flags & RTF_HOST) {
981 		dst = ifa->ifa_dstaddr;
982 		netmask = NULL;
983 	} else {
984 		dst = ifa->ifa_addr;
985 		netmask = ifa->ifa_netmask;
986 	}
987 	/*
988 	 * If it's a delete, check that if it exists, it's on the correct
989 	 * interface or we might scrub a route to another ifa which would
990 	 * be confusing at best and possibly worse.
991 	 */
992 	if (cmd == RTM_DELETE) {
993 		/*
994 		 * It's a delete, so it should already exist..
995 		 * If it's a net, mask off the host bits
996 		 * (Assuming we have a mask)
997 		 */
998 		if (netmask != NULL) {
999 			m = m_get(MB_DONTWAIT, MT_SONAME);
1000 			if (m == NULL)
1001 				return (ENOBUFS);
1002 			deldst = mtod(m, struct sockaddr *);
1003 			rt_maskedcopy(dst, deldst, netmask);
1004 			dst = deldst;
1005 		}
1006 		/*
1007 		 * Look up an rtentry that is in the routing tree and
1008 		 * contains the correct info.
1009 		 */
1010 		if ((rnh = rt_tables[dst->sa_family]) == NULL ||
1011 		    (rn = rnh->rnh_lookup((char *)dst,
1012 					  (char *)netmask, rnh)) == NULL ||
1013 		    ((struct rtentry *)rn)->rt_ifa != ifa ||
1014 		    !sa_equal((struct sockaddr *)rn->rn_key, dst)) {
1015 			if (m != NULL)
1016 				m_free(m);
1017 			return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1018 		}
1019 		/* XXX */
1020 #if 0
1021 		else {
1022 			/*
1023 			 * One would think that as we are deleting, and we know
1024 			 * it doesn't exist, we could just return at this point
1025 			 * with an "ELSE" clause, but apparently not..
1026 			 */
1027 			return (flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1028 		}
1029 #endif
1030 	}
1031 	/*
1032 	 * Do the actual request
1033 	 */
1034 	bzero(&info, sizeof(info));
1035 	info.rti_ifa = ifa;
1036 	info.rti_flags = flags | ifa->ifa_flags;
1037 	info.rti_info[RTAX_DST] = dst;
1038 	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1039 	info.rti_info[RTAX_NETMASK] = netmask;
1040 	error = rtrequest1(cmd, &info, &nrt);
1041 	if (error == 0 && (rt = nrt) != NULL) {
1042 		/*
1043 		 * notify any listening routing agents of the change
1044 		 */
1045 		rt_newaddrmsg(cmd, ifa, error, rt);
1046 		if (cmd == RTM_DELETE) {
1047 			/*
1048 			 * If we are deleting, and we found an entry, then
1049 			 * it's been removed from the tree.. now throw it away.
1050 			 */
1051 			if (rt->rt_refcnt <= 0) {
1052 				rt->rt_refcnt++; /* make a 1->0 transition */
1053 				rtfree(rt);
1054 			}
1055 		} else if (cmd == RTM_ADD) {
1056 			/*
1057 			 * We just wanted to add it.. we don't actually
1058 			 * need a reference.
1059 			 */
1060 			rt->rt_refcnt--;
1061 		}
1062 	}
1063 	if (m != NULL)
1064 		m_free(m);
1065 	return (error);
1066 }
1067 
1068 /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
1069 SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
1070