xref: /netbsd-src/sys/net/route.c (revision 8450a7c42673d65e3b1f6560d3b6ecd317a6cbe8)
1 /*	$NetBSD: route.c,v 1.181 2016/10/25 02:45:09 ozaki-r Exp $	*/
2 
3 /*-
4  * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Kevin M. Lahey of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
35  * All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the project nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  */
61 
62 /*
63  * Copyright (c) 1980, 1986, 1991, 1993
64  *	The Regents of the University of California.  All rights reserved.
65  *
66  * Redistribution and use in source and binary forms, with or without
67  * modification, are permitted provided that the following conditions
68  * are met:
69  * 1. Redistributions of source code must retain the above copyright
70  *    notice, this list of conditions and the following disclaimer.
71  * 2. Redistributions in binary form must reproduce the above copyright
72  *    notice, this list of conditions and the following disclaimer in the
73  *    documentation and/or other materials provided with the distribution.
74  * 3. Neither the name of the University nor the names of its contributors
75  *    may be used to endorse or promote products derived from this software
76  *    without specific prior written permission.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88  * SUCH DAMAGE.
89  *
90  *	@(#)route.c	8.3 (Berkeley) 1/9/95
91  */
92 
93 #ifdef _KERNEL_OPT
94 #include "opt_inet.h"
95 #include "opt_route.h"
96 #include "opt_net_mpsafe.h"
97 #endif
98 
99 #include <sys/cdefs.h>
100 __KERNEL_RCSID(0, "$NetBSD: route.c,v 1.181 2016/10/25 02:45:09 ozaki-r Exp $");
101 
102 #include <sys/param.h>
103 #ifdef RTFLUSH_DEBUG
104 #include <sys/sysctl.h>
105 #endif
106 #include <sys/systm.h>
107 #include <sys/callout.h>
108 #include <sys/proc.h>
109 #include <sys/mbuf.h>
110 #include <sys/socket.h>
111 #include <sys/socketvar.h>
112 #include <sys/domain.h>
113 #include <sys/protosw.h>
114 #include <sys/kernel.h>
115 #include <sys/ioctl.h>
116 #include <sys/pool.h>
117 #include <sys/kauth.h>
118 #include <sys/workqueue.h>
119 
120 #include <net/if.h>
121 #include <net/if_dl.h>
122 #include <net/route.h>
123 
124 #include <netinet/in.h>
125 #include <netinet/in_var.h>
126 
/*
 * rtcache_debug() gates the route-cache debug printf()s in this file.
 * With RTFLUSH_DEBUG it evaluates to the _rtcache_debug flag (hinted as
 * unlikely); otherwise it is the constant 0.
 */
#ifdef RTFLUSH_DEBUG
#define	rtcache_debug() __predict_false(_rtcache_debug)
#else /* RTFLUSH_DEBUG */
#define	rtcache_debug() 0
#endif /* RTFLUSH_DEBUG */

/* Routing statistics counters (e.g. rts_unreach, rts_badredirect). */
struct rtstat		rtstat;

static int		rttrash;	/* routes not in table but not freed */

/* Pools sized for struct rtentry and struct rttimer; set up in rt_init(). */
static struct pool	rtentry_pool;
static struct pool	rttimer_pool;

static struct callout	rt_timer_ch; /* callout for rt_timer_timer() */
static struct workqueue	*rt_timer_wq;
static struct work	rt_timer_wk;

static void	rt_timer_init(void);
static void	rt_timer_queue_remove_all(struct rttimer_queue *);
static void	rt_timer_remove_all(struct rtentry *);
static void	rt_timer_timer(void *);

#ifdef RTFLUSH_DEBUG
/* Backing variable for rtcache_debug(); exported through sysctl. */
static int _rtcache_debug = 0;
#endif /* RTFLUSH_DEBUG */

/* Handle returned by kauth_listen_scope() in rt_init(). */
static kauth_listener_t route_listener;

static int rtdeletemsg(struct rtentry *);
static void rtflushall(int);

static void rt_maskedcopy(const struct sockaddr *,
    struct sockaddr *, const struct sockaddr *);

static void rtcache_clear(struct route *);
static void rtcache_clear_rtentry(int, struct rtentry *);
static void rtcache_invalidate(struct dom_rtlist *);

#ifdef DDB
static void db_print_sa(const struct sockaddr *);
static void db_print_ifa(struct ifaddr *);
static int db_show_rtentry(struct rtentry *, void *);
#endif
170 
#ifdef RTFLUSH_DEBUG
static void sysctl_net_rtcache_setup(struct sysctllog **);

/*
 * Create the "rtcache" sysctl node below CTL_NET and, beneath it, a
 * read/write integer leaf "debug" backed by _rtcache_debug.
 *
 * Failures are silent: if the node cannot be created the leaf is not
 * attempted, and a failure to create the leaf is ignored.
 */
static void
sysctl_net_rtcache_setup(struct sysctllog **clog)
{
	const struct sysctlnode *rnode;
	int error;

	error = sysctl_createv(clog, 0, NULL, &rnode, CTLFLAG_PERMANENT,
	    CTLTYPE_NODE,
	    "rtcache", SYSCTL_DESCR("Route cache related settings"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0)
		return;

	/* Nothing to report or undo if the leaf cannot be created. */
	(void)sysctl_createv(clog, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,
	    "debug", SYSCTL_DESCR("Debug route caches"),
	    NULL, 0, &_rtcache_debug, 0, CTL_CREATE, CTL_EOL);
}
#endif /* RTFLUSH_DEBUG */
190 
191 static inline void
192 rt_destroy(struct rtentry *rt)
193 {
194 	if (rt->_rt_key != NULL)
195 		sockaddr_free(rt->_rt_key);
196 	if (rt->rt_gateway != NULL)
197 		sockaddr_free(rt->rt_gateway);
198 	if (rt_gettag(rt) != NULL)
199 		sockaddr_free(rt_gettag(rt));
200 	rt->_rt_key = rt->rt_gateway = rt->rt_tag = NULL;
201 }
202 
203 static inline const struct sockaddr *
204 rt_setkey(struct rtentry *rt, const struct sockaddr *key, int flags)
205 {
206 	if (rt->_rt_key == key)
207 		goto out;
208 
209 	if (rt->_rt_key != NULL)
210 		sockaddr_free(rt->_rt_key);
211 	rt->_rt_key = sockaddr_dup(key, flags);
212 out:
213 	rt->rt_nodes->rn_key = (const char *)rt->_rt_key;
214 	return rt->_rt_key;
215 }
216 
217 struct ifaddr *
218 rt_get_ifa(struct rtentry *rt)
219 {
220 	struct ifaddr *ifa;
221 
222 	if ((ifa = rt->rt_ifa) == NULL)
223 		return ifa;
224 	else if (ifa->ifa_getifa == NULL)
225 		return ifa;
226 #if 0
227 	else if (ifa->ifa_seqno != NULL && *ifa->ifa_seqno == rt->rt_ifa_seqno)
228 		return ifa;
229 #endif
230 	else {
231 		ifa = (*ifa->ifa_getifa)(ifa, rt_getkey(rt));
232 		if (ifa == NULL)
233 			return NULL;
234 		rt_replace_ifa(rt, ifa);
235 		return ifa;
236 	}
237 }
238 
239 static void
240 rt_set_ifa1(struct rtentry *rt, struct ifaddr *ifa)
241 {
242 	rt->rt_ifa = ifa;
243 	if (ifa->ifa_seqno != NULL)
244 		rt->rt_ifa_seqno = *ifa->ifa_seqno;
245 }
246 
247 /*
248  * Is this route the connected route for the ifa?
249  */
250 static int
251 rt_ifa_connected(const struct rtentry *rt, const struct ifaddr *ifa)
252 {
253 	const struct sockaddr *key, *dst, *odst;
254 	struct sockaddr_storage maskeddst;
255 
256 	key = rt_getkey(rt);
257 	dst = rt->rt_flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
258 	if (dst == NULL ||
259 	    dst->sa_family != key->sa_family ||
260 	    dst->sa_len != key->sa_len)
261 		return 0;
262 	if ((rt->rt_flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
263 		odst = dst;
264 		dst = (struct sockaddr *)&maskeddst;
265 		rt_maskedcopy(odst, (struct sockaddr *)&maskeddst,
266 		    ifa->ifa_netmask);
267 	}
268 	return (memcmp(dst, key, dst->sa_len) == 0);
269 }
270 
271 void
272 rt_replace_ifa(struct rtentry *rt, struct ifaddr *ifa)
273 {
274 	if (rt->rt_ifa &&
275 	    rt->rt_ifa != ifa &&
276 	    rt->rt_ifa->ifa_flags & IFA_ROUTE &&
277 	    rt_ifa_connected(rt, rt->rt_ifa))
278 	{
279 		RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
280 		    "replace deleted IFA_ROUTE\n",
281 		    (void *)rt->_rt_key, (void *)rt->rt_ifa);
282 		rt->rt_ifa->ifa_flags &= ~IFA_ROUTE;
283 		if (rt_ifa_connected(rt, ifa)) {
284 			RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
285 			    "replace added IFA_ROUTE\n",
286 			    (void *)rt->_rt_key, (void *)ifa);
287 			ifa->ifa_flags |= IFA_ROUTE;
288 		}
289 	}
290 
291 	ifaref(ifa);
292 	ifafree(rt->rt_ifa);
293 	rt_set_ifa1(rt, ifa);
294 }
295 
/*
 * Attach `ifa' to a route, taking a reference on it with ifaref().
 * Whatever rt->rt_ifa held before is neither inspected nor released.
 */
static void
rt_set_ifa(struct rtentry *rt, struct ifaddr *ifa)
{

	ifaref(ifa);
	rt_set_ifa1(rt, ifa);
}
302 
303 static int
304 route_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
305     void *arg0, void *arg1, void *arg2, void *arg3)
306 {
307 	struct rt_msghdr *rtm;
308 	int result;
309 
310 	result = KAUTH_RESULT_DEFER;
311 	rtm = arg1;
312 
313 	if (action != KAUTH_NETWORK_ROUTE)
314 		return result;
315 
316 	if (rtm->rtm_type == RTM_GET)
317 		result = KAUTH_RESULT_ALLOW;
318 
319 	return result;
320 }
321 
/*
 * One-time initialisation of the routing code: optional debug sysctl,
 * the rtentry and rttimer pools, the radix/routing tables, and the
 * kauth listener for routing requests.
 */
void
rt_init(void)
{

#ifdef RTFLUSH_DEBUG
	sysctl_net_rtcache_setup(NULL);
#endif

	/* Both pools are created at IPL_SOFTNET. */
	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, 0, 0, "rtentpl",
	    NULL, IPL_SOFTNET);
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, 0, 0, "rttmrpl",
	    NULL, IPL_SOFTNET);

	rn_init();	/* initialize all zeroes, all ones, mask table */
	rtbl_init();

	/* Register route_listener_cb() on the network scope. */
	route_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK,
	    route_listener_cb, NULL);
}
341 
342 static void
343 rtflushall(int family)
344 {
345 	struct domain *dom;
346 
347 	if (rtcache_debug())
348 		printf("%s: enter\n", __func__);
349 
350 	if ((dom = pffinddomain(family)) == NULL)
351 		return;
352 
353 	rtcache_invalidate(&dom->dom_rtcache);
354 }
355 
356 static void
357 rtcache(struct route *ro)
358 {
359 	struct domain *dom;
360 
361 	rtcache_invariants(ro);
362 	KASSERT(ro->_ro_rt != NULL);
363 	KASSERT(ro->ro_invalid == false);
364 	KASSERT(rtcache_getdst(ro) != NULL);
365 
366 	if ((dom = pffinddomain(rtcache_getdst(ro)->sa_family)) == NULL)
367 		return;
368 
369 	LIST_INSERT_HEAD(&dom->dom_rtcache, ro, ro_rtcache_next);
370 	rtcache_invariants(ro);
371 }
372 
#ifdef RT_DEBUG
/*
 * Debug helper: print one line describing a route entry (pointer,
 * destination key, gateway, flags and interface name).  Missing
 * fields are printed as "(NULL)".
 */
static void
dump_rt(const struct rtentry *rt)
{
	char buf[512];

	aprint_normal("rt: ");
	aprint_normal("p=%p ", rt);
	if (rt->_rt_key == NULL) {
		aprint_normal("dst=(NULL) ");
	} else {
		sockaddr_format(rt->_rt_key, buf, sizeof(buf));
		aprint_normal("dst=%s ", buf);
	}
	if (rt->rt_gateway == NULL) {
		aprint_normal("gw=(NULL) ");
	} else {
		/* Format the gateway itself, not the destination key. */
		sockaddr_format(rt->rt_gateway, buf, sizeof(buf));
		aprint_normal("gw=%s ", buf);
	}
	aprint_normal("flags=%x ", rt->rt_flags);
	if (rt->rt_ifp == NULL) {
		aprint_normal("if=(NULL) ");
	} else {
		aprint_normal("if=%s ", rt->rt_ifp->if_xname);
	}
	aprint_normal("\n");
}
#endif /* RT_DEBUG */
402 
403 /*
404  * Packet routing routines. If success, refcnt of a returned rtentry
405  * will be incremented. The caller has to rtfree it by itself.
406  */
407 struct rtentry *
408 rtalloc1(const struct sockaddr *dst, int report)
409 {
410 	rtbl_t *rtbl;
411 	struct rtentry *rt;
412 	int s;
413 
414 	s = splsoftnet();
415 	rtbl = rt_gettable(dst->sa_family);
416 	if (rtbl == NULL)
417 		goto miss;
418 
419 	rt = rt_matchaddr(rtbl, dst);
420 	if (rt == NULL)
421 		goto miss;
422 
423 	rt->rt_refcnt++;
424 
425 	splx(s);
426 	return rt;
427 miss:
428 	rtstat.rts_unreach++;
429 	if (report) {
430 		struct rt_addrinfo info;
431 
432 		memset(&info, 0, sizeof(info));
433 		info.rti_info[RTAX_DST] = dst;
434 		rt_missmsg(RTM_MISS, &info, 0, 0);
435 	}
436 	splx(s);
437 	return NULL;
438 }
439 
#if defined(DEBUG) && !defined(NET_MPSAFE)
/*
 * Walk every rtcache of the given family's domain and assert that any
 * rtentry a cache points at has a positive reference count, i.e. it is
 * referenced at least by that cache.
 */
static void
rtcache_check_rtrefcnt(int family)
{
	struct domain *dom;
	struct route *ro;

	dom = pffinddomain(family);
	if (dom == NULL)
		return;

	LIST_FOREACH(ro, &dom->dom_rtcache, ro_rtcache_next) {
		KDASSERT(ro->_ro_rt == NULL || ro->_ro_rt->rt_refcnt > 0);
	}
}
#endif
459 
460 void
461 rtfree(struct rtentry *rt)
462 {
463 	struct ifaddr *ifa;
464 
465 	KASSERT(rt != NULL);
466 	KASSERT(rt->rt_refcnt > 0);
467 
468 	rt->rt_refcnt--;
469 #if defined(DEBUG) && !defined(NET_MPSAFE)
470 	if (rt_getkey(rt) != NULL)
471 		rtcache_check_rtrefcnt(rt_getkey(rt)->sa_family);
472 #endif
473 	if (rt->rt_refcnt == 0 && (rt->rt_flags & RTF_UP) == 0) {
474 		rt_assert_inactive(rt);
475 		rttrash--;
476 		ifa = rt->rt_ifa;
477 		rt->rt_ifa = NULL;
478 		ifafree(ifa);
479 		rt->rt_ifp = NULL;
480 		rt_destroy(rt);
481 		pool_put(&rtentry_pool, rt);
482 	}
483 }
484 
485 /*
486  * Force a routing table entry to the specified
487  * destination to go through the given gateway.
488  * Normally called as a result of a routing redirect
489  * message from the network layer.
490  *
491  * N.B.: must be called at splsoftnet
492  */
493 void
494 rtredirect(const struct sockaddr *dst, const struct sockaddr *gateway,
495 	const struct sockaddr *netmask, int flags, const struct sockaddr *src,
496 	struct rtentry **rtp)
497 {
498 	struct rtentry *rt;
499 	int error = 0;
500 	uint64_t *stat = NULL;
501 	struct rt_addrinfo info;
502 	struct ifaddr *ifa;
503 	struct psref psref;
504 
505 	/* verify the gateway is directly reachable */
506 	if ((ifa = ifa_ifwithnet_psref(gateway, &psref)) == NULL) {
507 		error = ENETUNREACH;
508 		goto out;
509 	}
510 	rt = rtalloc1(dst, 0);
511 	/*
512 	 * If the redirect isn't from our current router for this dst,
513 	 * it's either old or wrong.  If it redirects us to ourselves,
514 	 * we have a routing loop, perhaps as a result of an interface
515 	 * going down recently.
516 	 */
517 	if (!(flags & RTF_DONE) && rt &&
518 	     (sockaddr_cmp(src, rt->rt_gateway) != 0 || rt->rt_ifa != ifa))
519 		error = EINVAL;
520 	else {
521 		int s = pserialize_read_enter();
522 		struct ifaddr *_ifa;
523 
524 		_ifa = ifa_ifwithaddr(gateway);
525 		if (_ifa != NULL)
526 			error = EHOSTUNREACH;
527 		pserialize_read_exit(s);
528 	}
529 	if (error)
530 		goto done;
531 	/*
532 	 * Create a new entry if we just got back a wildcard entry
533 	 * or the lookup failed.  This is necessary for hosts
534 	 * which use routing redirects generated by smart gateways
535 	 * to dynamically build the routing tables.
536 	 */
537 	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
538 		goto create;
539 	/*
540 	 * Don't listen to the redirect if it's
541 	 * for a route to an interface.
542 	 */
543 	if (rt->rt_flags & RTF_GATEWAY) {
544 		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
545 			/*
546 			 * Changing from route to net => route to host.
547 			 * Create new route, rather than smashing route to net.
548 			 */
549 		create:
550 			if (rt != NULL)
551 				rtfree(rt);
552 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
553 			memset(&info, 0, sizeof(info));
554 			info.rti_info[RTAX_DST] = dst;
555 			info.rti_info[RTAX_GATEWAY] = gateway;
556 			info.rti_info[RTAX_NETMASK] = netmask;
557 			info.rti_ifa = ifa;
558 			info.rti_flags = flags;
559 			rt = NULL;
560 			error = rtrequest1(RTM_ADD, &info, &rt);
561 			if (rt != NULL)
562 				flags = rt->rt_flags;
563 			stat = &rtstat.rts_dynamic;
564 		} else {
565 			/*
566 			 * Smash the current notion of the gateway to
567 			 * this destination.  Should check about netmask!!!
568 			 */
569 			error = rt_setgate(rt, gateway);
570 			if (error == 0) {
571 				rt->rt_flags |= RTF_MODIFIED;
572 				flags |= RTF_MODIFIED;
573 			}
574 			stat = &rtstat.rts_newgateway;
575 		}
576 	} else
577 		error = EHOSTUNREACH;
578 done:
579 	if (rt) {
580 		if (rtp != NULL && !error)
581 			*rtp = rt;
582 		else
583 			rtfree(rt);
584 	}
585 out:
586 	if (error)
587 		rtstat.rts_badredirect++;
588 	else if (stat != NULL)
589 		(*stat)++;
590 	memset(&info, 0, sizeof(info));
591 	info.rti_info[RTAX_DST] = dst;
592 	info.rti_info[RTAX_GATEWAY] = gateway;
593 	info.rti_info[RTAX_NETMASK] = netmask;
594 	info.rti_info[RTAX_AUTHOR] = src;
595 	rt_missmsg(RTM_REDIRECT, &info, flags, error);
596 	ifa_release(ifa, &psref);
597 }
598 
599 /*
600  * Delete a route and generate a message.
601  * It doesn't free a passed rt.
602  */
603 static int
604 rtdeletemsg(struct rtentry *rt)
605 {
606 	int error;
607 	struct rt_addrinfo info;
608 	struct rtentry *retrt;
609 
610 	/*
611 	 * Request the new route so that the entry is not actually
612 	 * deleted.  That will allow the information being reported to
613 	 * be accurate (and consistent with route_output()).
614 	 */
615 	memset(&info, 0, sizeof(info));
616 	info.rti_info[RTAX_DST] = rt_getkey(rt);
617 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
618 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
619 	info.rti_flags = rt->rt_flags;
620 	error = rtrequest1(RTM_DELETE, &info, &retrt);
621 
622 	rt_missmsg(RTM_DELETE, &info, info.rti_flags, error);
623 
624 	if (error == 0)
625 		rtfree(retrt);
626 	return error;
627 }
628 
629 struct ifaddr *
630 ifa_ifwithroute_psref(int flags, const struct sockaddr *dst,
631 	const struct sockaddr *gateway, struct psref *psref)
632 {
633 	struct ifaddr *ifa = NULL;
634 
635 	if ((flags & RTF_GATEWAY) == 0) {
636 		/*
637 		 * If we are adding a route to an interface,
638 		 * and the interface is a pt to pt link
639 		 * we should search for the destination
640 		 * as our clue to the interface.  Otherwise
641 		 * we can use the local address.
642 		 */
643 		if ((flags & RTF_HOST) && gateway->sa_family != AF_LINK)
644 			ifa = ifa_ifwithdstaddr_psref(dst, psref);
645 		if (ifa == NULL)
646 			ifa = ifa_ifwithaddr_psref(gateway, psref);
647 	} else {
648 		/*
649 		 * If we are adding a route to a remote net
650 		 * or host, the gateway may still be on the
651 		 * other end of a pt to pt link.
652 		 */
653 		ifa = ifa_ifwithdstaddr_psref(gateway, psref);
654 	}
655 	if (ifa == NULL)
656 		ifa = ifa_ifwithnet_psref(gateway, psref);
657 	if (ifa == NULL) {
658 		int s;
659 		struct rtentry *rt;
660 
661 		rt = rtalloc1(dst, 0);
662 		if (rt == NULL)
663 			return NULL;
664 		/*
665 		 * Just in case. May not need to do this workaround.
666 		 * Revisit when working on rtentry MP-ification.
667 		 */
668 		s = pserialize_read_enter();
669 		IFADDR_READER_FOREACH(ifa, rt->rt_ifp) {
670 			if (ifa == rt->rt_ifa)
671 				break;
672 		}
673 		if (ifa != NULL)
674 			ifa_acquire(ifa, psref);
675 		pserialize_read_exit(s);
676 		rtfree(rt);
677 		if (ifa == NULL)
678 			return NULL;
679 	}
680 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
681 		struct ifaddr *nifa;
682 		int s;
683 
684 		s = pserialize_read_enter();
685 		nifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
686 		if (nifa != NULL) {
687 			ifa_release(ifa, psref);
688 			ifa_acquire(nifa, psref);
689 			ifa = nifa;
690 		}
691 		pserialize_read_exit(s);
692 	}
693 	return ifa;
694 }
695 
696 /*
697  * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
698  * The caller has to rtfree it by itself.
699  */
700 int
701 rtrequest(int req, const struct sockaddr *dst, const struct sockaddr *gateway,
702 	const struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
703 {
704 	struct rt_addrinfo info;
705 
706 	memset(&info, 0, sizeof(info));
707 	info.rti_flags = flags;
708 	info.rti_info[RTAX_DST] = dst;
709 	info.rti_info[RTAX_GATEWAY] = gateway;
710 	info.rti_info[RTAX_NETMASK] = netmask;
711 	return rtrequest1(req, &info, ret_nrt);
712 }
713 
714 /*
715  * It's a utility function to add/remove a route to/from the routing table
716  * and tell user processes the addition/removal on success.
717  */
718 int
719 rtrequest_newmsg(const int req, const struct sockaddr *dst,
720 	const struct sockaddr *gateway, const struct sockaddr *netmask,
721 	const int flags)
722 {
723 	int error;
724 	struct rtentry *ret_nrt = NULL;
725 
726 	KASSERT(req == RTM_ADD || req == RTM_DELETE);
727 
728 	error = rtrequest(req, dst, gateway, netmask, flags, &ret_nrt);
729 	if (error != 0)
730 		return error;
731 
732 	KASSERT(ret_nrt != NULL);
733 
734 	rt_newmsg(req, ret_nrt); /* tell user process */
735 	rtfree(ret_nrt);
736 
737 	return 0;
738 }
739 
740 struct ifnet *
741 rt_getifp(struct rt_addrinfo *info, struct psref *psref)
742 {
743 	const struct sockaddr *ifpaddr = info->rti_info[RTAX_IFP];
744 
745 	if (info->rti_ifp != NULL)
746 		return NULL;
747 	/*
748 	 * ifp may be specified by sockaddr_dl when protocol address
749 	 * is ambiguous
750 	 */
751 	if (ifpaddr != NULL && ifpaddr->sa_family == AF_LINK) {
752 		struct ifaddr *ifa;
753 		int s = pserialize_read_enter();
754 
755 		ifa = ifa_ifwithnet(ifpaddr);
756 		if (ifa != NULL)
757 			info->rti_ifp = if_get_byindex(ifa->ifa_ifp->if_index,
758 			    psref);
759 		pserialize_read_exit(s);
760 	}
761 
762 	return info->rti_ifp;
763 }
764 
765 struct ifaddr *
766 rt_getifa(struct rt_addrinfo *info, struct psref *psref)
767 {
768 	struct ifaddr *ifa = NULL;
769 	const struct sockaddr *dst = info->rti_info[RTAX_DST];
770 	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
771 	const struct sockaddr *ifaaddr = info->rti_info[RTAX_IFA];
772 	int flags = info->rti_flags;
773 	const struct sockaddr *sa;
774 
775 	if (info->rti_ifa == NULL && ifaaddr != NULL) {
776 		ifa = ifa_ifwithaddr_psref(ifaaddr, psref);
777 		if (ifa != NULL)
778 			goto got;
779 	}
780 
781 	sa = ifaaddr != NULL ? ifaaddr :
782 	    (gateway != NULL ? gateway : dst);
783 	if (sa != NULL && info->rti_ifp != NULL)
784 		ifa = ifaof_ifpforaddr_psref(sa, info->rti_ifp, psref);
785 	else if (dst != NULL && gateway != NULL)
786 		ifa = ifa_ifwithroute_psref(flags, dst, gateway, psref);
787 	else if (sa != NULL)
788 		ifa = ifa_ifwithroute_psref(flags, sa, sa, psref);
789 	if (ifa == NULL)
790 		return NULL;
791 got:
792 	if (ifa->ifa_getifa != NULL) {
793 		/* FIXME NOMPSAFE */
794 		ifa = (*ifa->ifa_getifa)(ifa, dst);
795 		if (ifa == NULL)
796 			return NULL;
797 		ifa_acquire(ifa, psref);
798 	}
799 	info->rti_ifa = ifa;
800 	if (info->rti_ifp == NULL)
801 		info->rti_ifp = ifa->ifa_ifp;
802 	return ifa;
803 }
804 
805 /*
806  * If it suceeds and ret_nrt isn't NULL, refcnt of ret_nrt is incremented.
807  * The caller has to rtfree it by itself.
808  */
809 int
810 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
811 {
812 	int s = splsoftnet(), ss;
813 	int error = 0, rc;
814 	struct rtentry *rt;
815 	rtbl_t *rtbl;
816 	struct ifaddr *ifa = NULL, *ifa2 = NULL;
817 	struct sockaddr_storage maskeddst;
818 	const struct sockaddr *dst = info->rti_info[RTAX_DST];
819 	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
820 	const struct sockaddr *netmask = info->rti_info[RTAX_NETMASK];
821 	int flags = info->rti_flags;
822 	struct psref psref_ifp, psref_ifa;
823 	int bound = 0;
824 	struct ifnet *ifp = NULL;
825 	bool need_to_release_ifa = true;
826 #define senderr(x) { error = x ; goto bad; }
827 
828 	bound = curlwp_bind();
829 	if ((rtbl = rt_gettable(dst->sa_family)) == NULL)
830 		senderr(ESRCH);
831 	if (flags & RTF_HOST)
832 		netmask = NULL;
833 	switch (req) {
834 	case RTM_DELETE:
835 		if (netmask) {
836 			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
837 			    netmask);
838 			dst = (struct sockaddr *)&maskeddst;
839 		}
840 		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
841 			senderr(ESRCH);
842 		if ((rt = rt_deladdr(rtbl, dst, netmask)) == NULL)
843 			senderr(ESRCH);
844 		rt->rt_flags &= ~RTF_UP;
845 		if ((ifa = rt->rt_ifa)) {
846 			if (ifa->ifa_flags & IFA_ROUTE &&
847 			    rt_ifa_connected(rt, ifa)) {
848 				RT_DPRINTF("rt->_rt_key = %p, ifa = %p, "
849 				    "deleted IFA_ROUTE\n",
850 				    (void *)rt->_rt_key, (void *)ifa);
851 				ifa->ifa_flags &= ~IFA_ROUTE;
852 			}
853 			if (ifa->ifa_rtrequest)
854 				ifa->ifa_rtrequest(RTM_DELETE, rt, info);
855 			ifa = NULL;
856 		}
857 		rttrash++;
858 		rt_timer_remove_all(rt);
859 		if (ret_nrt) {
860 			*ret_nrt = rt;
861 			rt->rt_refcnt++;
862 		} else if (rt->rt_refcnt <= 0) {
863 			/* Adjust the refcount */
864 			rt->rt_refcnt++;
865 			rtfree(rt);
866 		}
867 		rtcache_clear_rtentry(dst->sa_family, rt);
868 		break;
869 
870 	case RTM_ADD:
871 		if (info->rti_ifa == NULL) {
872 			ifp = rt_getifp(info, &psref_ifp);
873 			ifa = rt_getifa(info, &psref_ifa);
874 			if (ifa == NULL)
875 				senderr(ENETUNREACH);
876 		} else {
877 			/* Caller should have a reference of ifa */
878 			ifa = info->rti_ifa;
879 			need_to_release_ifa = false;
880 		}
881 		rt = pool_get(&rtentry_pool, PR_NOWAIT);
882 		if (rt == NULL)
883 			senderr(ENOBUFS);
884 		memset(rt, 0, sizeof(*rt));
885 		rt->rt_flags = RTF_UP | flags;
886 		LIST_INIT(&rt->rt_timer);
887 
888 		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
889 		if (netmask) {
890 			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
891 			    netmask);
892 			rt_setkey(rt, (struct sockaddr *)&maskeddst, M_NOWAIT);
893 		} else {
894 			rt_setkey(rt, dst, M_NOWAIT);
895 		}
896 		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
897 		if (rt_getkey(rt) == NULL ||
898 		    rt_setgate(rt, gateway) != 0) {
899 			pool_put(&rtentry_pool, rt);
900 			senderr(ENOBUFS);
901 		}
902 
903 		rt_set_ifa(rt, ifa);
904 		if (info->rti_info[RTAX_TAG] != NULL) {
905 			const struct sockaddr *tag;
906 			tag = rt_settag(rt, info->rti_info[RTAX_TAG]);
907 			if (tag == NULL)
908 				senderr(ENOBUFS);
909 		}
910 		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
911 
912 		ss = pserialize_read_enter();
913 		if (info->rti_info[RTAX_IFP] != NULL) {
914 			ifa2 = ifa_ifwithnet(info->rti_info[RTAX_IFP]);
915 			if (ifa2 != NULL)
916 				rt->rt_ifp = ifa2->ifa_ifp;
917 			else
918 				rt->rt_ifp = ifa->ifa_ifp;
919 		} else
920 			rt->rt_ifp = ifa->ifa_ifp;
921 		pserialize_read_exit(ss);
922 
923 		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
924 		rc = rt_addaddr(rtbl, rt, netmask);
925 		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
926 		if (rc != 0) {
927 			ifafree(ifa); /* for rt_set_ifa above */
928 			rt_destroy(rt);
929 			pool_put(&rtentry_pool, rt);
930 			senderr(rc);
931 		}
932 		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
933 		if (ifa->ifa_rtrequest)
934 			ifa->ifa_rtrequest(req, rt, info);
935 		if (need_to_release_ifa)
936 			ifa_release(ifa, &psref_ifa);
937 		ifa = NULL;
938 		if_put(ifp, &psref_ifp);
939 		ifp = NULL;
940 		RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
941 		if (ret_nrt) {
942 			*ret_nrt = rt;
943 			rt->rt_refcnt++;
944 		}
945 		rtflushall(dst->sa_family);
946 		break;
947 	case RTM_GET:
948 		if (netmask != NULL) {
949 			rt_maskedcopy(dst, (struct sockaddr *)&maskeddst,
950 			    netmask);
951 			dst = (struct sockaddr *)&maskeddst;
952 		}
953 		if ((rt = rt_lookup(rtbl, dst, netmask)) == NULL)
954 			senderr(ESRCH);
955 		if (ret_nrt != NULL) {
956 			*ret_nrt = rt;
957 			rt->rt_refcnt++;
958 		}
959 		break;
960 	}
961 bad:
962 	if (need_to_release_ifa)
963 		ifa_release(ifa, &psref_ifa);
964 	if_put(ifp, &psref_ifp);
965 	curlwp_bindx(bound);
966 	splx(s);
967 	return error;
968 }
969 
970 int
971 rt_setgate(struct rtentry *rt, const struct sockaddr *gate)
972 {
973 	struct sockaddr *new, *old;
974 
975 	KASSERT(rt->_rt_key != NULL);
976 	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
977 
978 	new = sockaddr_dup(gate, M_ZERO | M_NOWAIT);
979 	if (new == NULL)
980 		return ENOMEM;
981 
982 	old = rt->rt_gateway;
983 	rt->rt_gateway = new;
984 	if (old != NULL)
985 		sockaddr_free(old);
986 
987 	KASSERT(rt->_rt_key != NULL);
988 	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
989 
990 	if (rt->rt_flags & RTF_GATEWAY) {
991 		struct rtentry *gwrt = rtalloc1(gate, 1);
992 		/*
993 		 * If we switched gateways, grab the MTU from the new
994 		 * gateway route if the current MTU, if the current MTU is
995 		 * greater than the MTU of gateway.
996 		 * Note that, if the MTU of gateway is 0, we will reset the
997 		 * MTU of the route to run PMTUD again from scratch. XXX
998 		 */
999 		if (gwrt != NULL) {
1000 			KASSERT(gwrt->_rt_key != NULL);
1001 			RT_DPRINTF("gwrt->_rt_key = %p\n", gwrt->_rt_key);
1002 			if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0 &&
1003 			    rt->rt_rmx.rmx_mtu &&
1004 			    rt->rt_rmx.rmx_mtu > gwrt->rt_rmx.rmx_mtu) {
1005 				rt->rt_rmx.rmx_mtu = gwrt->rt_rmx.rmx_mtu;
1006 			}
1007 			rtfree(gwrt);
1008 		}
1009 	}
1010 	KASSERT(rt->_rt_key != NULL);
1011 	RT_DPRINTF("rt->_rt_key = %p\n", (void *)rt->_rt_key);
1012 	return 0;
1013 }
1014 
1015 static void
1016 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
1017 	const struct sockaddr *netmask)
1018 {
1019 	const char *netmaskp = &netmask->sa_data[0],
1020 	           *srcp = &src->sa_data[0];
1021 	char *dstp = &dst->sa_data[0];
1022 	const char *maskend = (char *)dst + MIN(netmask->sa_len, src->sa_len);
1023 	const char *srcend = (char *)dst + src->sa_len;
1024 
1025 	dst->sa_len = src->sa_len;
1026 	dst->sa_family = src->sa_family;
1027 
1028 	while (dstp < maskend)
1029 		*dstp++ = *srcp++ & *netmaskp++;
1030 	if (dstp < srcend)
1031 		memset(dstp, 0, (size_t)(srcend - dstp));
1032 }
1033 
1034 /*
1035  * Inform the routing socket of a route change.
1036  */
1037 void
1038 rt_newmsg(const int cmd, const struct rtentry *rt)
1039 {
1040 	struct rt_addrinfo info;
1041 
1042 	memset((void *)&info, 0, sizeof(info));
1043 	info.rti_info[RTAX_DST] = rt_getkey(rt);
1044 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1045 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1046 	if (rt->rt_ifp) {
1047 		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
1048 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1049 	}
1050 
1051 	rt_missmsg(cmd, &info, rt->rt_flags, 0);
1052 }
1053 
1054 /*
1055  * Set up or tear down a routing table entry, normally
1056  * for an interface.
1057  */
1058 int
1059 rtinit(struct ifaddr *ifa, int cmd, int flags)
1060 {
1061 	struct rtentry *rt;
1062 	struct sockaddr *dst, *odst;
1063 	struct sockaddr_storage maskeddst;
1064 	struct rtentry *nrt = NULL;
1065 	int error;
1066 	struct rt_addrinfo info;
1067 
1068 	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1069 	if (cmd == RTM_DELETE) {
1070 		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1071 			/* Delete subnet route for this interface */
1072 			odst = dst;
1073 			dst = (struct sockaddr *)&maskeddst;
1074 			rt_maskedcopy(odst, dst, ifa->ifa_netmask);
1075 		}
1076 		if ((rt = rtalloc1(dst, 0)) != NULL) {
1077 			if (rt->rt_ifa != ifa) {
1078 				rtfree(rt);
1079 				return (flags & RTF_HOST) ? EHOSTUNREACH
1080 							: ENETUNREACH;
1081 			}
1082 			rtfree(rt);
1083 		}
1084 	}
1085 	memset(&info, 0, sizeof(info));
1086 	info.rti_ifa = ifa;
1087 	info.rti_flags = flags | ifa->ifa_flags;
1088 	info.rti_info[RTAX_DST] = dst;
1089 	info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1090 
1091 	/*
1092 	 * XXX here, it seems that we are assuming that ifa_netmask is NULL
1093 	 * for RTF_HOST.  bsdi4 passes NULL explicitly (via intermediate
1094 	 * variable) when RTF_HOST is 1.  still not sure if i can safely
1095 	 * change it to meet bsdi4 behavior.
1096 	 */
1097 	if (cmd != RTM_LLINFO_UPD)
1098 		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1099 	error = rtrequest1((cmd == RTM_LLINFO_UPD) ? RTM_GET : cmd, &info,
1100 	    &nrt);
1101 	if (error != 0)
1102 		return error;
1103 
1104 	rt = nrt;
1105 	switch (cmd) {
1106 	case RTM_DELETE:
1107 		rt_newmsg(cmd, rt);
1108 		break;
1109 	case RTM_LLINFO_UPD:
1110 		if (cmd == RTM_LLINFO_UPD && ifa->ifa_rtrequest != NULL)
1111 			ifa->ifa_rtrequest(RTM_LLINFO_UPD, rt, &info);
1112 		rt_newmsg(RTM_CHANGE, rt);
1113 		break;
1114 	case RTM_ADD:
1115 		if (rt->rt_ifa != ifa) {
1116 			printf("rtinit: wrong ifa (%p) was (%p)\n", ifa,
1117 				rt->rt_ifa);
1118 			if (rt->rt_ifa->ifa_rtrequest != NULL) {
1119 				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
1120 				    &info);
1121 			}
1122 			rt_replace_ifa(rt, ifa);
1123 			rt->rt_ifp = ifa->ifa_ifp;
1124 			if (ifa->ifa_rtrequest != NULL)
1125 				ifa->ifa_rtrequest(RTM_ADD, rt, &info);
1126 		}
1127 		rt_newmsg(cmd, rt);
1128 		break;
1129 	}
1130 	rtfree(rt);
1131 	return error;
1132 }
1133 
1134 /*
1135  * Create a local route entry for the address.
1136  * Announce the addition of the address and the route to the routing socket.
1137  */
1138 int
1139 rt_ifa_addlocal(struct ifaddr *ifa)
1140 {
1141 	struct rtentry *rt;
1142 	int e;
1143 
1144 	/* If there is no loopback entry, allocate one. */
1145 	rt = rtalloc1(ifa->ifa_addr, 0);
1146 #ifdef RT_DEBUG
1147 	if (rt != NULL)
1148 		dump_rt(rt);
1149 #endif
1150 	if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
1151 	    (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0)
1152 	{
1153 		struct rt_addrinfo info;
1154 		struct rtentry *nrt;
1155 
1156 		memset(&info, 0, sizeof(info));
1157 		info.rti_flags = RTF_HOST | RTF_LOCAL;
1158 		if (!(ifa->ifa_ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)))
1159 			info.rti_flags |= RTF_LLDATA;
1160 		info.rti_info[RTAX_DST] = ifa->ifa_addr;
1161 		info.rti_info[RTAX_GATEWAY] =
1162 		    (const struct sockaddr *)ifa->ifa_ifp->if_sadl;
1163 		info.rti_ifa = ifa;
1164 		nrt = NULL;
1165 		e = rtrequest1(RTM_ADD, &info, &nrt);
1166 		if (nrt && ifa != nrt->rt_ifa)
1167 			rt_replace_ifa(nrt, ifa);
1168 		rt_newaddrmsg(RTM_ADD, ifa, e, nrt);
1169 		if (nrt != NULL) {
1170 #ifdef RT_DEBUG
1171 			dump_rt(nrt);
1172 #endif
1173 			rtfree(nrt);
1174 		}
1175 	} else {
1176 		e = 0;
1177 		rt_newaddrmsg(RTM_NEWADDR, ifa, 0, NULL);
1178 	}
1179 	if (rt != NULL)
1180 		rtfree(rt);
1181 	return e;
1182 }
1183 
1184 /*
1185  * Remove the local route entry for the address.
1186  * Announce the removal of the address and the route to the routing socket.
1187  */
1188 int
1189 rt_ifa_remlocal(struct ifaddr *ifa, struct ifaddr *alt_ifa)
1190 {
1191 	struct rtentry *rt;
1192 	int e = 0;
1193 
1194 	rt = rtalloc1(ifa->ifa_addr, 0);
1195 
1196 	/*
1197 	 * Before deleting, check if a corresponding loopbacked
1198 	 * host route surely exists.  With this check, we can avoid
1199 	 * deleting an interface direct route whose destination is
1200 	 * the same as the address being removed.  This can happen
1201 	 * when removing a subnet-router anycast address on an
1202 	 * interface attached to a shared medium.
1203 	 */
1204 	if (rt != NULL &&
1205 	    (rt->rt_flags & RTF_HOST) &&
1206 	    (rt->rt_ifp->if_flags & IFF_LOOPBACK))
1207 	{
1208 		/* If we cannot replace the route's ifaddr with the equivalent
1209 		 * ifaddr of another interface, I believe it is safest to
1210 		 * delete the route.
1211 		 */
1212 		if (alt_ifa == NULL) {
1213 			e = rtdeletemsg(rt);
1214 			rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1215 		} else {
1216 			rt_replace_ifa(rt, alt_ifa);
1217 			rt_newmsg(RTM_CHANGE, rt);
1218 		}
1219 	} else
1220 		rt_newaddrmsg(RTM_DELADDR, ifa, 0, NULL);
1221 	if (rt != NULL)
1222 		rtfree(rt);
1223 	return e;
1224 }
1225 
/*
 * Route timer routines.  These routines allow functions to be called
 * for various routes at any time.  This is useful in supporting
 * path MTU discovery and redirect route deletion.
 *
 * This is similar to some BSDI internal functions, but it provides
 * for multiple queues for efficiency's sake...
 */
1234 
/* List of the timer queues linked in by rt_timer_queue_create(). */
LIST_HEAD(, rttimer_queue) rttimer_queue_head;
/* Set to 1 by rt_timer_init() once the timer machinery is set up. */
static int rt_init_done = 0;

/*
 * Some subtle order problems with domain initialization mean that
 * we cannot count on this being run from rt_init before various
 * protocol initializations are done.  Therefore, we make sure
 * that this is run when the first queue is added...
 */

/* Forward declaration: rt_timer_init() registers it as workqueue handler. */
static void rt_timer_work(struct work *, void *);
1246 
1247 static void
1248 rt_timer_init(void)
1249 {
1250 	int error;
1251 
1252 	assert(rt_init_done == 0);
1253 
1254 	LIST_INIT(&rttimer_queue_head);
1255 	callout_init(&rt_timer_ch, CALLOUT_MPSAFE);
1256 	error = workqueue_create(&rt_timer_wq, "rt_timer",
1257 	    rt_timer_work, NULL, PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE);
1258 	if (error)
1259 		panic("%s: workqueue_create failed (%d)\n", __func__, error);
1260 	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1261 	rt_init_done = 1;
1262 }
1263 
1264 struct rttimer_queue *
1265 rt_timer_queue_create(u_int timeout)
1266 {
1267 	struct rttimer_queue *rtq;
1268 
1269 	if (rt_init_done == 0)
1270 		rt_timer_init();
1271 
1272 	R_Malloc(rtq, struct rttimer_queue *, sizeof *rtq);
1273 	if (rtq == NULL)
1274 		return NULL;
1275 	memset(rtq, 0, sizeof(*rtq));
1276 
1277 	rtq->rtq_timeout = timeout;
1278 	TAILQ_INIT(&rtq->rtq_head);
1279 	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1280 
1281 	return rtq;
1282 }
1283 
1284 void
1285 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
1286 {
1287 
1288 	rtq->rtq_timeout = timeout;
1289 }
1290 
1291 static void
1292 rt_timer_queue_remove_all(struct rttimer_queue *rtq)
1293 {
1294 	struct rttimer *r;
1295 
1296 	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1297 		LIST_REMOVE(r, rtt_link);
1298 		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1299 		(*r->rtt_func)(r->rtt_rt, r);
1300 		rtfree(r->rtt_rt);
1301 		pool_put(&rttimer_pool, r);
1302 		if (rtq->rtq_count > 0)
1303 			rtq->rtq_count--;
1304 		else
1305 			printf("rt_timer_queue_remove_all: "
1306 			    "rtq_count reached 0\n");
1307 	}
1308 }
1309 
1310 void
1311 rt_timer_queue_destroy(struct rttimer_queue *rtq)
1312 {
1313 
1314 	rt_timer_queue_remove_all(rtq);
1315 
1316 	LIST_REMOVE(rtq, rtq_link);
1317 
1318 	/*
1319 	 * Caller is responsible for freeing the rttimer_queue structure.
1320 	 */
1321 }
1322 
1323 unsigned long
1324 rt_timer_count(struct rttimer_queue *rtq)
1325 {
1326 	return rtq->rtq_count;
1327 }
1328 
1329 static void
1330 rt_timer_remove_all(struct rtentry *rt)
1331 {
1332 	struct rttimer *r;
1333 
1334 	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1335 		LIST_REMOVE(r, rtt_link);
1336 		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1337 		if (r->rtt_queue->rtq_count > 0)
1338 			r->rtt_queue->rtq_count--;
1339 		else
1340 			printf("rt_timer_remove_all: rtq_count reached 0\n");
1341 		pool_put(&rttimer_pool, r);
1342 		rt->rt_refcnt--; /* XXX */
1343 	}
1344 }
1345 
1346 int
1347 rt_timer_add(struct rtentry *rt,
1348 	void (*func)(struct rtentry *, struct rttimer *),
1349 	struct rttimer_queue *queue)
1350 {
1351 	struct rttimer *r;
1352 
1353 	KASSERT(func != NULL);
1354 	/*
1355 	 * If there's already a timer with this action, destroy it before
1356 	 * we add a new one.
1357 	 */
1358 	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1359 		if (r->rtt_func == func)
1360 			break;
1361 	}
1362 	if (r != NULL) {
1363 		LIST_REMOVE(r, rtt_link);
1364 		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1365 		if (r->rtt_queue->rtq_count > 0)
1366 			r->rtt_queue->rtq_count--;
1367 		else
1368 			printf("rt_timer_add: rtq_count reached 0\n");
1369 		rtfree(r->rtt_rt);
1370 	} else {
1371 		r = pool_get(&rttimer_pool, PR_NOWAIT);
1372 		if (r == NULL)
1373 			return ENOBUFS;
1374 	}
1375 
1376 	memset(r, 0, sizeof(*r));
1377 
1378 	rt->rt_refcnt++;
1379 	r->rtt_rt = rt;
1380 	r->rtt_time = time_uptime;
1381 	r->rtt_func = func;
1382 	r->rtt_queue = queue;
1383 	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1384 	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1385 	r->rtt_queue->rtq_count++;
1386 
1387 	return 0;
1388 }
1389 
1390 static void
1391 rt_timer_work(struct work *wk, void *arg)
1392 {
1393 	struct rttimer_queue *rtq;
1394 	struct rttimer *r;
1395 	int s;
1396 
1397 	s = splsoftnet();
1398 	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
1399 		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1400 		    (r->rtt_time + rtq->rtq_timeout) < time_uptime) {
1401 			LIST_REMOVE(r, rtt_link);
1402 			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1403 			(*r->rtt_func)(r->rtt_rt, r);
1404 			rtfree(r->rtt_rt);
1405 			pool_put(&rttimer_pool, r);
1406 			if (rtq->rtq_count > 0)
1407 				rtq->rtq_count--;
1408 			else
1409 				printf("rt_timer_timer: rtq_count reached 0\n");
1410 		}
1411 	}
1412 	splx(s);
1413 
1414 	callout_reset(&rt_timer_ch, hz, rt_timer_timer, NULL);
1415 }
1416 
1417 static void
1418 rt_timer_timer(void *arg)
1419 {
1420 
1421 	workqueue_enqueue(rt_timer_wq, &rt_timer_wk, NULL);
1422 }
1423 
1424 static struct rtentry *
1425 _rtcache_init(struct route *ro, int flag)
1426 {
1427 	rtcache_invariants(ro);
1428 	KASSERT(ro->_ro_rt == NULL);
1429 
1430 	if (rtcache_getdst(ro) == NULL)
1431 		return NULL;
1432 	ro->ro_invalid = false;
1433 	if ((ro->_ro_rt = rtalloc1(rtcache_getdst(ro), flag)) != NULL)
1434 		rtcache(ro);
1435 
1436 	rtcache_invariants(ro);
1437 	return ro->_ro_rt;
1438 }
1439 
/*
 * Look up the route for the cache's destination, passing 1 as the
 * lookup flag (compare rtcache_init_noclone(), which passes 0).
 * Returns the cached route or NULL.
 */
struct rtentry *
rtcache_init(struct route *ro)
{

	return _rtcache_init(ro, 1);
}
1445 
/*
 * Look up the route for the cache's destination, passing 0 as the
 * lookup flag (compare rtcache_init(), which passes 1).
 * Returns the cached route or NULL.
 */
struct rtentry *
rtcache_init_noclone(struct route *ro)
{

	return _rtcache_init(ro, 0);
}
1451 
/*
 * Drop whatever route the cache currently holds and look the stored
 * destination up again, passing `clone' as the lookup flag.
 * Returns the newly cached route or NULL.
 */
struct rtentry *
rtcache_update(struct route *ro, int clone)
{

	rtcache_clear(ro);
	return _rtcache_init(ro, clone);
}
1458 
1459 void
1460 rtcache_copy(struct route *new_ro, const struct route *old_ro)
1461 {
1462 	struct rtentry *rt;
1463 
1464 	KASSERT(new_ro != old_ro);
1465 	rtcache_invariants(new_ro);
1466 	rtcache_invariants(old_ro);
1467 
1468 	if ((rt = rtcache_validate(old_ro)) != NULL)
1469 		rt->rt_refcnt++;
1470 
1471 	if (rtcache_getdst(old_ro) == NULL ||
1472 	    rtcache_setdst(new_ro, rtcache_getdst(old_ro)) != 0)
1473 		return;
1474 
1475 	new_ro->ro_invalid = false;
1476 	if ((new_ro->_ro_rt = rt) != NULL)
1477 		rtcache(new_ro);
1478 	rtcache_invariants(new_ro);
1479 }
1480 
/* Route caches that rtcache_invalidate() has marked invalid are moved here. */
static struct dom_rtlist invalid_routes = LIST_HEAD_INITIALIZER(dom_rtlist);
1482 
1483 static void
1484 rtcache_invalidate(struct dom_rtlist *rtlist)
1485 {
1486 	struct route *ro;
1487 
1488 	while ((ro = LIST_FIRST(rtlist)) != NULL) {
1489 		rtcache_invariants(ro);
1490 		KASSERT(ro->_ro_rt != NULL);
1491 		ro->ro_invalid = true;
1492 		LIST_REMOVE(ro, ro_rtcache_next);
1493 		LIST_INSERT_HEAD(&invalid_routes, ro, ro_rtcache_next);
1494 		rtcache_invariants(ro);
1495 	}
1496 }
1497 
1498 static void
1499 rtcache_clear_rtentry(int family, struct rtentry *rt)
1500 {
1501 	struct domain *dom;
1502 	struct route *ro, *nro;
1503 
1504 	if ((dom = pffinddomain(family)) == NULL)
1505 		return;
1506 
1507 	LIST_FOREACH_SAFE(ro, &dom->dom_rtcache, ro_rtcache_next, nro) {
1508 		if (ro->_ro_rt == rt)
1509 			rtcache_clear(ro);
1510 	}
1511 }
1512 
1513 static void
1514 rtcache_clear(struct route *ro)
1515 {
1516 	rtcache_invariants(ro);
1517 	if (ro->_ro_rt == NULL)
1518 		return;
1519 
1520 	LIST_REMOVE(ro, ro_rtcache_next);
1521 
1522 	rtfree(ro->_ro_rt);
1523 	ro->_ro_rt = NULL;
1524 	ro->ro_invalid = false;
1525 	rtcache_invariants(ro);
1526 }
1527 
/*
 * Return the route for `dst' through the cache `ro'.
 *
 * Hit: the cache already stores `dst' and its route validates; *hitp is
 * set to 1 and the cached route is returned.
 * Miss: *hitp is set to 0, `dst' is stored in the cache and a fresh
 * lookup is done with `clone' as the lookup flag.  NULL is returned if
 * the destination cannot be stored or the lookup finds nothing.
 */
struct rtentry *
rtcache_lookup2(struct route *ro, const struct sockaddr *dst, int clone,
    int *hitp)
{
	const struct sockaddr *odst;
	struct rtentry *rt = NULL;

	odst = rtcache_getdst(ro);
	if (odst != NULL) {
		if (sockaddr_cmp(odst, dst) != 0) {
			/* Cached for another destination: start over. */
			rtcache_free(ro);
		} else if ((rt = rtcache_validate(ro)) != NULL) {
			*hitp = 1;
			rtcache_invariants(ro);
			return rt;
		} else {
			/* Right destination, but the route is stale. */
			rtcache_clear(ro);
		}
	}

	*hitp = 0;
	if (rtcache_setdst(ro, dst) == 0)
		rt = _rtcache_init(ro, clone);

	rtcache_invariants(ro);

	return rt;
}
1563 
1564 void
1565 rtcache_free(struct route *ro)
1566 {
1567 	rtcache_clear(ro);
1568 	if (ro->ro_sa != NULL) {
1569 		sockaddr_free(ro->ro_sa);
1570 		ro->ro_sa = NULL;
1571 	}
1572 	rtcache_invariants(ro);
1573 }
1574 
1575 int
1576 rtcache_setdst(struct route *ro, const struct sockaddr *sa)
1577 {
1578 	KASSERT(sa != NULL);
1579 
1580 	rtcache_invariants(ro);
1581 	if (ro->ro_sa != NULL) {
1582 		if (ro->ro_sa->sa_family == sa->sa_family) {
1583 			rtcache_clear(ro);
1584 			sockaddr_copy(ro->ro_sa, ro->ro_sa->sa_len, sa);
1585 			rtcache_invariants(ro);
1586 			return 0;
1587 		}
1588 		/* free ro_sa, wrong family */
1589 		rtcache_free(ro);
1590 	}
1591 
1592 	KASSERT(ro->_ro_rt == NULL);
1593 
1594 	if ((ro->ro_sa = sockaddr_dup(sa, M_ZERO | M_NOWAIT)) == NULL) {
1595 		rtcache_invariants(ro);
1596 		return ENOMEM;
1597 	}
1598 	rtcache_invariants(ro);
1599 	return 0;
1600 }
1601 
1602 const struct sockaddr *
1603 rt_settag(struct rtentry *rt, const struct sockaddr *tag)
1604 {
1605 	if (rt->rt_tag != tag) {
1606 		if (rt->rt_tag != NULL)
1607 			sockaddr_free(rt->rt_tag);
1608 		rt->rt_tag = sockaddr_dup(tag, M_ZERO | M_NOWAIT);
1609 	}
1610 	return rt->rt_tag;
1611 }
1612 
1613 struct sockaddr *
1614 rt_gettag(const struct rtentry *rt)
1615 {
1616 	return rt->rt_tag;
1617 }
1618 
1619 int
1620 rt_check_reject_route(const struct rtentry *rt, const struct ifnet *ifp)
1621 {
1622 
1623 	if ((rt->rt_flags & RTF_REJECT) != 0) {
1624 		/* Mimic looutput */
1625 		if (ifp->if_flags & IFF_LOOPBACK)
1626 			return (rt->rt_flags & RTF_HOST) ?
1627 			    EHOSTUNREACH : ENETUNREACH;
1628 		else if (rt->rt_rmx.rmx_expire == 0 ||
1629 		    time_uptime < rt->rt_rmx.rmx_expire)
1630 			return (rt->rt_flags & RTF_GATEWAY) ?
1631 			    EHOSTUNREACH : EHOSTDOWN;
1632 	}
1633 
1634 	return 0;
1635 }
1636 
1637 #ifdef DDB
1638 
1639 #include <machine/db_machdep.h>
1640 #include <ddb/db_interface.h>
1641 #include <ddb/db_output.h>
1642 
1643 #define	rt_expire rt_rmx.rmx_expire
1644 
1645 static void
1646 db_print_sa(const struct sockaddr *sa)
1647 {
1648 	int len;
1649 	const u_char *p;
1650 
1651 	if (sa == NULL) {
1652 		db_printf("[NULL]");
1653 		return;
1654 	}
1655 
1656 	p = (const u_char *)sa;
1657 	len = sa->sa_len;
1658 	db_printf("[");
1659 	while (len > 0) {
1660 		db_printf("%d", *p);
1661 		p++; len--;
1662 		if (len) db_printf(",");
1663 	}
1664 	db_printf("]\n");
1665 }
1666 
1667 static void
1668 db_print_ifa(struct ifaddr *ifa)
1669 {
1670 	if (ifa == NULL)
1671 		return;
1672 	db_printf("  ifa_addr=");
1673 	db_print_sa(ifa->ifa_addr);
1674 	db_printf("  ifa_dsta=");
1675 	db_print_sa(ifa->ifa_dstaddr);
1676 	db_printf("  ifa_mask=");
1677 	db_print_sa(ifa->ifa_netmask);
1678 	db_printf("  flags=0x%x,refcnt=%d,metric=%d\n",
1679 			  ifa->ifa_flags,
1680 			  ifa->ifa_refcnt,
1681 			  ifa->ifa_metric);
1682 }
1683 
1684 /*
1685  * Function to pass to rt_walktree().
1686  * Return non-zero error to abort walk.
1687  */
1688 static int
1689 db_show_rtentry(struct rtentry *rt, void *w)
1690 {
1691 	db_printf("rtentry=%p", rt);
1692 
1693 	db_printf(" flags=0x%x refcnt=%d use=%"PRId64" expire=%"PRId64"\n",
1694 			  rt->rt_flags, rt->rt_refcnt,
1695 			  rt->rt_use, (uint64_t)rt->rt_expire);
1696 
1697 	db_printf(" key="); db_print_sa(rt_getkey(rt));
1698 	db_printf(" mask="); db_print_sa(rt_mask(rt));
1699 	db_printf(" gw="); db_print_sa(rt->rt_gateway);
1700 
1701 	db_printf(" ifp=%p ", rt->rt_ifp);
1702 	if (rt->rt_ifp)
1703 		db_printf("(%s)", rt->rt_ifp->if_xname);
1704 	else
1705 		db_printf("(NULL)");
1706 
1707 	db_printf(" ifa=%p\n", rt->rt_ifa);
1708 	db_print_ifa(rt->rt_ifa);
1709 
1710 	db_printf(" gwroute=%p llinfo=%p\n",
1711 			  rt->rt_gwroute, rt->rt_llinfo);
1712 
1713 	return 0;
1714 }
1715 
1716 /*
1717  * Function to print all the route trees.
1718  * Use this from ddb:  "show routes"
1719  */
1720 void
1721 db_show_routes(db_expr_t addr, bool have_addr,
1722     db_expr_t count, const char *modif)
1723 {
1724 	rt_walktree(AF_INET, db_show_rtentry, NULL);
1725 }
1726 #endif
1727