xref: /openbsd-src/sys/net/route.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: route.c,v 1.383 2019/03/03 16:31:12 deraadt Exp $	*/
2 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)route.c	8.2 (Berkeley) 11/15/93
62  */
63 
64 /*
65  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
66  *
67  * NRL grants permission for redistribution and use in source and binary
68  * forms, with or without modification, of the software and documentation
69  * created at NRL provided that the following conditions are met:
70  *
71  * 1. Redistributions of source code must retain the above copyright
72  *    notice, this list of conditions and the following disclaimer.
73  * 2. Redistributions in binary form must reproduce the above copyright
74  *    notice, this list of conditions and the following disclaimer in the
75  *    documentation and/or other materials provided with the distribution.
76  * 3. All advertising materials mentioning features or use of this software
77  *    must display the following acknowledgements:
78  *	This product includes software developed by the University of
79  *	California, Berkeley and its contributors.
80  *	This product includes software developed at the Information
81  *	Technology Division, US Naval Research Laboratory.
82  * 4. Neither the name of the NRL nor the names of its contributors
83  *    may be used to endorse or promote products derived from this software
84  *    without specific prior written permission.
85  *
86  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
90  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97  *
98  * The views and conclusions contained in the software and documentation
99  * are those of the authors and should not be interpreted as representing
100  * official policies, either expressed or implied, of the US Naval
101  * Research Laboratory (NRL).
102  */
103 
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/mbuf.h>
107 #include <sys/socket.h>
108 #include <sys/socketvar.h>
109 #include <sys/timeout.h>
110 #include <sys/domain.h>
111 #include <sys/protosw.h>
112 #include <sys/ioctl.h>
113 #include <sys/kernel.h>
114 #include <sys/queue.h>
115 #include <sys/pool.h>
116 #include <sys/atomic.h>
117 
118 #include <net/if.h>
119 #include <net/if_var.h>
120 #include <net/if_dl.h>
121 #include <net/route.h>
122 
123 #include <netinet/in.h>
124 #include <netinet/ip_var.h>
125 #include <netinet/in_var.h>
126 
127 #ifdef INET6
128 #include <netinet/ip6.h>
129 #include <netinet6/ip6_var.h>
130 #include <netinet6/in6_var.h>
131 #endif
132 
133 #ifdef MPLS
134 #include <netmpls/mpls.h>
135 #endif
136 
137 #ifdef BFD
138 #include <net/bfd.h>
139 #endif
140 
/*
 * Round ``a'' up to the next multiple of sizeof(long); values that are
 * not greater than zero yield sizeof(long).  The argument is evaluated
 * more than once, so it must not have side effects.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
142 
143 /* Give some jitter to hash, to avoid synchronization between routers. */
144 static uint32_t		rt_hashjitter;
145 
146 extern unsigned int	rtmap_limit;
147 
148 struct cpumem *		rtcounters;
149 int			rttrash;	/* routes not in table but not freed */
150 int			ifatrash;	/* ifas not in ifp list but not free */
151 
152 struct pool		rtentry_pool;	/* pool for rtentry structures */
153 struct pool		rttimer_pool;	/* pool for rttimer structures */
154 
155 void	rt_timer_init(void);
156 int	rt_setgwroute(struct rtentry *, u_int);
157 void	rt_putgwroute(struct rtentry *);
158 int	rtflushclone1(struct rtentry *, void *, u_int);
159 void	rtflushclone(unsigned int, struct rtentry *);
160 int	rt_ifa_purge_walker(struct rtentry *, void *, unsigned int);
161 struct rtentry *rt_match(struct sockaddr *, uint32_t *, int, unsigned int);
162 int	rt_clone(struct rtentry **, struct sockaddr *, unsigned int);
163 struct sockaddr *rt_plentosa(sa_family_t, int, struct sockaddr_in6 *);
164 
#ifdef DDB
/* Helpers only built when the kernel debugger is configured. */
void	db_print_sa(struct sockaddr *sa);
void	db_print_ifa(struct ifaddr *ifa);
int	db_show_rtentry(struct rtentry *rt, void *arg, unsigned int id);
#endif
170 
171 #define	LABELID_MAX	50000
172 
173 struct rt_label {
174 	TAILQ_ENTRY(rt_label)	rtl_entry;
175 	char			rtl_name[RTLABEL_LEN];
176 	u_int16_t		rtl_id;
177 	int			rtl_ref;
178 };
179 
180 TAILQ_HEAD(rt_labels, rt_label)	rt_labels = TAILQ_HEAD_INITIALIZER(rt_labels);
181 
182 void
183 route_init(void)
184 {
185 	rtcounters = counters_alloc(rts_ncounters);
186 
187 	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, IPL_SOFTNET, 0,
188 	    "rtentry", NULL);
189 
190 	while (rt_hashjitter == 0)
191 		rt_hashjitter = arc4random();
192 
193 #ifdef BFD
194 	bfdinit();
195 #endif
196 }
197 
198 /*
199  * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise.
200  */
201 int
202 rtisvalid(struct rtentry *rt)
203 {
204 	if (rt == NULL)
205 		return (0);
206 
207 	if (!ISSET(rt->rt_flags, RTF_UP))
208 		return (0);
209 
210 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
211 		KASSERT(rt->rt_gwroute != NULL);
212 		KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY));
213 		if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP))
214 			return (0);
215 	}
216 
217 	return (1);
218 }
219 
220 /*
221  * Do the actual lookup for rtalloc(9), do not use directly!
222  *
223  * Return the best matching entry for the destination ``dst''.
224  *
225  * "RT_RESOLVE" means that a corresponding L2 entry should
226  *   be added to the routing table and resolved (via ARP or
227  *   NDP), if it does not exist.
228  */
229 struct rtentry *
230 rt_match(struct sockaddr *dst, uint32_t *src, int flags, unsigned int tableid)
231 {
232 	struct rtentry		*rt = NULL;
233 
234 	rt = rtable_match(tableid, dst, src);
235 	if (rt == NULL) {
236 		rtstat_inc(rts_unreach);
237 		return (NULL);
238 	}
239 
240 	if (ISSET(rt->rt_flags, RTF_CLONING) && ISSET(flags, RT_RESOLVE))
241 		rt_clone(&rt, dst, tableid);
242 
243 	rt->rt_use++;
244 	return (rt);
245 }
246 
247 int
248 rt_clone(struct rtentry **rtp, struct sockaddr *dst, unsigned int rtableid)
249 {
250 	struct rt_addrinfo	 info;
251 	struct rtentry		*rt = *rtp;
252 	int			 error = 0;
253 
254 	memset(&info, 0, sizeof(info));
255 	info.rti_info[RTAX_DST] = dst;
256 
257 	/*
258 	 * The priority of cloned route should be different
259 	 * to avoid conflict with /32 cloning routes.
260 	 *
261 	 * It should also be higher to let the ARP layer find
262 	 * cloned routes instead of the cloning one.
263 	 */
264 	KERNEL_LOCK();
265 	error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt,
266 	    rtableid);
267 	KERNEL_UNLOCK();
268 	if (error) {
269 		rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid);
270 	} else {
271 		/* Inform listeners of the new route */
272 		rtm_send(rt, RTM_ADD, 0, rtableid);
273 		rtfree(*rtp);
274 		*rtp = rt;
275 	}
276 	return (error);
277 }
278 
/*
 * Mixing step for three 32-bit words.
 * Originated from bridge_hash() in if_bridge.c
 *
 * All three arguments are updated in place and evaluated many times,
 * so they must be plain lvalues without side effects.
 */
#define mix(a, b, c) do {						\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);			\
	(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);			\
	(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);			\
	(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);			\
} while (0)
293 
294 int
295 rt_hash(struct rtentry *rt, struct sockaddr *dst, uint32_t *src)
296 {
297 	uint32_t a, b, c;
298 
299 	if (src == NULL || !rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPATH))
300 		return (-1);
301 
302 	a = b = 0x9e3779b9;
303 	c = rt_hashjitter;
304 
305 	switch (dst->sa_family) {
306 	case AF_INET:
307 	    {
308 		struct sockaddr_in *sin;
309 
310 		if (!ipmultipath)
311 			return (-1);
312 
313 		sin = satosin(dst);
314 		a += sin->sin_addr.s_addr;
315 		b += (src != NULL) ? src[0] : 0;
316 		mix(a, b, c);
317 		break;
318 	    }
319 #ifdef INET6
320 	case AF_INET6:
321 	    {
322 		struct sockaddr_in6 *sin6;
323 
324 		if (!ip6_multipath)
325 			return (-1);
326 
327 		sin6 = satosin6(dst);
328 		a += sin6->sin6_addr.s6_addr32[0];
329 		b += sin6->sin6_addr.s6_addr32[2];
330 		c += (src != NULL) ? src[0] : 0;
331 		mix(a, b, c);
332 		a += sin6->sin6_addr.s6_addr32[1];
333 		b += sin6->sin6_addr.s6_addr32[3];
334 		c += (src != NULL) ? src[1] : 0;
335 		mix(a, b, c);
336 		a += sin6->sin6_addr.s6_addr32[2];
337 		b += sin6->sin6_addr.s6_addr32[1];
338 		c += (src != NULL) ? src[2] : 0;
339 		mix(a, b, c);
340 		a += sin6->sin6_addr.s6_addr32[3];
341 		b += sin6->sin6_addr.s6_addr32[0];
342 		c += (src != NULL) ? src[3] : 0;
343 		mix(a, b, c);
344 		break;
345 	    }
346 #endif /* INET6 */
347 	}
348 
349 	return (c & 0xffff);
350 }
351 
352 /*
353  * Allocate a route, potentially using multipath to select the peer.
354  */
355 struct rtentry *
356 rtalloc_mpath(struct sockaddr *dst, uint32_t *src, unsigned int rtableid)
357 {
358 	return (rt_match(dst, src, RT_RESOLVE, rtableid));
359 }
360 
361 /*
362  * Look in the routing table for the best matching entry for
363  * ``dst''.
364  *
365  * If a route with a gateway is found and its next hop is no
366  * longer valid, try to cache it.
367  */
368 struct rtentry *
369 rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
370 {
371 	return (rt_match(dst, NULL, flags, rtableid));
372 }
373 
374 /*
375  * Cache the route entry corresponding to a reachable next hop in
376  * the gateway entry ``rt''.
377  */
378 int
379 rt_setgwroute(struct rtentry *rt, u_int rtableid)
380 {
381 	struct rtentry *prt, *nhrt;
382 	unsigned int rdomain = rtable_l2(rtableid);
383 	int error;
384 
385 	NET_ASSERT_LOCKED();
386 
387 	KASSERT(ISSET(rt->rt_flags, RTF_GATEWAY));
388 
389 	/* If we cannot find a valid next hop bail. */
390 	nhrt = rt_match(rt->rt_gateway, NULL, RT_RESOLVE, rdomain);
391 	if (nhrt == NULL)
392 		return (ENOENT);
393 
394 	/* Next hop entry must be on the same interface. */
395 	if (nhrt->rt_ifidx != rt->rt_ifidx) {
396 		struct sockaddr_in6	sa_mask;
397 
398 		if (!ISSET(nhrt->rt_flags, RTF_LLINFO) ||
399 		    !ISSET(nhrt->rt_flags, RTF_CLONED)) {
400 			rtfree(nhrt);
401 			return (EHOSTUNREACH);
402 		}
403 
404 		/*
405 		 * We found a L2 entry, so we might have multiple
406 		 * RTF_CLONING routes for the same subnet.  Query
407 		 * the first route of the multipath chain and iterate
408 		 * until we find the correct one.
409 		 */
410 		prt = rtable_lookup(rdomain, rt_key(nhrt->rt_parent),
411 		    rt_plen2mask(nhrt->rt_parent, &sa_mask), NULL, RTP_ANY);
412 		rtfree(nhrt);
413 
414 		while (prt != NULL && prt->rt_ifidx != rt->rt_ifidx)
415 			prt = rtable_iterate(prt);
416 
417 		/* We found nothing or a non-cloning MPATH route. */
418 		if (prt == NULL || !ISSET(prt->rt_flags, RTF_CLONING)) {
419 			rtfree(prt);
420 			return (EHOSTUNREACH);
421 		}
422 
423 		error = rt_clone(&prt, rt->rt_gateway, rdomain);
424 		if (error) {
425 			rtfree(prt);
426 			return (error);
427 		}
428 		nhrt = prt;
429 	}
430 
431 	/*
432 	 * Next hop must be reachable, this also prevents rtentry
433 	 * loops for example when rt->rt_gwroute points to rt.
434 	 */
435 	if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
436 		rtfree(nhrt);
437 		return (ENETUNREACH);
438 	}
439 
440 	/* Next hop is valid so remove possible old cache. */
441 	rt_putgwroute(rt);
442 	KASSERT(rt->rt_gwroute == NULL);
443 
444 	/*
445 	 * If the MTU of next hop is 0, this will reset the MTU of the
446 	 * route to run PMTUD again from scratch.
447 	 */
448 	if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
449 		rt->rt_mtu = nhrt->rt_mtu;
450 
451 	/*
452 	 * To avoid reference counting problems when writting link-layer
453 	 * addresses in an outgoing packet, we ensure that the lifetime
454 	 * of a cached entry is greater that the bigger lifetime of the
455 	 * gateway entries it is pointed by.
456 	 */
457 	nhrt->rt_flags |= RTF_CACHED;
458 	nhrt->rt_cachecnt++;
459 
460 	rt->rt_gwroute = nhrt;
461 
462 	return (0);
463 }
464 
465 /*
466  * Invalidate the cached route entry of the gateway entry ``rt''.
467  */
468 void
469 rt_putgwroute(struct rtentry *rt)
470 {
471 	struct rtentry *nhrt = rt->rt_gwroute;
472 
473 	NET_ASSERT_LOCKED();
474 
475 	if (!ISSET(rt->rt_flags, RTF_GATEWAY) || nhrt == NULL)
476 		return;
477 
478 	KASSERT(ISSET(nhrt->rt_flags, RTF_CACHED));
479 	KASSERT(nhrt->rt_cachecnt > 0);
480 
481 	--nhrt->rt_cachecnt;
482 	if (nhrt->rt_cachecnt == 0)
483 		nhrt->rt_flags &= ~RTF_CACHED;
484 
485 	rtfree(rt->rt_gwroute);
486 	rt->rt_gwroute = NULL;
487 }
488 
489 void
490 rtref(struct rtentry *rt)
491 {
492 	atomic_inc_int(&rt->rt_refcnt);
493 }
494 
495 void
496 rtfree(struct rtentry *rt)
497 {
498 	int		 refcnt;
499 
500 	if (rt == NULL)
501 		return;
502 
503 	refcnt = (int)atomic_dec_int_nv(&rt->rt_refcnt);
504 	if (refcnt <= 0) {
505 		KASSERT(!ISSET(rt->rt_flags, RTF_UP));
506 		KASSERT(!RT_ROOT(rt));
507 		atomic_dec_int(&rttrash);
508 		if (refcnt < 0) {
509 			printf("rtfree: %p not freed (neg refs)\n", rt);
510 			return;
511 		}
512 
513 		KERNEL_LOCK();
514 		rt_timer_remove_all(rt);
515 		ifafree(rt->rt_ifa);
516 		rtlabel_unref(rt->rt_labelid);
517 #ifdef MPLS
518 		rt_mpls_clear(rt);
519 #endif
520 		free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len));
521 		free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);
522 		KERNEL_UNLOCK();
523 
524 		pool_put(&rtentry_pool, rt);
525 	}
526 }
527 
528 void
529 ifafree(struct ifaddr *ifa)
530 {
531 	if (ifa == NULL)
532 		panic("ifafree");
533 	if (ifa->ifa_refcnt == 0) {
534 		ifatrash--;
535 		free(ifa, M_IFADDR, 0);
536 	} else
537 		ifa->ifa_refcnt--;
538 }
539 
540 /*
541  * Force a routing table entry to the specified
542  * destination to go through the given gateway.
543  * Normally called as a result of a routing redirect
544  * message from the network layer.
545  */
546 void
547 rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
548     struct sockaddr *src, struct rtentry **rtp, unsigned int rdomain)
549 {
550 	struct rtentry		*rt;
551 	int			 error = 0;
552 	enum rtstat_counters	 stat = rts_ncounters;
553 	struct rt_addrinfo	 info;
554 	struct ifaddr		*ifa;
555 	unsigned int		 ifidx = 0;
556 	int			 flags = RTF_GATEWAY|RTF_HOST;
557 	uint8_t			 prio = RTP_NONE;
558 
559 	NET_ASSERT_LOCKED();
560 
561 	/* verify the gateway is directly reachable */
562 	rt = rtalloc(gateway, 0, rdomain);
563 	if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY)) {
564 		rtfree(rt);
565 		error = ENETUNREACH;
566 		goto out;
567 	}
568 	ifidx = rt->rt_ifidx;
569 	ifa = rt->rt_ifa;
570 	rtfree(rt);
571 	rt = NULL;
572 
573 	rt = rtable_lookup(rdomain, dst, NULL, NULL, RTP_ANY);
574 	/*
575 	 * If the redirect isn't from our current router for this dst,
576 	 * it's either old or wrong.  If it redirects us to ourselves,
577 	 * we have a routing loop, perhaps as a result of an interface
578 	 * going down recently.
579 	 */
580 #define	equal(a1, a2) \
581 	((a1)->sa_len == (a2)->sa_len && \
582 	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
583 	if (rt != NULL && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
584 		error = EINVAL;
585 	else if (ifa_ifwithaddr(gateway, rdomain) != NULL ||
586 	    (gateway->sa_family = AF_INET &&
587 	    in_broadcast(satosin(gateway)->sin_addr, rdomain)))
588 		error = EHOSTUNREACH;
589 	if (error)
590 		goto done;
591 	/*
592 	 * Create a new entry if we just got back a wildcard entry
593 	 * or the lookup failed.  This is necessary for hosts
594 	 * which use routing redirects generated by smart gateways
595 	 * to dynamically build the routing tables.
596 	 */
597 	if (rt == NULL)
598 		goto create;
599 	/*
600 	 * Don't listen to the redirect if it's
601 	 * for a route to an interface.
602 	 */
603 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
604 		if (!ISSET(rt->rt_flags, RTF_HOST)) {
605 			/*
606 			 * Changing from route to net => route to host.
607 			 * Create new route, rather than smashing route to net.
608 			 */
609 create:
610 			rtfree(rt);
611 			flags |= RTF_DYNAMIC;
612 			bzero(&info, sizeof(info));
613 			info.rti_info[RTAX_DST] = dst;
614 			info.rti_info[RTAX_GATEWAY] = gateway;
615 			info.rti_ifa = ifa;
616 			info.rti_flags = flags;
617 			rt = NULL;
618 			error = rtrequest(RTM_ADD, &info, RTP_DEFAULT, &rt,
619 			    rdomain);
620 			if (error == 0) {
621 				flags = rt->rt_flags;
622 				prio = rt->rt_priority;
623 			}
624 			stat = rts_dynamic;
625 		} else {
626 			/*
627 			 * Smash the current notion of the gateway to
628 			 * this destination.  Should check about netmask!!!
629 			 */
630 			rt->rt_flags |= RTF_MODIFIED;
631 			flags |= RTF_MODIFIED;
632 			prio = rt->rt_priority;
633 			stat = rts_newgateway;
634 			rt_setgate(rt, gateway, rdomain);
635 		}
636 	} else
637 		error = EHOSTUNREACH;
638 done:
639 	if (rt) {
640 		if (rtp && !error)
641 			*rtp = rt;
642 		else
643 			rtfree(rt);
644 	}
645 out:
646 	if (error)
647 		rtstat_inc(rts_badredirect);
648 	else if (stat != rts_ncounters)
649 		rtstat_inc(stat);
650 	bzero((caddr_t)&info, sizeof(info));
651 	info.rti_info[RTAX_DST] = dst;
652 	info.rti_info[RTAX_GATEWAY] = gateway;
653 	info.rti_info[RTAX_AUTHOR] = src;
654 	rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain);
655 }
656 
657 /*
658  * Delete a route and generate a message
659  */
660 int
661 rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, u_int tableid)
662 {
663 	int			error;
664 	struct rt_addrinfo	info;
665 	struct sockaddr_in6	sa_mask;
666 
667 	KASSERT(rt->rt_ifidx == ifp->if_index);
668 
669 	/*
670 	 * Request the new route so that the entry is not actually
671 	 * deleted.  That will allow the information being reported to
672 	 * be accurate (and consistent with route_output()).
673 	 */
674 	memset(&info, 0, sizeof(info));
675 	info.rti_info[RTAX_DST] = rt_key(rt);
676 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
677 	if (!ISSET(rt->rt_flags, RTF_HOST))
678 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
679 	error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid);
680 	rtm_send(rt, RTM_DELETE, error, tableid);
681 	if (error == 0)
682 		rtfree(rt);
683 	return (error);
684 }
685 
686 static inline int
687 rtequal(struct rtentry *a, struct rtentry *b)
688 {
689 	if (a == b)
690 		return 1;
691 
692 	if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 &&
693 	    rt_plen(a) == rt_plen(b))
694 		return 1;
695 	else
696 		return 0;
697 }
698 
699 int
700 rtflushclone1(struct rtentry *rt, void *arg, u_int id)
701 {
702 	struct rtentry *cloningrt = arg;
703 	struct ifnet *ifp;
704 	int error;
705 
706 	if (!ISSET(rt->rt_flags, RTF_CLONED))
707 		return 0;
708 
709 	/* Cached route must stay alive as long as their parent are alive. */
710 	if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt))
711 		return 0;
712 
713 	if (!rtequal(rt->rt_parent, cloningrt))
714 		return 0;
715 	/*
716 	 * This happens when an interface with a RTF_CLONING route is
717 	 * being detached.  In this case it's safe to bail because all
718 	 * the routes are being purged by rt_ifa_purge().
719 	 */
720 	ifp = if_get(rt->rt_ifidx);
721 	if (ifp == NULL)
722 	        return 0;
723 
724 	error = rtdeletemsg(rt, ifp, id);
725 	if (error == 0)
726 		error = EAGAIN;
727 
728 	if_put(ifp);
729 	return error;
730 }
731 
732 void
733 rtflushclone(unsigned int rtableid, struct rtentry *parent)
734 {
735 
736 #ifdef DIAGNOSTIC
737 	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
738 		panic("rtflushclone: called with a non-cloning route");
739 #endif
740 	rtable_walk(rtableid, rt_key(parent)->sa_family, rtflushclone1, parent);
741 }
742 
743 int
744 rtrequest_delete(struct rt_addrinfo *info, u_int8_t prio, struct ifnet *ifp,
745     struct rtentry **ret_nrt, u_int tableid)
746 {
747 	struct rtentry	*rt;
748 	int		 error;
749 
750 	NET_ASSERT_LOCKED();
751 
752 	if (!rtable_exists(tableid))
753 		return (EAFNOSUPPORT);
754 	rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
755 	    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio);
756 	if (rt == NULL)
757 		return (ESRCH);
758 
759 	/* Make sure that's the route the caller want to delete. */
760 	if (ifp != NULL && ifp->if_index != rt->rt_ifidx) {
761 		rtfree(rt);
762 		return (ESRCH);
763 	}
764 
765 #ifdef BFD
766 	if (ISSET(rt->rt_flags, RTF_BFD))
767 		bfdclear(rt);
768 #endif
769 
770 	error = rtable_delete(tableid, info->rti_info[RTAX_DST],
771 	    info->rti_info[RTAX_NETMASK], rt);
772 	if (error != 0) {
773 		rtfree(rt);
774 		return (ESRCH);
775 	}
776 
777 	/* Release next hop cache before flushing cloned entries. */
778 	rt_putgwroute(rt);
779 
780 	/* Clean up any cloned children. */
781 	if (ISSET(rt->rt_flags, RTF_CLONING))
782 		rtflushclone(tableid, rt);
783 
784 	rtfree(rt->rt_parent);
785 	rt->rt_parent = NULL;
786 
787 	rt->rt_flags &= ~RTF_UP;
788 
789 	KASSERT(ifp->if_index == rt->rt_ifidx);
790 	ifp->if_rtrequest(ifp, RTM_DELETE, rt);
791 
792 	atomic_inc_int(&rttrash);
793 
794 	if (ret_nrt != NULL)
795 		*ret_nrt = rt;
796 	else
797 		rtfree(rt);
798 
799 	return (0);
800 }
801 
802 int
803 rtrequest(int req, struct rt_addrinfo *info, u_int8_t prio,
804     struct rtentry **ret_nrt, u_int tableid)
805 {
806 	struct ifnet		*ifp;
807 	struct rtentry		*rt, *crt;
808 	struct ifaddr		*ifa;
809 	struct sockaddr		*ndst;
810 	struct sockaddr_rtlabel	*sa_rl, sa_rl2;
811 	struct sockaddr_dl	 sa_dl = { sizeof(sa_dl), AF_LINK };
812 	int			 dlen, error;
813 
814 	NET_ASSERT_LOCKED();
815 
816 	if (!rtable_exists(tableid))
817 		return (EAFNOSUPPORT);
818 	if (info->rti_flags & RTF_HOST)
819 		info->rti_info[RTAX_NETMASK] = NULL;
820 	switch (req) {
821 	case RTM_DELETE:
822 		return (EINVAL);
823 
824 	case RTM_RESOLVE:
825 		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
826 			return (EINVAL);
827 		if ((rt->rt_flags & RTF_CLONING) == 0)
828 			return (EINVAL);
829 		KASSERT(rt->rt_ifa->ifa_ifp != NULL);
830 		info->rti_ifa = rt->rt_ifa;
831 		info->rti_flags = rt->rt_flags | (RTF_CLONED|RTF_HOST);
832 		info->rti_flags &= ~(RTF_CLONING|RTF_CONNECTED|RTF_STATIC);
833 		info->rti_info[RTAX_GATEWAY] = sdltosa(&sa_dl);
834 		info->rti_info[RTAX_LABEL] =
835 		    rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
836 		/* FALLTHROUGH */
837 
838 	case RTM_ADD:
839 		if (info->rti_ifa == NULL)
840 			return (EINVAL);
841 		ifa = info->rti_ifa;
842 		ifp = ifa->ifa_ifp;
843 		if (prio == 0)
844 			prio = ifp->if_priority + RTP_STATIC;
845 
846 		dlen = info->rti_info[RTAX_DST]->sa_len;
847 		ndst = malloc(dlen, M_RTABLE, M_NOWAIT);
848 		if (ndst == NULL)
849 			return (ENOBUFS);
850 
851 		if (info->rti_info[RTAX_NETMASK] != NULL)
852 			rt_maskedcopy(info->rti_info[RTAX_DST], ndst,
853 			    info->rti_info[RTAX_NETMASK]);
854 		else
855 			memcpy(ndst, info->rti_info[RTAX_DST], dlen);
856 
857 		rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
858 		if (rt == NULL) {
859 			free(ndst, M_RTABLE, dlen);
860 			return (ENOBUFS);
861 		}
862 
863 		rt->rt_refcnt = 1;
864 		rt->rt_flags = info->rti_flags | RTF_UP;
865 		rt->rt_priority = prio;	/* init routing priority */
866 		LIST_INIT(&rt->rt_timer);
867 
868 		/* Check the link state if the table supports it. */
869 		if (rtable_mpath_capable(tableid, ndst->sa_family) &&
870 		    !ISSET(rt->rt_flags, RTF_LOCAL) &&
871 		    (!LINK_STATE_IS_UP(ifp->if_link_state) ||
872 		    !ISSET(ifp->if_flags, IFF_UP))) {
873 			rt->rt_flags &= ~RTF_UP;
874 			rt->rt_priority |= RTP_DOWN;
875 		}
876 
877 		if (info->rti_info[RTAX_LABEL] != NULL) {
878 			sa_rl = (struct sockaddr_rtlabel *)
879 			    info->rti_info[RTAX_LABEL];
880 			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
881 		}
882 
883 #ifdef MPLS
884 		/* We have to allocate additional space for MPLS infos */
885 		if (info->rti_flags & RTF_MPLS &&
886 		    (info->rti_info[RTAX_SRC] != NULL ||
887 		    info->rti_info[RTAX_DST]->sa_family == AF_MPLS)) {
888 			error = rt_mpls_set(rt, info->rti_info[RTAX_SRC],
889 			    info->rti_mpls);
890 			if (error) {
891 				free(ndst, M_RTABLE, dlen);
892 				pool_put(&rtentry_pool, rt);
893 				return (error);
894 			}
895 		} else
896 			rt_mpls_clear(rt);
897 #endif
898 
899 		ifa->ifa_refcnt++;
900 		rt->rt_ifa = ifa;
901 		rt->rt_ifidx = ifp->if_index;
902 		/*
903 		 * Copy metrics and a back pointer from the cloned
904 		 * route's parent.
905 		 */
906 		if (ISSET(rt->rt_flags, RTF_CLONED)) {
907 			rtref(*ret_nrt);
908 			rt->rt_parent = *ret_nrt;
909 			rt->rt_rmx = (*ret_nrt)->rt_rmx;
910 		}
911 
912 		/*
913 		 * We must set rt->rt_gateway before adding ``rt'' to
914 		 * the routing table because the radix MPATH code use
915 		 * it to (re)order routes.
916 		 */
917 		if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
918 		    tableid))) {
919 			ifafree(ifa);
920 			rtfree(rt->rt_parent);
921 			rt_putgwroute(rt);
922 			free(rt->rt_gateway, M_RTABLE, 0);
923 			free(ndst, M_RTABLE, dlen);
924 			pool_put(&rtentry_pool, rt);
925 			return (error);
926 		}
927 
928 		error = rtable_insert(tableid, ndst,
929 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
930 		    rt->rt_priority, rt);
931 		if (error != 0 &&
932 		    (crt = rtable_match(tableid, ndst, NULL)) != NULL) {
933 			/* overwrite cloned route */
934 			if (ISSET(crt->rt_flags, RTF_CLONED)) {
935 				struct ifnet *cifp;
936 
937 				cifp = if_get(crt->rt_ifidx);
938 				KASSERT(cifp != NULL);
939 				rtdeletemsg(crt, cifp, tableid);
940 				if_put(cifp);
941 
942 				error = rtable_insert(tableid, ndst,
943 				    info->rti_info[RTAX_NETMASK],
944 				    info->rti_info[RTAX_GATEWAY],
945 				    rt->rt_priority, rt);
946 			}
947 			rtfree(crt);
948 		}
949 		if (error != 0) {
950 			ifafree(ifa);
951 			rtfree(rt->rt_parent);
952 			rt_putgwroute(rt);
953 			free(rt->rt_gateway, M_RTABLE, 0);
954 			free(ndst, M_RTABLE, dlen);
955 			pool_put(&rtentry_pool, rt);
956 			return (EEXIST);
957 		}
958 		ifp->if_rtrequest(ifp, req, rt);
959 
960 		if_group_routechange(info->rti_info[RTAX_DST],
961 			info->rti_info[RTAX_NETMASK]);
962 
963 		if (ret_nrt != NULL)
964 			*ret_nrt = rt;
965 		else
966 			rtfree(rt);
967 		break;
968 	}
969 
970 	return (0);
971 }
972 
973 int
974 rt_setgate(struct rtentry *rt, struct sockaddr *gate, u_int rtableid)
975 {
976 	int glen = ROUNDUP(gate->sa_len);
977 	struct sockaddr *sa;
978 
979 	if (rt->rt_gateway == NULL || glen != ROUNDUP(rt->rt_gateway->sa_len)) {
980 		sa = malloc(glen, M_RTABLE, M_NOWAIT);
981 		if (sa == NULL)
982 			return (ENOBUFS);
983 		if (rt->rt_gateway != NULL) {
984 			free(rt->rt_gateway, M_RTABLE,
985 			    ROUNDUP(rt->rt_gateway->sa_len));
986 		}
987 		rt->rt_gateway = sa;
988 	}
989 	memmove(rt->rt_gateway, gate, glen);
990 
991 	if (ISSET(rt->rt_flags, RTF_GATEWAY))
992 		return (rt_setgwroute(rt, rtableid));
993 
994 	return (0);
995 }
996 
997 /*
998  * Return the route entry containing the next hop link-layer
999  * address corresponding to ``rt''.
1000  */
1001 struct rtentry *
1002 rt_getll(struct rtentry *rt)
1003 {
1004 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
1005 		KASSERT(rt->rt_gwroute != NULL);
1006 		return (rt->rt_gwroute);
1007 	}
1008 
1009 	return (rt);
1010 }
1011 
/*
 * Copy ``src'' to ``dst'' while applying ``netmask''.
 *
 * The first two bytes (sa_len and sa_family) are copied verbatim.
 * The following bytes are ANDed with the mask up to the shorter of the
 * two lengths, and whatever remains up to the length of ``src'' is
 * zeroed.
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	unsigned char	*from = (unsigned char *)src;
	unsigned char	*to = (unsigned char *)dst;
	unsigned char	*mask = (unsigned char *)netmask;
	int		 srclen = from[0];
	int		 masklen = mask[0];
	int		 andlen, i;

	/* copies sa_len & sa_family */
	to[0] = from[0];
	to[1] = from[1];

	andlen = (masklen > srclen) ? srclen : masklen;
	for (i = 2; i < andlen; i++)
		to[i] = from[i] & mask[i];

	/* Bytes not covered by the mask are cleared. */
	if (i < srclen)
		memset(to + i, 0, srclen - i);
}
1031 
1032 int
1033 rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst,
1034     unsigned int rdomain)
1035 {
1036 	struct ifnet		*ifp = ifa->ifa_ifp;
1037 	struct rtentry		*rt;
1038 	struct sockaddr_rtlabel	 sa_rl;
1039 	struct rt_addrinfo	 info;
1040 	uint8_t			 prio = ifp->if_priority + RTP_STATIC;
1041 	int			 error;
1042 
1043 	memset(&info, 0, sizeof(info));
1044 	info.rti_ifa = ifa;
1045 	info.rti_flags = flags;
1046 	info.rti_info[RTAX_DST] = dst;
1047 	if (flags & RTF_LLINFO)
1048 		info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
1049 	else
1050 		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1051 
1052 	KASSERT(rdomain == rtable_l2(rdomain));
1053 	if (rdomain == rtable_l2(ifp->if_rtlabelid)) {
1054 		info.rti_info[RTAX_LABEL] =
1055 		    rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);
1056 	}
1057 
1058 #ifdef MPLS
1059 	if ((flags & RTF_MPLS) == RTF_MPLS)
1060 		info.rti_mpls = MPLS_OP_POP;
1061 #endif /* MPLS */
1062 
1063 	if ((flags & RTF_HOST) == 0)
1064 		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1065 
1066 	if (flags & (RTF_LOCAL|RTF_BROADCAST))
1067 		prio = RTP_LOCAL;
1068 
1069 	if (flags & RTF_CONNECTED)
1070 		prio = ifp->if_priority + RTP_CONNECTED;
1071 
1072 	error = rtrequest(RTM_ADD, &info, prio, &rt, rdomain);
1073 	if (error == 0) {
1074 		/*
1075 		 * A local route is created for every address configured
1076 		 * on an interface, so use this information to notify
1077 		 * userland that a new address has been added.
1078 		 */
1079 		if (flags & RTF_LOCAL)
1080 			rtm_addr(RTM_NEWADDR, ifa);
1081 		rtm_send(rt, RTM_ADD, 0, rdomain);
1082 		rtfree(rt);
1083 	}
1084 	return (error);
1085 }
1086 
1087 int
1088 rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst,
1089     unsigned int rdomain)
1090 {
1091 	struct ifnet		*ifp = ifa->ifa_ifp;
1092 	struct rtentry		*rt;
1093 	struct mbuf		*m = NULL;
1094 	struct sockaddr		*deldst;
1095 	struct rt_addrinfo	 info;
1096 	struct sockaddr_rtlabel	 sa_rl;
1097 	uint8_t			 prio = ifp->if_priority + RTP_STATIC;
1098 	int			 error;
1099 
1100 	if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
1101 		m = m_get(M_DONTWAIT, MT_SONAME);
1102 		if (m == NULL)
1103 			return (ENOBUFS);
1104 		deldst = mtod(m, struct sockaddr *);
1105 		rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
1106 		dst = deldst;
1107 	}
1108 
1109 	memset(&info, 0, sizeof(info));
1110 	info.rti_ifa = ifa;
1111 	info.rti_flags = flags;
1112 	info.rti_info[RTAX_DST] = dst;
1113 	if ((flags & RTF_LLINFO) == 0)
1114 		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1115 
1116 	if (rdomain == rtable_l2(ifp->if_rtlabelid)) {
1117 		info.rti_info[RTAX_LABEL] =
1118 		    rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);
1119 	}
1120 
1121 	if ((flags & RTF_HOST) == 0)
1122 		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1123 
1124 	if (flags & (RTF_LOCAL|RTF_BROADCAST))
1125 		prio = RTP_LOCAL;
1126 
1127 	if (flags & RTF_CONNECTED)
1128 		prio = ifp->if_priority + RTP_CONNECTED;
1129 
1130 	error = rtrequest_delete(&info, prio, ifp, &rt, rdomain);
1131 	if (error == 0) {
1132 		rtm_send(rt, RTM_DELETE, 0, rdomain);
1133 		if (flags & RTF_LOCAL)
1134 			rtm_addr(RTM_DELADDR, ifa);
1135 		rtfree(rt);
1136 	}
1137 	m_free(m);
1138 
1139 	return (error);
1140 }
1141 
1142 /*
1143  * Add ifa's address as a local rtentry.
1144  */
1145 int
1146 rt_ifa_addlocal(struct ifaddr *ifa)
1147 {
1148 	struct ifnet *ifp = ifa->ifa_ifp;
1149 	struct rtentry *rt;
1150 	u_int flags = RTF_HOST|RTF_LOCAL;
1151 	int error = 0;
1152 
1153 	/*
1154 	 * If the configured address correspond to the magical "any"
1155 	 * address do not add a local route entry because that might
1156 	 * corrupt the routing tree which uses this value for the
1157 	 * default routes.
1158 	 */
1159 	switch (ifa->ifa_addr->sa_family) {
1160 	case AF_INET:
1161 		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
1162 			return (0);
1163 		break;
1164 #ifdef INET6
1165 	case AF_INET6:
1166 		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
1167 		    &in6addr_any))
1168 			return (0);
1169 		break;
1170 #endif
1171 	default:
1172 		break;
1173 	}
1174 
1175 	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
1176 		flags |= RTF_LLINFO;
1177 
1178 	/* If there is no local entry, allocate one. */
1179 	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
1180 	if (rt == NULL || ISSET(rt->rt_flags, flags) != flags) {
1181 		error = rt_ifa_add(ifa, flags | RTF_MPATH, ifa->ifa_addr,
1182 		    ifp->if_rdomain);
1183 	}
1184 	rtfree(rt);
1185 
1186 	return (error);
1187 }
1188 
1189 /*
1190  * Remove local rtentry of ifa's addresss if it exists.
1191  */
1192 int
1193 rt_ifa_dellocal(struct ifaddr *ifa)
1194 {
1195 	struct ifnet *ifp = ifa->ifa_ifp;
1196 	struct rtentry *rt;
1197 	u_int flags = RTF_HOST|RTF_LOCAL;
1198 	int error = 0;
1199 
1200 	/*
1201 	 * We do not add local routes for such address, so do not bother
1202 	 * removing them.
1203 	 */
1204 	switch (ifa->ifa_addr->sa_family) {
1205 	case AF_INET:
1206 		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
1207 			return (0);
1208 		break;
1209 #ifdef INET6
1210 	case AF_INET6:
1211 		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
1212 		    &in6addr_any))
1213 			return (0);
1214 		break;
1215 #endif
1216 	default:
1217 		break;
1218 	}
1219 
1220 	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
1221 		flags |= RTF_LLINFO;
1222 
1223 	/*
1224 	 * Before deleting, check if a corresponding local host
1225 	 * route surely exists.  With this check, we can avoid to
1226 	 * delete an interface direct route whose destination is same
1227 	 * as the address being removed.  This can happen when removing
1228 	 * a subnet-router anycast address on an interface attached
1229 	 * to a shared medium.
1230 	 */
1231 	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
1232 	if (rt != NULL && ISSET(rt->rt_flags, flags) == flags) {
1233 		error = rt_ifa_del(ifa, flags, ifa->ifa_addr,
1234 		    ifp->if_rdomain);
1235 	}
1236 	rtfree(rt);
1237 
1238 	return (error);
1239 }
1240 
1241 /*
1242  * Remove all addresses attached to ``ifa''.
1243  */
1244 void
1245 rt_ifa_purge(struct ifaddr *ifa)
1246 {
1247 	struct ifnet		*ifp = ifa->ifa_ifp;
1248 	unsigned int		 rtableid;
1249 	int			 i;
1250 
1251 	KASSERT(ifp != NULL);
1252 
1253 	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
1254 		/* skip rtables that are not in the rdomain of the ifp */
1255 		if (rtable_l2(rtableid) != ifp->if_rdomain)
1256 			continue;
1257 		for (i = 1; i <= AF_MAX; i++) {
1258 			rtable_walk(rtableid, i, rt_ifa_purge_walker, ifa);
1259 		}
1260 	}
1261 }
1262 
1263 int
1264 rt_ifa_purge_walker(struct rtentry *rt, void *vifa, unsigned int rtableid)
1265 {
1266 	struct ifaddr		*ifa = vifa;
1267 	struct ifnet		*ifp = ifa->ifa_ifp;
1268 	int			 error;
1269 
1270 	if (rt->rt_ifa != ifa)
1271 		return (0);
1272 
1273 	if ((error = rtdeletemsg(rt, ifp, rtableid))) {
1274 		return (error);
1275 	}
1276 
1277 	return (EAGAIN);
1278 }
1279 
1280 /*
1281  * Route timer routines.  These routes allow functions to be called
1282  * for various routes at any time.  This is useful in supporting
1283  * path MTU discovery and redirect route deletion.
1284  *
1285  * This is similar to some BSDI internal functions, but it provides
1286  * for multiple queues for efficiency's sake...
1287  */
1288 
1289 LIST_HEAD(, rttimer_queue)	rttimer_queue_head;
1290 static int			rt_init_done = 0;
1291 
1292 #define RTTIMER_CALLOUT(r)	{					\
1293 	if (r->rtt_func != NULL) {					\
1294 		(*r->rtt_func)(r->rtt_rt, r);				\
1295 	} else {							\
1296 		struct ifnet *ifp;					\
1297 									\
1298 		ifp = if_get(r->rtt_rt->rt_ifidx);			\
1299 		if (ifp != NULL) 					\
1300 			rtdeletemsg(r->rtt_rt, ifp, r->rtt_tableid);	\
1301 		if_put(ifp);						\
1302 	}								\
1303 }
1304 
1305 /*
1306  * Some subtle order problems with domain initialization mean that
1307  * we cannot count on this being run from rt_init before various
1308  * protocol initializations are done.  Therefore, we make sure
1309  * that this is run when the first queue is added...
1310  */
1311 
1312 void
1313 rt_timer_init(void)
1314 {
1315 	static struct timeout	rt_timer_timeout;
1316 
1317 	if (rt_init_done)
1318 		panic("rt_timer_init: already initialized");
1319 
1320 	pool_init(&rttimer_pool, sizeof(struct rttimer), 0, IPL_SOFTNET, 0,
1321 	    "rttmr", NULL);
1322 
1323 	LIST_INIT(&rttimer_queue_head);
1324 	timeout_set_proc(&rt_timer_timeout, rt_timer_timer, &rt_timer_timeout);
1325 	timeout_add_sec(&rt_timer_timeout, 1);
1326 	rt_init_done = 1;
1327 }
1328 
1329 struct rttimer_queue *
1330 rt_timer_queue_create(u_int timeout)
1331 {
1332 	struct rttimer_queue	*rtq;
1333 
1334 	if (rt_init_done == 0)
1335 		rt_timer_init();
1336 
1337 	if ((rtq = malloc(sizeof(*rtq), M_RTABLE, M_NOWAIT|M_ZERO)) == NULL)
1338 		return (NULL);
1339 
1340 	rtq->rtq_timeout = timeout;
1341 	rtq->rtq_count = 0;
1342 	TAILQ_INIT(&rtq->rtq_head);
1343 	LIST_INSERT_HEAD(&rttimer_queue_head, rtq, rtq_link);
1344 
1345 	return (rtq);
1346 }
1347 
1348 void
1349 rt_timer_queue_change(struct rttimer_queue *rtq, long timeout)
1350 {
1351 	rtq->rtq_timeout = timeout;
1352 }
1353 
1354 void
1355 rt_timer_queue_destroy(struct rttimer_queue *rtq)
1356 {
1357 	struct rttimer	*r;
1358 
1359 	NET_ASSERT_LOCKED();
1360 
1361 	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
1362 		LIST_REMOVE(r, rtt_link);
1363 		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1364 		RTTIMER_CALLOUT(r);
1365 		pool_put(&rttimer_pool, r);
1366 		if (rtq->rtq_count > 0)
1367 			rtq->rtq_count--;
1368 		else
1369 			printf("rt_timer_queue_destroy: rtq_count reached 0\n");
1370 	}
1371 
1372 	LIST_REMOVE(rtq, rtq_link);
1373 	free(rtq, M_RTABLE, sizeof(*rtq));
1374 }
1375 
1376 unsigned long
1377 rt_timer_queue_count(struct rttimer_queue *rtq)
1378 {
1379 	return (rtq->rtq_count);
1380 }
1381 
1382 void
1383 rt_timer_remove_all(struct rtentry *rt)
1384 {
1385 	struct rttimer	*r;
1386 
1387 	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
1388 		LIST_REMOVE(r, rtt_link);
1389 		TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1390 		if (r->rtt_queue->rtq_count > 0)
1391 			r->rtt_queue->rtq_count--;
1392 		else
1393 			printf("rt_timer_remove_all: rtq_count reached 0\n");
1394 		pool_put(&rttimer_pool, r);
1395 	}
1396 }
1397 
1398 int
1399 rt_timer_add(struct rtentry *rt, void (*func)(struct rtentry *,
1400     struct rttimer *), struct rttimer_queue *queue, u_int rtableid)
1401 {
1402 	struct rttimer	*r;
1403 	long		 current_time;
1404 
1405 	current_time = time_uptime;
1406 	rt->rt_expire = time_uptime + queue->rtq_timeout;
1407 
1408 	/*
1409 	 * If there's already a timer with this action, destroy it before
1410 	 * we add a new one.
1411 	 */
1412 	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1413 		if (r->rtt_func == func) {
1414 			LIST_REMOVE(r, rtt_link);
1415 			TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
1416 			if (r->rtt_queue->rtq_count > 0)
1417 				r->rtt_queue->rtq_count--;
1418 			else
1419 				printf("rt_timer_add: rtq_count reached 0\n");
1420 			pool_put(&rttimer_pool, r);
1421 			break;  /* only one per list, so we can quit... */
1422 		}
1423 	}
1424 
1425 	r = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO);
1426 	if (r == NULL)
1427 		return (ENOBUFS);
1428 
1429 	r->rtt_rt = rt;
1430 	r->rtt_time = current_time;
1431 	r->rtt_func = func;
1432 	r->rtt_queue = queue;
1433 	r->rtt_tableid = rtableid;
1434 	LIST_INSERT_HEAD(&rt->rt_timer, r, rtt_link);
1435 	TAILQ_INSERT_TAIL(&queue->rtq_head, r, rtt_next);
1436 	r->rtt_queue->rtq_count++;
1437 
1438 	return (0);
1439 }
1440 
1441 void
1442 rt_timer_timer(void *arg)
1443 {
1444 	struct timeout		*to = (struct timeout *)arg;
1445 	struct rttimer_queue	*rtq;
1446 	struct rttimer		*r;
1447 	long			 current_time;
1448 
1449 	current_time = time_uptime;
1450 
1451 	NET_LOCK();
1452 	LIST_FOREACH(rtq, &rttimer_queue_head, rtq_link) {
1453 		while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL &&
1454 		    (r->rtt_time + rtq->rtq_timeout) < current_time) {
1455 			LIST_REMOVE(r, rtt_link);
1456 			TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
1457 			RTTIMER_CALLOUT(r);
1458 			pool_put(&rttimer_pool, r);
1459 			if (rtq->rtq_count > 0)
1460 				rtq->rtq_count--;
1461 			else
1462 				printf("rt_timer_timer: rtq_count reached 0\n");
1463 		}
1464 	}
1465 	NET_UNLOCK();
1466 
1467 	timeout_add_sec(to, 1);
1468 }
1469 
1470 #ifdef MPLS
1471 int
1472 rt_mpls_set(struct rtentry *rt, struct sockaddr *src, uint8_t op)
1473 {
1474 	struct sockaddr_mpls	*psa_mpls = (struct sockaddr_mpls *)src;
1475 	struct rt_mpls		*rt_mpls;
1476 
1477 	rt->rt_llinfo = malloc(sizeof(struct rt_mpls), M_TEMP, M_NOWAIT|M_ZERO);
1478 	if (rt->rt_llinfo == NULL)
1479 		return (ENOMEM);
1480 
1481 	rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
1482 	if (psa_mpls != NULL)
1483 		rt_mpls->mpls_label = psa_mpls->smpls_label;
1484 
1485 	rt_mpls->mpls_operation = op;
1486 
1487 	/* XXX: set experimental bits */
1488 	rt->rt_flags |= RTF_MPLS;
1489 
1490 	return (0);
1491 }
1492 
1493 void
1494 rt_mpls_clear(struct rtentry *rt)
1495 {
1496 	if (rt->rt_llinfo != NULL && rt->rt_flags & RTF_MPLS) {
1497 		free(rt->rt_llinfo, M_TEMP, sizeof(struct rt_mpls));
1498 		rt->rt_llinfo = NULL;
1499 	}
1500 	rt->rt_flags &= ~RTF_MPLS;
1501 }
1502 #endif
1503 
1504 u_int16_t
1505 rtlabel_name2id(char *name)
1506 {
1507 	struct rt_label		*label, *p;
1508 	u_int16_t		 new_id = 1;
1509 
1510 	if (!name[0])
1511 		return (0);
1512 
1513 	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1514 		if (strcmp(name, label->rtl_name) == 0) {
1515 			label->rtl_ref++;
1516 			return (label->rtl_id);
1517 		}
1518 
1519 	/*
1520 	 * to avoid fragmentation, we do a linear search from the beginning
1521 	 * and take the first free slot we find. if there is none or the list
1522 	 * is empty, append a new entry at the end.
1523 	 */
1524 	TAILQ_FOREACH(p, &rt_labels, rtl_entry) {
1525 		if (p->rtl_id != new_id)
1526 			break;
1527 		new_id = p->rtl_id + 1;
1528 	}
1529 	if (new_id > LABELID_MAX)
1530 		return (0);
1531 
1532 	label = malloc(sizeof(*label), M_RTABLE, M_NOWAIT|M_ZERO);
1533 	if (label == NULL)
1534 		return (0);
1535 	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
1536 	label->rtl_id = new_id;
1537 	label->rtl_ref++;
1538 
1539 	if (p != NULL)	/* insert new entry before p */
1540 		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
1541 	else		/* either list empty or no free slot in between */
1542 		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);
1543 
1544 	return (label->rtl_id);
1545 }
1546 
1547 const char *
1548 rtlabel_id2name(u_int16_t id)
1549 {
1550 	struct rt_label	*label;
1551 
1552 	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1553 		if (label->rtl_id == id)
1554 			return (label->rtl_name);
1555 
1556 	return (NULL);
1557 }
1558 
1559 struct sockaddr *
1560 rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
1561 {
1562 	const char	*label;
1563 
1564 	if (labelid == 0 || (label = rtlabel_id2name(labelid)) == NULL)
1565 		return (NULL);
1566 
1567 	bzero(sa_rl, sizeof(*sa_rl));
1568 	sa_rl->sr_len = sizeof(*sa_rl);
1569 	sa_rl->sr_family = AF_UNSPEC;
1570 	strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));
1571 
1572 	return ((struct sockaddr *)sa_rl);
1573 }
1574 
1575 void
1576 rtlabel_unref(u_int16_t id)
1577 {
1578 	struct rt_label	*p, *next;
1579 
1580 	if (id == 0)
1581 		return;
1582 
1583 	TAILQ_FOREACH_SAFE(p, &rt_labels, rtl_entry, next) {
1584 		if (id == p->rtl_id) {
1585 			if (--p->rtl_ref == 0) {
1586 				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
1587 				free(p, M_RTABLE, sizeof(*p));
1588 			}
1589 			break;
1590 		}
1591 	}
1592 }
1593 
1594 void
1595 rt_if_track(struct ifnet *ifp)
1596 {
1597 	int i;
1598 	u_int tid;
1599 
1600 	for (tid = 0; tid < rtmap_limit; tid++) {
1601 		/* skip rtables that are not in the rdomain of the ifp */
1602 		if (rtable_l2(tid) != ifp->if_rdomain)
1603 			continue;
1604 		for (i = 1; i <= AF_MAX; i++) {
1605 			if (!rtable_mpath_capable(tid, i))
1606 				continue;
1607 
1608 			rtable_walk(tid, i, rt_if_linkstate_change, ifp);
1609 		}
1610 	}
1611 }
1612 
1613 int
1614 rt_if_linkstate_change(struct rtentry *rt, void *arg, u_int id)
1615 {
1616 	struct ifnet *ifp = arg;
1617 	struct sockaddr_in6 sa_mask;
1618 	int error;
1619 
1620 	if (rt->rt_ifidx != ifp->if_index)
1621 		return (0);
1622 
1623 	/* Local routes are always usable. */
1624 	if (rt->rt_flags & RTF_LOCAL) {
1625 		rt->rt_flags |= RTF_UP;
1626 		return (0);
1627 	}
1628 
1629 	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP) {
1630 		if (ISSET(rt->rt_flags, RTF_UP))
1631 			return (0);
1632 
1633 		/* bring route up */
1634 		rt->rt_flags |= RTF_UP;
1635 		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
1636 		    rt->rt_priority & RTP_MASK, rt);
1637 	} else {
1638 		/*
1639 		 * Remove redirected and cloned routes (mainly ARP)
1640 		 * from down interfaces so we have a chance to get
1641 		 * new routes from a better source.
1642 		 */
1643 		if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) &&
1644 		    !ISSET(rt->rt_flags, RTF_CACHED|RTF_BFD)) {
1645 			if ((error = rtdeletemsg(rt, ifp, id)))
1646 				return (error);
1647 			return (EAGAIN);
1648 		}
1649 
1650 		if (!ISSET(rt->rt_flags, RTF_UP))
1651 			return (0);
1652 
1653 		/* take route down */
1654 		rt->rt_flags &= ~RTF_UP;
1655 		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
1656 		    rt->rt_priority | RTP_DOWN, rt);
1657 	}
1658 	if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));
1659 
1660 	return (error);
1661 }
1662 
1663 struct sockaddr *
1664 rt_plentosa(sa_family_t af, int plen, struct sockaddr_in6 *sa_mask)
1665 {
1666 	struct sockaddr_in	*sin = (struct sockaddr_in *)sa_mask;
1667 #ifdef INET6
1668 	struct sockaddr_in6	*sin6 = (struct sockaddr_in6 *)sa_mask;
1669 #endif
1670 
1671 	KASSERT(plen >= 0 || plen == -1);
1672 
1673 	if (plen == -1)
1674 		return (NULL);
1675 
1676 	memset(sa_mask, 0, sizeof(*sa_mask));
1677 
1678 	switch (af) {
1679 	case AF_INET:
1680 		sin->sin_family = AF_INET;
1681 		sin->sin_len = sizeof(struct sockaddr_in);
1682 		in_prefixlen2mask(&sin->sin_addr, plen);
1683 		break;
1684 #ifdef INET6
1685 	case AF_INET6:
1686 		sin6->sin6_family = AF_INET6;
1687 		sin6->sin6_len = sizeof(struct sockaddr_in6);
1688 		in6_prefixlen2mask(&sin6->sin6_addr, plen);
1689 		break;
1690 #endif /* INET6 */
1691 	default:
1692 		return (NULL);
1693 	}
1694 
1695 	return ((struct sockaddr *)sa_mask);
1696 }
1697 
1698 struct sockaddr *
1699 rt_plen2mask(struct rtentry *rt, struct sockaddr_in6 *sa_mask)
1700 {
1701 	return (rt_plentosa(rt_key(rt)->sa_family, rt_plen(rt), sa_mask));
1702 }
1703 
1704 #ifdef DDB
1705 #include <machine/db_machdep.h>
1706 #include <ddb/db_output.h>
1707 
1708 void
1709 db_print_sa(struct sockaddr *sa)
1710 {
1711 	int len;
1712 	u_char *p;
1713 
1714 	if (sa == NULL) {
1715 		db_printf("[NULL]");
1716 		return;
1717 	}
1718 
1719 	p = (u_char *)sa;
1720 	len = sa->sa_len;
1721 	db_printf("[");
1722 	while (len > 0) {
1723 		db_printf("%d", *p);
1724 		p++;
1725 		len--;
1726 		if (len)
1727 			db_printf(",");
1728 	}
1729 	db_printf("]\n");
1730 }
1731 
1732 void
1733 db_print_ifa(struct ifaddr *ifa)
1734 {
1735 	if (ifa == NULL)
1736 		return;
1737 	db_printf("  ifa_addr=");
1738 	db_print_sa(ifa->ifa_addr);
1739 	db_printf("  ifa_dsta=");
1740 	db_print_sa(ifa->ifa_dstaddr);
1741 	db_printf("  ifa_mask=");
1742 	db_print_sa(ifa->ifa_netmask);
1743 	db_printf("  flags=0x%x, refcnt=%d, metric=%d\n",
1744 	    ifa->ifa_flags, ifa->ifa_refcnt, ifa->ifa_metric);
1745 }
1746 
1747 /*
1748  * Function to pass to rtalble_walk().
1749  * Return non-zero error to abort walk.
1750  */
1751 int
1752 db_show_rtentry(struct rtentry *rt, void *w, unsigned int id)
1753 {
1754 	db_printf("rtentry=%p", rt);
1755 
1756 	db_printf(" flags=0x%x refcnt=%d use=%llu expire=%lld rtableid=%u\n",
1757 	    rt->rt_flags, rt->rt_refcnt, rt->rt_use, rt->rt_expire, id);
1758 
1759 	db_printf(" key="); db_print_sa(rt_key(rt));
1760 	db_printf(" plen=%d", rt_plen(rt));
1761 	db_printf(" gw="); db_print_sa(rt->rt_gateway);
1762 	db_printf(" ifidx=%u ", rt->rt_ifidx);
1763 	db_printf(" ifa=%p\n", rt->rt_ifa);
1764 	db_print_ifa(rt->rt_ifa);
1765 
1766 	db_printf(" gwroute=%p llinfo=%p\n", rt->rt_gwroute, rt->rt_llinfo);
1767 	return (0);
1768 }
1769 
1770 /*
1771  * Function to print all the route trees.
1772  * Use this from ddb:  "call db_show_arptab"
1773  */
1774 int
1775 db_show_arptab(void)
1776 {
1777 	db_printf("Route tree for AF_INET\n");
1778 	rtable_walk(0, AF_INET, db_show_rtentry, NULL);
1779 	return (0);
1780 }
1781 #endif /* DDB */
1782