xref: /openbsd-src/sys/net/route.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 /*	$OpenBSD: route.c,v 1.423 2023/11/10 20:05:22 bluhm Exp $	*/
2 /*	$NetBSD: route.c,v 1.14 1996/02/13 22:00:46 christos Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1980, 1986, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)route.c	8.2 (Berkeley) 11/15/93
62  */
63 
64 /*
65  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
66  *
67  * NRL grants permission for redistribution and use in source and binary
68  * forms, with or without modification, of the software and documentation
69  * created at NRL provided that the following conditions are met:
70  *
71  * 1. Redistributions of source code must retain the above copyright
72  *    notice, this list of conditions and the following disclaimer.
73  * 2. Redistributions in binary form must reproduce the above copyright
74  *    notice, this list of conditions and the following disclaimer in the
75  *    documentation and/or other materials provided with the distribution.
76  * 3. All advertising materials mentioning features or use of this software
77  *    must display the following acknowledgements:
78  *	This product includes software developed by the University of
79  *	California, Berkeley and its contributors.
80  *	This product includes software developed at the Information
81  *	Technology Division, US Naval Research Laboratory.
82  * 4. Neither the name of the NRL nor the names of its contributors
83  *    may be used to endorse or promote products derived from this software
84  *    without specific prior written permission.
85  *
86  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
87  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
88  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
89  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
90  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
91  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
92  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
93  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
94  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
95  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
96  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
97  *
98  * The views and conclusions contained in the software and documentation
99  * are those of the authors and should not be interpreted as representing
100  * official policies, either expressed or implied, of the US Naval
101  * Research Laboratory (NRL).
102  */
103 
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/mbuf.h>
107 #include <sys/socket.h>
108 #include <sys/socketvar.h>
109 #include <sys/timeout.h>
110 #include <sys/domain.h>
111 #include <sys/ioctl.h>
112 #include <sys/kernel.h>
113 #include <sys/queue.h>
114 #include <sys/pool.h>
115 #include <sys/atomic.h>
116 #include <sys/mutex.h>
117 
118 #include <net/if.h>
119 #include <net/if_var.h>
120 #include <net/if_dl.h>
121 #include <net/route.h>
122 
123 #include <netinet/in.h>
124 #include <netinet/ip_var.h>
125 #include <netinet/in_var.h>
126 
127 #ifdef INET6
128 #include <netinet/ip6.h>
129 #include <netinet6/ip6_var.h>
130 #include <netinet6/in6_var.h>
131 #endif
132 
133 #ifdef MPLS
134 #include <netmpls/mpls.h>
135 #endif
136 
137 #ifdef BFD
138 #include <net/bfd.h>
139 #endif
140 
/*
 * Locks used to protect struct members:
 *      I       immutable after creation
 *      L       rtlabel_mtx
 *      T       rttimer_mtx
 */

/* Round ``a'' up to the next multiple of sizeof(long); 0 maps to sizeof(long). */
#define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))

/* Give some jitter to hash, to avoid synchronization between routers. */
static uint32_t		rt_hashjitter;

extern unsigned int	rtmap_limit;

struct cpumem *		rtcounters;	/* per-CPU routing statistics */
int			rttrash;	/* routes not in table but not freed */

struct pool	rtentry_pool;		/* pool for rtentry structures */
struct pool	rttimer_pool;		/* pool for rttimer structures */

/* Internal helpers; see the function definitions below for details. */
int	rt_setgwroute(struct rtentry *, u_int);
void	rt_putgwroute(struct rtentry *);
int	rtflushclone1(struct rtentry *, void *, u_int);
int	rtflushclone(struct rtentry *, unsigned int);
int	rt_ifa_purge_walker(struct rtentry *, void *, unsigned int);
struct rtentry *rt_match(struct sockaddr *, uint32_t *, int, unsigned int);
int	rt_clone(struct rtentry **, struct sockaddr *, unsigned int);
struct sockaddr *rt_plentosa(sa_family_t, int, struct sockaddr_in6 *);
static int rt_copysa(struct sockaddr *, struct sockaddr *, struct sockaddr **);

/* Upper bound for dynamically assigned route label ids. */
#define	LABELID_MAX	50000

/* A named route label, shared by every route carrying the same name. */
struct rt_label {
	TAILQ_ENTRY(rt_label)	rtl_entry;		/* [L] */
	char			rtl_name[RTLABEL_LEN];	/* [I] */
	u_int16_t		rtl_id;			/* [I] */
	int			rtl_ref;		/* [L] */
};

TAILQ_HEAD(rt_labels, rt_label)	rt_labels =
    TAILQ_HEAD_INITIALIZER(rt_labels);		/* [L] */
struct mutex rtlabel_mtx = MUTEX_INITIALIZER(IPL_NET);
183 
/*
 * One-time initialization of the routing layer: per-CPU statistics
 * counters, the rtentry pool, the multipath hash jitter and,
 * when compiled in, the BFD subsystem.
 */
void
route_init(void)
{
	rtcounters = counters_alloc(rts_ncounters);

	pool_init(&rtentry_pool, sizeof(struct rtentry), 0, IPL_MPFLOOR, 0,
	    "rtentry", NULL);

	/* Zero means "uninitialized", so retry until we get a non-zero value. */
	while (rt_hashjitter == 0)
		rt_hashjitter = arc4random();

#ifdef BFD
	bfdinit();
#endif
}
199 
200 /*
201  * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise.
202  */
203 int
204 rtisvalid(struct rtentry *rt)
205 {
206 	if (rt == NULL)
207 		return (0);
208 
209 	if (!ISSET(rt->rt_flags, RTF_UP))
210 		return (0);
211 
212 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
213 		KASSERT(rt->rt_gwroute != NULL);
214 		KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY));
215 		if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP))
216 			return (0);
217 	}
218 
219 	return (1);
220 }
221 
/*
 * Do the actual lookup for rtalloc(9), do not use directly!
 *
 * Return the best matching entry for the destination ``dst''.
 *
 * "RT_RESOLVE" means that a corresponding L2 entry should
 * be added to the routing table and resolved (via ARP or
 * NDP), if it does not exist.
 */
struct rtentry *
rt_match(struct sockaddr *dst, uint32_t *src, int flags, unsigned int tableid)
{
	struct rtentry		*rt = NULL;

	rt = rtable_match(tableid, dst, src);
	if (rt == NULL) {
		/* No matching route at all: account it as unreachable. */
		rtstat_inc(rts_unreach);
		return (NULL);
	}

	/*
	 * The lookup returned a cloning route and the caller asked for
	 * resolution: spawn the L2 entry.  On success rt_clone() swaps
	 * ``rt'' for a reference to the cloned entry.
	 */
	if (ISSET(rt->rt_flags, RTF_CLONING) && ISSET(flags, RT_RESOLVE))
		rt_clone(&rt, dst, tableid);

	rt->rt_use++;
	return (rt);
}
248 
/*
 * Spawn an RTF_CLONED (L2) entry from the cloning route ``*rtp''
 * for destination ``dst''.  On success ``*rtp'' is replaced by a
 * reference to the new entry; on failure it is left untouched and
 * an RTM_MISS message is sent.  Returns 0 or an errno.
 */
int
rt_clone(struct rtentry **rtp, struct sockaddr *dst, unsigned int rtableid)
{
	struct rt_addrinfo	 info;
	struct rtentry		*rt = *rtp;
	int			 error = 0;

	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = dst;

	/*
	 * The priority of cloned route should be different
	 * to avoid conflict with /32 cloning routes.
	 *
	 * It should also be higher to let the ARP layer find
	 * cloned routes instead of the cloning one.
	 */
	KERNEL_LOCK();
	error = rtrequest(RTM_RESOLVE, &info, rt->rt_priority - 1, &rt,
	    rtableid);
	KERNEL_UNLOCK();
	if (error) {
		rtm_miss(RTM_MISS, &info, 0, RTP_NONE, 0, error, rtableid);
	} else {
		/* Inform listeners of the new route */
		rtm_send(rt, RTM_ADD, 0, rtableid);
		rtfree(*rtp);	/* drop the cloning parent's reference... */
		*rtp = rt;	/* ...and hand back the cloned entry */
	}
	return (error);
}
280 
/*
 * Originated from bridge_hash() in if_bridge.c
 *
 * Mix three 32-bit words so that every input bit influences the
 * output; multi-statement, hence the do/while(0) wrapper.
 */
#define mix(a, b, c) do {						\
	a -= b; a -= c; a ^= (c >> 13);					\
	b -= c; b -= a; b ^= (a << 8);					\
	c -= a; c -= b; c ^= (b >> 13);					\
	a -= b; a -= c; a ^= (c >> 12);					\
	b -= c; b -= a; b ^= (a << 16);					\
	c -= a; c -= b; c ^= (b >> 5);					\
	a -= b; a -= c; a ^= (c >> 3);					\
	b -= c; b -= a; b ^= (a << 10);					\
	c -= a; c -= b; c ^= (b >> 15);					\
} while (0)
295 
/*
 * Hash the (destination, source) address pair of a multipath route
 * into a 16-bit value used to pick one path among equal-cost routes.
 * Returns -1 when no hash applies: missing source, invalid or
 * non-RTF_MPATH route, or multipath disabled for the protocol.
 */
int
rt_hash(struct rtentry *rt, const struct sockaddr *dst, uint32_t *src)
{
	uint32_t a, b, c;

	if (src == NULL || !rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_MPATH))
		return (-1);

	/* Fixed seed plus the per-boot jitter set up in route_init(). */
	a = b = 0x9e3779b9;
	c = rt_hashjitter;

	switch (dst->sa_family) {
	case AF_INET:
	    {
		const struct sockaddr_in *sin;

		if (!ipmultipath)
			return (-1);

		sin = satosin_const(dst);
		a += sin->sin_addr.s_addr;
		b += src[0];
		mix(a, b, c);
		break;
	    }
#ifdef INET6
	case AF_INET6:
	    {
		const struct sockaddr_in6 *sin6;

		if (!ip6_multipath)
			return (-1);

		/* Fold in all four 32-bit words of both v6 addresses. */
		sin6 = satosin6_const(dst);
		a += sin6->sin6_addr.s6_addr32[0];
		b += sin6->sin6_addr.s6_addr32[2];
		c += src[0];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[1];
		b += sin6->sin6_addr.s6_addr32[3];
		c += src[1];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[2];
		b += sin6->sin6_addr.s6_addr32[1];
		c += src[2];
		mix(a, b, c);
		a += sin6->sin6_addr.s6_addr32[3];
		b += sin6->sin6_addr.s6_addr32[0];
		c += src[3];
		mix(a, b, c);
		break;
	    }
#endif /* INET6 */
	}

	return (c & 0xffff);
}
353 
/*
 * Allocate a route, potentially using multipath to select the peer.
 * ``src'' feeds rt_hash() inside the table lookup to pick among
 * equal-cost paths; an L2 entry is resolved if needed (RT_RESOLVE).
 */
struct rtentry *
rtalloc_mpath(struct sockaddr *dst, uint32_t *src, unsigned int rtableid)
{
	return (rt_match(dst, src, RT_RESOLVE, rtableid));
}
362 
/*
 * Look in the routing table for the best matching entry for
 * ``dst''.
 *
 * Convenience wrapper around rt_match() without multipath source
 * hashing; ``flags'' may include RT_RESOLVE to have an L2 entry
 * cloned and resolved when the lookup hits a cloning route.
 */
struct rtentry *
rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
{
	return (rt_match(dst, NULL, flags, rtableid));
}
375 
/*
 * Cache the route entry corresponding to a reachable next hop in
 * the gateway entry ``rt''.
 *
 * Returns 0 on success; ENOENT when no next hop route exists,
 * EHOSTUNREACH / ENETUNREACH when the next hop found cannot be
 * used, or any error from rt_clone().
 */
int
rt_setgwroute(struct rtentry *rt, u_int rtableid)
{
	struct rtentry *prt, *nhrt;
	unsigned int rdomain = rtable_l2(rtableid);
	int error;

	NET_ASSERT_LOCKED();

	KASSERT(ISSET(rt->rt_flags, RTF_GATEWAY));

	/* If we cannot find a valid next hop bail. */
	nhrt = rt_match(rt->rt_gateway, NULL, RT_RESOLVE, rdomain);
	if (nhrt == NULL)
		return (ENOENT);

	/* Next hop entry must be on the same interface. */
	if (nhrt->rt_ifidx != rt->rt_ifidx) {
		struct sockaddr_in6	sa_mask;

		/* Only cloned L2 entries may be re-derived below. */
		if (!ISSET(nhrt->rt_flags, RTF_LLINFO) ||
		    !ISSET(nhrt->rt_flags, RTF_CLONED)) {
			rtfree(nhrt);
			return (EHOSTUNREACH);
		}

		/*
		 * We found a L2 entry, so we might have multiple
		 * RTF_CLONING routes for the same subnet.  Query
		 * the first route of the multipath chain and iterate
		 * until we find the correct one.
		 */
		prt = rtable_lookup(rdomain, rt_key(nhrt->rt_parent),
		    rt_plen2mask(nhrt->rt_parent, &sa_mask), NULL, RTP_ANY);
		rtfree(nhrt);

		while (prt != NULL && prt->rt_ifidx != rt->rt_ifidx)
			prt = rtable_iterate(prt);

		/* We found nothing or a non-cloning MPATH route. */
		if (prt == NULL || !ISSET(prt->rt_flags, RTF_CLONING)) {
			rtfree(prt);
			return (EHOSTUNREACH);
		}

		/* Clone an L2 entry on the right interface. */
		error = rt_clone(&prt, rt->rt_gateway, rdomain);
		if (error) {
			rtfree(prt);
			return (error);
		}
		nhrt = prt;
	}

	/*
	 * Next hop must be reachable, this also prevents rtentry
	 * loops for example when rt->rt_gwroute points to rt.
	 */
	if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
		rtfree(nhrt);
		return (ENETUNREACH);
	}

	/* Next hop is valid so remove possible old cache. */
	rt_putgwroute(rt);
	KASSERT(rt->rt_gwroute == NULL);

	/*
	 * If the MTU of next hop is 0, this will reset the MTU of the
	 * route to run PMTUD again from scratch.
	 */
	if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
		rt->rt_mtu = nhrt->rt_mtu;

	/*
	 * To avoid reference counting problems when writing link-layer
	 * addresses in an outgoing packet, we ensure that the lifetime
	 * of a cached entry is greater than the bigger lifetime of the
	 * gateway entries it is pointed by.
	 */
	nhrt->rt_flags |= RTF_CACHED;
	nhrt->rt_cachecnt++;

	/* Transfer our reference on ``nhrt'' to the cache. */
	rt->rt_gwroute = nhrt;

	return (0);
}
466 
467 /*
468  * Invalidate the cached route entry of the gateway entry ``rt''.
469  */
470 void
471 rt_putgwroute(struct rtentry *rt)
472 {
473 	struct rtentry *nhrt = rt->rt_gwroute;
474 
475 	NET_ASSERT_LOCKED();
476 
477 	if (!ISSET(rt->rt_flags, RTF_GATEWAY) || nhrt == NULL)
478 		return;
479 
480 	KASSERT(ISSET(nhrt->rt_flags, RTF_CACHED));
481 	KASSERT(nhrt->rt_cachecnt > 0);
482 
483 	--nhrt->rt_cachecnt;
484 	if (nhrt->rt_cachecnt == 0)
485 		nhrt->rt_flags &= ~RTF_CACHED;
486 
487 	rtfree(rt->rt_gwroute);
488 	rt->rt_gwroute = NULL;
489 }
490 
/* Take a reference on a route entry. */
void
rtref(struct rtentry *rt)
{
	refcnt_take(&rt->rt_refcnt);
}
496 
/*
 * Release a reference on ``rt''.  When the last reference goes away
 * the entry, which must already be out of the table (!RTF_UP), is
 * destroyed together with its timers, label, MPLS state and its
 * gateway and destination sockaddrs.  NULL is accepted.
 */
void
rtfree(struct rtentry *rt)
{
	if (rt == NULL)
		return;

	if (refcnt_rele(&rt->rt_refcnt) == 0)
		return;

	/* Last reference dropped: the entry must be off the table. */
	KASSERT(!ISSET(rt->rt_flags, RTF_UP));
	KASSERT(!RT_ROOT(rt));
	atomic_dec_int(&rttrash);

	rt_timer_remove_all(rt);
	ifafree(rt->rt_ifa);
	rtlabel_unref(rt->rt_labelid);
#ifdef MPLS
	rt_mpls_clear(rt);
#endif
	free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len));
	free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);

	pool_put(&rtentry_pool, rt);
}
521 
/* Take a reference on an interface address and return it. */
struct ifaddr *
ifaref(struct ifaddr *ifa)
{
	refcnt_take(&ifa->ifa_refcnt);
	return ifa;
}
528 
/* Release a reference on ``ifa'', freeing it on the last one. */
void
ifafree(struct ifaddr *ifa)
{
	if (refcnt_rele(&ifa->ifa_refcnt) == 0)
		return;
	free(ifa, M_IFADDR, 0);
}
536 
/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * On success and when ``rtp'' is non-NULL, a reference to the
 * (new or modified) route is handed back through it.  In every
 * case an RTM_REDIRECT message is sent and statistics updated.
 */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *src, struct rtentry **rtp, unsigned int rdomain)
{
	struct rtentry		*rt;
	int			 error = 0;
	enum rtstat_counters	 stat = rts_ncounters;
	struct rt_addrinfo	 info;
	struct ifaddr		*ifa;
	unsigned int		 ifidx = 0;
	int			 flags = RTF_GATEWAY|RTF_HOST;
	uint8_t			 prio = RTP_NONE;

	NET_ASSERT_LOCKED();

	/* verify the gateway is directly reachable */
	rt = rtalloc(gateway, 0, rdomain);
	if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_GATEWAY)) {
		rtfree(rt);
		error = ENETUNREACH;
		goto out;
	}
	/* Remember the interface and address; the lookup is dropped. */
	ifidx = rt->rt_ifidx;
	ifa = rt->rt_ifa;
	rtfree(rt);
	rt = NULL;

	rt = rtable_lookup(rdomain, dst, NULL, NULL, RTP_ANY);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
#define	equal(a1, a2) \
	((a1)->sa_len == (a2)->sa_len && \
	 bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
	if (rt != NULL && (!equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
		error = EINVAL;
	else if (ifa_ifwithaddr(gateway, rdomain) != NULL ||
	    (gateway->sa_family == AF_INET &&
	    in_broadcast(satosin(gateway)->sin_addr, rdomain)))
		error = EHOSTUNREACH;
	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if (rt == NULL)
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		if (!ISSET(rt->rt_flags, RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
create:
			rtfree(rt);
			flags |= RTF_DYNAMIC;
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			info.rti_info[RTAX_GATEWAY] = gateway;
			info.rti_ifa = ifa;
			info.rti_flags = flags;
			rt = NULL;
			error = rtrequest(RTM_ADD, &info, RTP_DEFAULT, &rt,
			    rdomain);
			if (error == 0) {
				flags = rt->rt_flags;
				prio = rt->rt_priority;
			}
			stat = rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			prio = rt->rt_priority;
			stat = rts_newgateway;
			rt_setgate(rt, gateway, rdomain);
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		/* Hand the reference to the caller, or drop it. */
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree(rt);
	}
out:
	if (error)
		rtstat_inc(rts_badredirect);
	else if (stat != rts_ncounters)
		rtstat_inc(stat);
	/* Always report the redirect to routing socket listeners. */
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_AUTHOR] = src;
	rtm_miss(RTM_REDIRECT, &info, flags, prio, ifidx, error, rdomain);
}
653 
/*
 * Delete a route and generate a message.
 *
 * ``rt'' must belong to ``ifp''.  Returns the error from
 * rtrequest_delete(); on success the caller's reference on
 * ``rt'' is dropped here.
 */
int
rtdeletemsg(struct rtentry *rt, struct ifnet *ifp, u_int tableid)
{
	int			error;
	struct rt_addrinfo	info;
	struct sockaddr_rtlabel sa_rl;
	struct sockaddr_in6	sa_mask;

	KASSERT(rt->rt_ifidx == ifp->if_index);

	/*
	 * Request the new route so that the entry is not actually
	 * deleted.  That will allow the information being reported to
	 * be accurate (and consistent with route_output()).
	 */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	if (!ISSET(rt->rt_flags, RTF_HOST))
		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
	info.rti_flags = rt->rt_flags;
	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
	error = rtrequest_delete(&info, rt->rt_priority, ifp, &rt, tableid);
	rtm_miss(RTM_DELETE, &info, info.rti_flags, rt->rt_priority,
	    rt->rt_ifidx, error, tableid);
	if (error == 0)
		rtfree(rt);
	return (error);
}
688 
689 static inline int
690 rtequal(struct rtentry *a, struct rtentry *b)
691 {
692 	if (a == b)
693 		return 1;
694 
695 	if (memcmp(rt_key(a), rt_key(b), rt_key(a)->sa_len) == 0 &&
696 	    rt_plen(a) == rt_plen(b))
697 		return 1;
698 	else
699 		return 0;
700 }
701 
/*
 * rtable_walk() callback used by rtflushclone(): decide whether the
 * entry ``rt'' is a cloned child of the cloning route ``arg''.
 * Returns EEXIST to make the walker stop on this entry (so the
 * caller can delete it), 0 to skip it.
 */
int
rtflushclone1(struct rtentry *rt, void *arg, u_int id)
{
	struct rtentry *cloningrt = arg;
	struct ifnet *ifp;

	if (!ISSET(rt->rt_flags, RTF_CLONED))
		return 0;

	/* Cached route must stay alive as long as their parent are alive. */
	if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt))
		return 0;

	if (!rtequal(rt->rt_parent, cloningrt))
		return 0;
	/*
	 * This happens when an interface with a RTF_CLONING route is
	 * being detached.  In this case it's safe to bail because all
	 * the routes are being purged by rt_ifa_purge().
	 */
	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL)
		return 0;

	if_put(ifp);
	return EEXIST;
}
729 
/*
 * Delete every cloned child of the cloning route ``parent'' in
 * table ``rtableid''.  The walk is restarted (EAGAIN) after each
 * deletion because rtdeletemsg() modifies the table.
 */
int
rtflushclone(struct rtentry *parent, unsigned int rtableid)
{
	struct rtentry *rt = NULL;
	struct ifnet *ifp;
	int error;

#ifdef DIAGNOSTIC
	if (!parent || (parent->rt_flags & RTF_CLONING) == 0)
		panic("rtflushclone: called with a non-cloning route");
#endif

	do {
		error = rtable_walk(rtableid, rt_key(parent)->sa_family, &rt,
		    rtflushclone1, parent);
		if (rt != NULL && error == EEXIST) {
			/* The walker stopped on a child: delete and restart. */
			ifp = if_get(rt->rt_ifidx);
			if (ifp == NULL) {
				error = EAGAIN;
			} else {
				error = rtdeletemsg(rt, ifp, rtableid);
				if (error == 0)
					error = EAGAIN;
				if_put(ifp);
			}
		}
		rtfree(rt);
		rt = NULL;
	} while (error == EAGAIN);

	return error;

}
763 
/*
 * Remove a route matching ``info'' from table ``tableid''.
 *
 * When ``ifp'' is given, only a route on that interface is removed.
 * On success the entry is handed back through ``ret_nrt'' (with the
 * lookup reference transferred to the caller) or freed when
 * ``ret_nrt'' is NULL.  Returns 0, EAFNOSUPPORT or ESRCH.
 */
int
rtrequest_delete(struct rt_addrinfo *info, u_int8_t prio, struct ifnet *ifp,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct rtentry	*rt;
	int		 error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], prio);
	if (rt == NULL)
		return (ESRCH);

	/* Make sure that's the route the caller want to delete. */
	if (ifp != NULL && ifp->if_index != rt->rt_ifidx) {
		rtfree(rt);
		return (ESRCH);
	}

#ifdef BFD
	if (ISSET(rt->rt_flags, RTF_BFD))
		bfdclear(rt);
#endif

	error = rtable_delete(tableid, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rt);
	if (error != 0) {
		rtfree(rt);
		return (ESRCH);
	}

	/* Release next hop cache before flushing cloned entries. */
	rt_putgwroute(rt);

	/* Clean up any cloned children. */
	if (ISSET(rt->rt_flags, RTF_CLONING))
		rtflushclone(rt, tableid);

	rtfree(rt->rt_parent);
	rt->rt_parent = NULL;

	rt->rt_flags &= ~RTF_UP;

	/*
	 * NOTE(review): ``ifp'' is NULL-checked above but dereferenced
	 * unconditionally here, so callers apparently always pass a
	 * non-NULL ifp — confirm before relying on the NULL check.
	 */
	KASSERT(ifp->if_index == rt->rt_ifidx);
	ifp->if_rtrequest(ifp, RTM_DELETE, rt);

	atomic_inc_int(&rttrash);

	if (ret_nrt != NULL)
		*ret_nrt = rt;
	else
		rtfree(rt);

	return (0);
}
822 
/*
 * Add (RTM_ADD) or clone (RTM_RESOLVE) a route described by ``info''
 * in table ``tableid''.  RTM_DELETE is rejected; deletions go through
 * rtrequest_delete().  On success the new entry is handed back via
 * ``ret_nrt'' (with a reference) or freed when ``ret_nrt'' is NULL.
 * Returns 0 or an errno (EINVAL, EAFNOSUPPORT, ENOBUFS, EEXIST, ...).
 */
int
rtrequest(int req, struct rt_addrinfo *info, u_int8_t prio,
    struct rtentry **ret_nrt, u_int tableid)
{
	struct ifnet		*ifp;
	struct rtentry		*rt, *crt;
	struct ifaddr		*ifa;
	struct sockaddr		*ndst;
	struct sockaddr_rtlabel	*sa_rl, sa_rl2;
	struct sockaddr_dl	 sa_dl = { sizeof(sa_dl), AF_LINK };
	int			 error;

	NET_ASSERT_LOCKED();

	if (!rtable_exists(tableid))
		return (EAFNOSUPPORT);
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	switch (req) {
	case RTM_DELETE:
		/* Deletion must go through rtrequest_delete(). */
		return (EINVAL);

	case RTM_RESOLVE:
		/* Derive the cloned entry's attributes from its parent. */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			return (EINVAL);
		if ((rt->rt_flags & RTF_CLONING) == 0)
			return (EINVAL);
		KASSERT(rt->rt_ifa->ifa_ifp != NULL);
		info->rti_ifa = rt->rt_ifa;
		info->rti_flags = rt->rt_flags | (RTF_CLONED|RTF_HOST);
		info->rti_flags &= ~(RTF_CLONING|RTF_CONNECTED|RTF_STATIC);
		info->rti_info[RTAX_GATEWAY] = sdltosa(&sa_dl);
		info->rti_info[RTAX_LABEL] =
		    rtlabel_id2sa(rt->rt_labelid, &sa_rl2);
		/* FALLTHROUGH */

	case RTM_ADD:
		if (info->rti_ifa == NULL)
			return (EINVAL);
		ifa = info->rti_ifa;
		ifp = ifa->ifa_ifp;
		if (prio == 0)
			prio = ifp->if_priority + RTP_STATIC;

		/* Allocate a masked copy of the destination for the table. */
		error = rt_copysa(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], &ndst);
		if (error)
			return (error);

		rt = pool_get(&rtentry_pool, PR_NOWAIT | PR_ZERO);
		if (rt == NULL) {
			free(ndst, M_RTABLE, ndst->sa_len);
			return (ENOBUFS);
		}

		refcnt_init_trace(&rt->rt_refcnt, DT_REFCNT_IDX_RTENTRY);
		rt->rt_flags = info->rti_flags | RTF_UP;
		rt->rt_priority = prio;	/* init routing priority */
		LIST_INIT(&rt->rt_timer);

		/* Check the link state if the table supports it. */
		if (rtable_mpath_capable(tableid, ndst->sa_family) &&
		    !ISSET(rt->rt_flags, RTF_LOCAL) &&
		    (!LINK_STATE_IS_UP(ifp->if_link_state) ||
		    !ISSET(ifp->if_flags, IFF_UP))) {
			rt->rt_flags &= ~RTF_UP;
			rt->rt_priority |= RTP_DOWN;
		}

		if (info->rti_info[RTAX_LABEL] != NULL) {
			sa_rl = (struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL];
			rt->rt_labelid = rtlabel_name2id(sa_rl->sr_label);
		}

#ifdef MPLS
		/* We have to allocate additional space for MPLS infos */
		if (info->rti_flags & RTF_MPLS &&
		    (info->rti_info[RTAX_SRC] != NULL ||
		    info->rti_info[RTAX_DST]->sa_family == AF_MPLS)) {
			error = rt_mpls_set(rt, info->rti_info[RTAX_SRC],
			    info->rti_mpls);
			if (error) {
				free(ndst, M_RTABLE, ndst->sa_len);
				pool_put(&rtentry_pool, rt);
				return (error);
			}
		} else
			rt_mpls_clear(rt);
#endif

		rt->rt_ifa = ifaref(ifa);
		rt->rt_ifidx = ifp->if_index;
		/*
		 * Copy metrics and a back pointer from the cloned
		 * route's parent.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED)) {
			rtref(*ret_nrt);
			rt->rt_parent = *ret_nrt;
			rt->rt_rmx = (*ret_nrt)->rt_rmx;
		}

		/*
		 * We must set rt->rt_gateway before adding ``rt'' to
		 * the routing table because the radix MPATH code use
		 * it to (re)order routes.
		 */
		if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
		    tableid))) {
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt);
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (error);
		}

		error = rtable_insert(tableid, ndst,
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    rt->rt_priority, rt);
		if (error != 0 &&
		    (crt = rtable_match(tableid, ndst, NULL)) != NULL) {
			/* overwrite cloned route */
			if (ISSET(crt->rt_flags, RTF_CLONED) &&
			    !ISSET(crt->rt_flags, RTF_CACHED)) {
				struct ifnet *cifp;

				cifp = if_get(crt->rt_ifidx);
				KASSERT(cifp != NULL);
				rtdeletemsg(crt, cifp, tableid);
				if_put(cifp);

				error = rtable_insert(tableid, ndst,
				    info->rti_info[RTAX_NETMASK],
				    info->rti_info[RTAX_GATEWAY],
				    rt->rt_priority, rt);
			}
			rtfree(crt);
		}
		if (error != 0) {
			/* Undo everything set up above. */
			ifafree(ifa);
			rtfree(rt->rt_parent);
			rt_putgwroute(rt);
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
			free(ndst, M_RTABLE, ndst->sa_len);
			pool_put(&rtentry_pool, rt);
			return (EEXIST);
		}
		ifp->if_rtrequest(ifp, req, rt);

		if_group_routechange(info->rti_info[RTAX_DST],
			info->rti_info[RTAX_NETMASK]);

		if (ret_nrt != NULL)
			*ret_nrt = rt;
		else
			rtfree(rt);
		break;
	}

	return (0);
}
989 
/*
 * Set the gateway sockaddr of ``rt'' to ``gate'', (re)allocating
 * long-aligned storage as needed.  For RTF_GATEWAY routes also
 * (re)establish the cached next hop via rt_setgwroute().
 * Returns 0 or ENOBUFS.
 */
int
rt_setgate(struct rtentry *rt, struct sockaddr *gate, u_int rtableid)
{
	int glen = ROUNDUP(gate->sa_len);
	struct sockaddr *sa;

	/* Reallocate only when the rounded-up size actually changes. */
	if (rt->rt_gateway == NULL || glen != ROUNDUP(rt->rt_gateway->sa_len)) {
		sa = malloc(glen, M_RTABLE, M_NOWAIT);
		if (sa == NULL)
			return (ENOBUFS);
		if (rt->rt_gateway != NULL) {
			free(rt->rt_gateway, M_RTABLE,
			    ROUNDUP(rt->rt_gateway->sa_len));
		}
		rt->rt_gateway = sa;
	}
	/* NOTE(review): copies the ROUNDUP()ed length, i.e. up to
	 * sizeof(long)-1 bytes past gate->sa_len — presumably always
	 * backed by sufficiently large storage; confirm with callers. */
	memmove(rt->rt_gateway, gate, glen);

	if (ISSET(rt->rt_flags, RTF_GATEWAY))
		return (rt_setgwroute(rt, rtableid));

	return (0);
}
1013 
1014 /*
1015  * Return the route entry containing the next hop link-layer
1016  * address corresponding to ``rt''.
1017  */
1018 struct rtentry *
1019 rt_getll(struct rtentry *rt)
1020 {
1021 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
1022 		KASSERT(rt->rt_gwroute != NULL);
1023 		return (rt->rt_gwroute);
1024 	}
1025 
1026 	return (rt);
1027 }
1028 
/*
 * Copy ``src'' into ``dst'' byte-wise ANDed with ``netmask''.
 *
 * The first two bytes (sa_len and sa_family on this platform) are
 * copied verbatim.  Masking stops at the shorter of the mask and
 * source lengths (each taken from the sockaddr's first byte); any
 * remaining bytes up to the source length are zeroed.
 *
 * Uses standard memset() instead of legacy bzero() and plain
 * unsigned char pointers for the byte walk.
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	unsigned char	*in = (unsigned char *)src;
	unsigned char	*out = (unsigned char *)dst;
	unsigned char	*mask = (unsigned char *)netmask;
	unsigned char	*masked_end = out + mask[0];	/* bounded by mask len */
	unsigned char	*copy_end = out + in[0];	/* bounded by src len */

	/* Never mask more bytes than the source provides. */
	if (masked_end > copy_end)
		masked_end = copy_end;

	/* sa_len and sa_family are copied through unmasked. */
	*out++ = *in++;
	*out++ = *in++;
	mask += 2;

	while (out < masked_end)
		*out++ = *in++ & *mask++;
	if (out < copy_end)
		memset(out, 0, copy_end - out);
}
1048 
1049 /*
1050  * allocate new sockaddr structure based on the user supplied src and mask
1051  * that is useable for the routing table.
1052  */
1053 static int
1054 rt_copysa(struct sockaddr *src, struct sockaddr *mask, struct sockaddr **dst)
1055 {
1056 	static const u_char maskarray[] = {
1057 	    0x0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
1058 	struct sockaddr *ndst;
1059 	const struct domain *dp;
1060 	u_char *csrc, *cdst;
1061 	int i, plen;
1062 
1063 	for (i = 0; (dp = domains[i]) != NULL; i++) {
1064 		if (dp->dom_rtoffset == 0)
1065 			continue;
1066 		if (src->sa_family == dp->dom_family)
1067 			break;
1068 	}
1069 	if (dp == NULL)
1070 		return (EAFNOSUPPORT);
1071 
1072 	if (src->sa_len < dp->dom_sasize)
1073 		return (EINVAL);
1074 
1075 	plen = rtable_satoplen(src->sa_family, mask);
1076 	if (plen == -1)
1077 		return (EINVAL);
1078 
1079 	ndst = malloc(dp->dom_sasize, M_RTABLE, M_NOWAIT|M_ZERO);
1080 	if (ndst == NULL)
1081 		return (ENOBUFS);
1082 
1083 	ndst->sa_family = src->sa_family;
1084 	ndst->sa_len = dp->dom_sasize;
1085 
1086 	csrc = (u_char *)src + dp->dom_rtoffset;
1087 	cdst = (u_char *)ndst + dp->dom_rtoffset;
1088 
1089 	memcpy(cdst, csrc, plen / 8);
1090 	if (plen % 8 != 0)
1091 		cdst[plen / 8] = csrc[plen / 8] & maskarray[plen % 8];
1092 
1093 	*dst = ndst;
1094 	return (0);
1095 }
1096 
/*
 * Add a route for destination ``dst'' through interface address ``ifa''
 * to routing table ``rdomain''.  On success userland is notified via
 * routing socket messages.
 */
int
rt_ifa_add(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet		*ifp = ifa->ifa_ifp;
	struct rtentry		*rt;
	struct sockaddr_rtlabel	 sa_rl;
	struct rt_addrinfo	 info;
	uint8_t			 prio = ifp->if_priority + RTP_STATIC;
	int			 error;

	/* Caller must pass a routing domain, not an arbitrary table id. */
	KASSERT(rdomain == rtable_l2(rdomain));

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	/* Link-level routes use the interface's link address as gateway. */
	if (flags & RTF_LLINFO)
		info.rti_info[RTAX_GATEWAY] = sdltosa(ifp->if_sadl);
	else
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

#ifdef MPLS
	if ((flags & RTF_MPLS) == RTF_MPLS)
		info.rti_mpls = MPLS_OP_POP;
#endif /* MPLS */

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	/* Priority: connected overrides local, which overrides static. */
	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	error = rtrequest(RTM_ADD, &info, prio, &rt, rdomain);
	if (error == 0) {
		/*
		 * A local route is created for every address configured
		 * on an interface, so use this information to notify
		 * userland that a new address has been added.
		 */
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_NEWADDR, ifa);
		rtm_send(rt, RTM_ADD, 0, rdomain);
		rtfree(rt);
	}
	return (error);
}
1148 
/*
 * Delete the route for destination ``dst'' through interface address
 * ``ifa'' from routing table ``rdomain''.  Counterpart of rt_ifa_add().
 */
int
rt_ifa_del(struct ifaddr *ifa, int flags, struct sockaddr *dst,
    unsigned int rdomain)
{
	struct ifnet		*ifp = ifa->ifa_ifp;
	struct rtentry		*rt;
	struct mbuf		*m = NULL;
	struct sockaddr		*deldst;
	struct rt_addrinfo	 info;
	struct sockaddr_rtlabel	 sa_rl;
	uint8_t			 prio = ifp->if_priority + RTP_STATIC;
	int			 error;

	KASSERT(rdomain == rtable_l2(rdomain));

	/*
	 * For network routes, look up the masked destination; an mbuf
	 * is borrowed as scratch storage for the masked copy.
	 */
	if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
		m = m_get(M_DONTWAIT, MT_SONAME);
		if (m == NULL)
			return (ENOBUFS);
		deldst = mtod(m, struct sockaddr *);
		rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
		dst = deldst;
	}

	memset(&info, 0, sizeof(info));
	info.rti_ifa = ifa;
	info.rti_flags = flags;
	info.rti_info[RTAX_DST] = dst;
	if ((flags & RTF_LLINFO) == 0)
		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(ifp->if_rtlabelid, &sa_rl);

	if ((flags & RTF_HOST) == 0)
		info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;

	/* Priority selection mirrors rt_ifa_add(). */
	if (flags & (RTF_LOCAL|RTF_BROADCAST))
		prio = RTP_LOCAL;

	if (flags & RTF_CONNECTED)
		prio = ifp->if_priority + RTP_CONNECTED;

	rtable_clearsource(rdomain, ifa->ifa_addr);
	error = rtrequest_delete(&info, prio, ifp, &rt, rdomain);
	if (error == 0) {
		rtm_send(rt, RTM_DELETE, 0, rdomain);
		if (flags & RTF_LOCAL)
			rtm_addr(RTM_DELADDR, ifa);
		rtfree(rt);
	}
	/* m_free(NULL) is a no-op; release the scratch mbuf if any. */
	m_free(m);

	return (error);
}
1202 
1203 /*
1204  * Add ifa's address as a local rtentry.
1205  */
1206 int
1207 rt_ifa_addlocal(struct ifaddr *ifa)
1208 {
1209 	struct ifnet *ifp = ifa->ifa_ifp;
1210 	struct rtentry *rt;
1211 	u_int flags = RTF_HOST|RTF_LOCAL;
1212 	int error = 0;
1213 
1214 	/*
1215 	 * If the configured address correspond to the magical "any"
1216 	 * address do not add a local route entry because that might
1217 	 * corrupt the routing tree which uses this value for the
1218 	 * default routes.
1219 	 */
1220 	switch (ifa->ifa_addr->sa_family) {
1221 	case AF_INET:
1222 		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
1223 			return (0);
1224 		break;
1225 #ifdef INET6
1226 	case AF_INET6:
1227 		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
1228 		    &in6addr_any))
1229 			return (0);
1230 		break;
1231 #endif
1232 	default:
1233 		break;
1234 	}
1235 
1236 	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
1237 		flags |= RTF_LLINFO;
1238 
1239 	/* If there is no local entry, allocate one. */
1240 	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
1241 	if (rt == NULL || ISSET(rt->rt_flags, flags) != flags) {
1242 		error = rt_ifa_add(ifa, flags | RTF_MPATH, ifa->ifa_addr,
1243 		    ifp->if_rdomain);
1244 	}
1245 	rtfree(rt);
1246 
1247 	return (error);
1248 }
1249 
1250 /*
1251  * Remove local rtentry of ifa's address if it exists.
1252  */
1253 int
1254 rt_ifa_dellocal(struct ifaddr *ifa)
1255 {
1256 	struct ifnet *ifp = ifa->ifa_ifp;
1257 	struct rtentry *rt;
1258 	u_int flags = RTF_HOST|RTF_LOCAL;
1259 	int error = 0;
1260 
1261 	/*
1262 	 * We do not add local routes for such address, so do not bother
1263 	 * removing them.
1264 	 */
1265 	switch (ifa->ifa_addr->sa_family) {
1266 	case AF_INET:
1267 		if (satosin(ifa->ifa_addr)->sin_addr.s_addr == INADDR_ANY)
1268 			return (0);
1269 		break;
1270 #ifdef INET6
1271 	case AF_INET6:
1272 		if (IN6_ARE_ADDR_EQUAL(&satosin6(ifa->ifa_addr)->sin6_addr,
1273 		    &in6addr_any))
1274 			return (0);
1275 		break;
1276 #endif
1277 	default:
1278 		break;
1279 	}
1280 
1281 	if (!ISSET(ifp->if_flags, (IFF_LOOPBACK|IFF_POINTOPOINT)))
1282 		flags |= RTF_LLINFO;
1283 
1284 	/*
1285 	 * Before deleting, check if a corresponding local host
1286 	 * route surely exists.  With this check, we can avoid to
1287 	 * delete an interface direct route whose destination is same
1288 	 * as the address being removed.  This can happen when removing
1289 	 * a subnet-router anycast address on an interface attached
1290 	 * to a shared medium.
1291 	 */
1292 	rt = rtalloc(ifa->ifa_addr, 0, ifp->if_rdomain);
1293 	if (rt != NULL && ISSET(rt->rt_flags, flags) == flags) {
1294 		error = rt_ifa_del(ifa, flags, ifa->ifa_addr,
1295 		    ifp->if_rdomain);
1296 	}
1297 	rtfree(rt);
1298 
1299 	return (error);
1300 }
1301 
1302 /*
1303  * Remove all addresses attached to ``ifa''.
1304  */
1305 void
1306 rt_ifa_purge(struct ifaddr *ifa)
1307 {
1308 	struct ifnet		*ifp = ifa->ifa_ifp;
1309 	struct rtentry		*rt = NULL;
1310 	unsigned int		 rtableid;
1311 	int			 error, af = ifa->ifa_addr->sa_family;
1312 
1313 	KASSERT(ifp != NULL);
1314 
1315 	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
1316 		/* skip rtables that are not in the rdomain of the ifp */
1317 		if (rtable_l2(rtableid) != ifp->if_rdomain)
1318 			continue;
1319 
1320 		do {
1321 			error = rtable_walk(rtableid, af, &rt,
1322 			    rt_ifa_purge_walker, ifa);
1323 			if (rt != NULL && error == EEXIST) {
1324 				error = rtdeletemsg(rt, ifp, rtableid);
1325 				if (error == 0)
1326 					error = EAGAIN;
1327 			}
1328 			rtfree(rt);
1329 			rt = NULL;
1330 		} while (error == EAGAIN);
1331 
1332 		if (error == EAFNOSUPPORT)
1333 			error = 0;
1334 
1335 		if (error)
1336 			break;
1337 	}
1338 }
1339 
1340 int
1341 rt_ifa_purge_walker(struct rtentry *rt, void *vifa, unsigned int rtableid)
1342 {
1343 	struct ifaddr		*ifa = vifa;
1344 
1345 	if (rt->rt_ifa == ifa)
1346 		return EEXIST;
1347 
1348 	return 0;
1349 }
1350 
1351 /*
1352  * Route timer routines.  These routines allow functions to be called
1353  * for various routes at any time.  This is useful in supporting
1354  * path MTU discovery and redirect route deletion.
1355  *
1356  * This is similar to some BSDI internal functions, but it provides
1357  * for multiple queues for efficiency's sake...
1358  */
1359 
1360 struct mutex			rttimer_mtx;
1361 
1362 struct rttimer {
1363 	TAILQ_ENTRY(rttimer)	rtt_next;	/* [T] entry on timer queue */
1364 	LIST_ENTRY(rttimer)	rtt_link;	/* [T] timers per rtentry */
1365 	struct timeout		rtt_timeout;	/* [I] timeout for this entry */
1366 	struct rttimer_queue	*rtt_queue;	/* [I] back pointer to queue */
1367 	struct rtentry		*rtt_rt;	/* [T] back pointer to route */
1368 	time_t			rtt_expire;	/* [I] rt expire time */
1369 	u_int			rtt_tableid;	/* [I] rtable id of rtt_rt */
1370 };
1371 
1372 #define RTTIMER_CALLOUT(r)	{					\
1373 	if (r->rtt_queue->rtq_func != NULL) {				\
1374 		(*r->rtt_queue->rtq_func)(r->rtt_rt, r->rtt_tableid);	\
1375 	} else {							\
1376 		struct ifnet *ifp;					\
1377 									\
1378 		ifp = if_get(r->rtt_rt->rt_ifidx);			\
1379 		if (ifp != NULL &&					\
1380 		    (r->rtt_rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) ==	\
1381 		    (RTF_DYNAMIC|RTF_HOST))				\
1382 			rtdeletemsg(r->rtt_rt, ifp, r->rtt_tableid);	\
1383 		if_put(ifp);						\
1384 	}								\
1385 }
1386 
/* One-time initialization of the rttimer pool and its mutex. */
void
rt_timer_init(void)
{
	pool_init(&rttimer_pool, sizeof(struct rttimer), 0,
	    IPL_MPFLOOR, 0, "rttmr", NULL);
	mtx_init(&rttimer_mtx, IPL_MPFLOOR);
}
1394 
1395 void
1396 rt_timer_queue_init(struct rttimer_queue *rtq, int timeout,
1397     void (*func)(struct rtentry *, u_int))
1398 {
1399 	rtq->rtq_timeout = timeout;
1400 	rtq->rtq_count = 0;
1401 	rtq->rtq_func = func;
1402 	TAILQ_INIT(&rtq->rtq_head);
1403 }
1404 
/*
 * Change the timeout of queue ``rtq''.  Only timers added afterwards
 * pick up the new value (rt_timer_add() reads it at insertion time).
 */
void
rt_timer_queue_change(struct rttimer_queue *rtq, int timeout)
{
	mtx_enter(&rttimer_mtx);
	rtq->rtq_timeout = timeout;
	mtx_leave(&rttimer_mtx);
}
1412 
/*
 * Immediately run and destroy every timer on queue ``rtq''.
 */
void
rt_timer_queue_flush(struct rttimer_queue *rtq)
{
	struct rttimer		*r;
	TAILQ_HEAD(, rttimer)	 rttlist;

	NET_ASSERT_LOCKED();

	/*
	 * Phase 1: under the mutex, unlink every timer from the queue
	 * and its route onto a local list.
	 */
	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
		LIST_REMOVE(r, rtt_link);
		TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
		TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
		KASSERT(rtq->rtq_count > 0);
		rtq->rtq_count--;
	}
	mtx_leave(&rttimer_mtx);

	/* Phase 2: run the callouts without holding the mutex. */
	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		RTTIMER_CALLOUT(r);
		pool_put(&rttimer_pool, r);
	}
}
1438 
/* Return the number of timers on ``rtq'' (unlocked snapshot). */
unsigned long
rt_timer_queue_count(struct rttimer_queue *rtq)
{
	return (rtq->rtq_count);
}
1444 
/*
 * Detach ``r'' from its route and, if its timeout can still be
 * cancelled, from its queue.  Returns ``r'' when the caller must free
 * it, or NULL when the timeout already fired and rt_timer_timer()
 * will perform the cleanup.  Called with rttimer_mtx held.
 */
static inline struct rttimer *
rt_timer_unlink(struct rttimer *r)
{
	MUTEX_ASSERT_LOCKED(&rttimer_mtx);

	LIST_REMOVE(r, rtt_link);
	r->rtt_rt = NULL;

	if (timeout_del(&r->rtt_timeout) == 0) {
		/* timeout fired, so rt_timer_timer will do the cleanup */
		return NULL;
	}

	TAILQ_REMOVE(&r->rtt_queue->rtq_head, r, rtt_next);
	KASSERT(r->rtt_queue->rtq_count > 0);
	r->rtt_queue->rtq_count--;
	return r;
}
1463 
/*
 * Detach and free all timers attached to route ``rt''.
 */
void
rt_timer_remove_all(struct rtentry *rt)
{
	struct rttimer		*r;
	TAILQ_HEAD(, rttimer)	 rttlist;

	/* Unlink under the mutex; free afterwards without it. */
	TAILQ_INIT(&rttlist);
	mtx_enter(&rttimer_mtx);
	while ((r = LIST_FIRST(&rt->rt_timer)) != NULL) {
		/* NULL means the timeout fired and will free itself. */
		r = rt_timer_unlink(r);
		if (r != NULL)
			TAILQ_INSERT_TAIL(&rttlist, r, rtt_next);
	}
	mtx_leave(&rttimer_mtx);

	while ((r = TAILQ_FIRST(&rttlist)) != NULL) {
		TAILQ_REMOVE(&rttlist, r, rtt_next);
		pool_put(&rttimer_pool, r);
	}
}
1484 
1485 time_t
1486 rt_timer_get_expire(const struct rtentry *rt)
1487 {
1488 	const struct rttimer	*r;
1489 	time_t			 expire = 0;
1490 
1491 	mtx_enter(&rttimer_mtx);
1492 	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
1493 		if (expire == 0 || expire > r->rtt_expire)
1494 			expire = r->rtt_expire;
1495 	}
1496 	mtx_leave(&rttimer_mtx);
1497 
1498 	return expire;
1499 }
1500 
/*
 * Arm a timer for route ``rt'' on queue ``queue''.  Any existing timer
 * of the same queue on this route is replaced.
 */
int
rt_timer_add(struct rtentry *rt, struct rttimer_queue *queue, u_int rtableid)
{
	struct rttimer	*r, *rnew;

	rnew = pool_get(&rttimer_pool, PR_NOWAIT | PR_ZERO);
	if (rnew == NULL)
		return (ENOBUFS);

	rnew->rtt_rt = rt;
	rnew->rtt_queue = queue;
	rnew->rtt_tableid = rtableid;
	rnew->rtt_expire = getuptime() + queue->rtq_timeout;
	timeout_set_proc(&rnew->rtt_timeout, rt_timer_timer, rnew);

	mtx_enter(&rttimer_mtx);
	/*
	 * If there's already a timer with this action, destroy it before
	 * we add a new one.
	 */
	LIST_FOREACH(r, &rt->rt_timer, rtt_link) {
		if (r->rtt_queue == queue) {
			r = rt_timer_unlink(r);
			break;  /* only one per list, so we can quit... */
		}
	}

	LIST_INSERT_HEAD(&rt->rt_timer, rnew, rtt_link);
	TAILQ_INSERT_TAIL(&queue->rtq_head, rnew, rtt_next);
	timeout_add_sec(&rnew->rtt_timeout, queue->rtq_timeout);
	rnew->rtt_queue->rtq_count++;
	mtx_leave(&rttimer_mtx);

	/*
	 * ``r'' is the replaced timer (NULL if none existed or if its
	 * timeout already fired); free it outside the mutex.
	 */
	if (r != NULL)
		pool_put(&rttimer_pool, r);

	return (0);
}
1539 
/*
 * Timeout handler: remove the fired timer from its queue and route
 * list, run its action, and free it.
 */
void
rt_timer_timer(void *arg)
{
	struct rttimer		*r = arg;
	struct rttimer_queue	*rtq = r->rtt_queue;

	NET_LOCK();
	mtx_enter(&rttimer_mtx);

	/* rtt_rt is NULL when rt_timer_unlink() already detached us. */
	if (r->rtt_rt != NULL)
		LIST_REMOVE(r, rtt_link);
	TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);
	KASSERT(rtq->rtq_count > 0);
	rtq->rtq_count--;

	mtx_leave(&rttimer_mtx);

	/* Only run the action if the timer was still attached. */
	if (r->rtt_rt != NULL)
		RTTIMER_CALLOUT(r);
	NET_UNLOCK();

	pool_put(&rttimer_pool, r);
}
1563 
1564 #ifdef MPLS
/*
 * Attach MPLS label information taken from ``src'' to route ``rt''
 * and mark it RTF_MPLS.  ``op'' is the MPLS operation (e.g.
 * MPLS_OP_POP, which needs no label).
 */
int
rt_mpls_set(struct rtentry *rt, struct sockaddr *src, uint8_t op)
{
	struct sockaddr_mpls	*psa_mpls = (struct sockaddr_mpls *)src;
	struct rt_mpls		*rt_mpls;

	if (psa_mpls == NULL && op != MPLS_OP_POP)
		return (EOPNOTSUPP);
	if (psa_mpls != NULL && psa_mpls->smpls_len != sizeof(*psa_mpls))
		return (EINVAL);
	if (psa_mpls != NULL && psa_mpls->smpls_family != AF_MPLS)
		return (EAFNOSUPPORT);

	/*
	 * NOTE(review): rt_llinfo is overwritten unconditionally; an
	 * existing allocation would leak — confirm callers guarantee
	 * rt_llinfo == NULL on entry.
	 */
	rt->rt_llinfo = malloc(sizeof(struct rt_mpls), M_TEMP, M_NOWAIT|M_ZERO);
	if (rt->rt_llinfo == NULL)
		return (ENOMEM);

	rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
	if (psa_mpls != NULL)
		rt_mpls->mpls_label = psa_mpls->smpls_label;
	rt_mpls->mpls_operation = op;
	/* XXX: set experimental bits */
	rt->rt_flags |= RTF_MPLS;

	return (0);
}
1591 
1592 void
1593 rt_mpls_clear(struct rtentry *rt)
1594 {
1595 	if (rt->rt_llinfo != NULL && rt->rt_flags & RTF_MPLS) {
1596 		free(rt->rt_llinfo, M_TEMP, sizeof(struct rt_mpls));
1597 		rt->rt_llinfo = NULL;
1598 	}
1599 	rt->rt_flags &= ~RTF_MPLS;
1600 }
1601 #endif
1602 
/*
 * Map a route label name to its numeric id, creating the label (and
 * taking a reference) if it does not exist yet.  Returns 0 on failure
 * or for the empty name.
 */
u_int16_t
rtlabel_name2id(char *name)
{
	struct rt_label		*label, *p;
	u_int16_t		 new_id = 1, id = 0;

	if (!name[0])
		return (0);

	mtx_enter(&rtlabel_mtx);
	/* Existing label: bump the reference and reuse its id. */
	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
		if (strcmp(name, label->rtl_name) == 0) {
			label->rtl_ref++;
			id = label->rtl_id;
			goto out;
		}

	/*
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find. if there is none or the list
	 * is empty, append a new entry at the end.
	 */
	TAILQ_FOREACH(p, &rt_labels, rtl_entry) {
		if (p->rtl_id != new_id)
			break;
		new_id = p->rtl_id + 1;
	}
	if (new_id > LABELID_MAX)
		goto out;

	label = malloc(sizeof(*label), M_RTABLE, M_NOWAIT|M_ZERO);
	if (label == NULL)
		goto out;
	strlcpy(label->rtl_name, name, sizeof(label->rtl_name));
	label->rtl_id = new_id;
	label->rtl_ref++;

	if (p != NULL)	/* insert new entry before p */
		TAILQ_INSERT_BEFORE(p, label, rtl_entry);
	else		/* either list empty or no free slot in between */
		TAILQ_INSERT_TAIL(&rt_labels, label, rtl_entry);

	id = label->rtl_id;
out:
	mtx_leave(&rtlabel_mtx);

	return (id);
}
1651 
1652 const char *
1653 rtlabel_id2name_locked(u_int16_t id)
1654 {
1655 	struct rt_label	*label;
1656 
1657 	MUTEX_ASSERT_LOCKED(&rtlabel_mtx);
1658 
1659 	TAILQ_FOREACH(label, &rt_labels, rtl_entry)
1660 		if (label->rtl_id == id)
1661 			return (label->rtl_name);
1662 
1663 	return (NULL);
1664 }
1665 
1666 const char *
1667 rtlabel_id2name(u_int16_t id, char *rtlabelbuf, size_t sz)
1668 {
1669 	const char *label;
1670 
1671 	if (id == 0)
1672 		return (NULL);
1673 
1674 	mtx_enter(&rtlabel_mtx);
1675 	if ((label = rtlabel_id2name_locked(id)) != NULL)
1676 		strlcpy(rtlabelbuf, label, sz);
1677 	mtx_leave(&rtlabel_mtx);
1678 
1679 	if (label == NULL)
1680 		return (NULL);
1681 
1682 	return (rtlabelbuf);
1683 }
1684 
/*
 * Fill the caller-provided ``sa_rl'' with the name of route label
 * ``labelid'' as an AF_UNSPEC sockaddr_rtlabel.  Returns a pointer to
 * ``sa_rl'', or NULL when the id is 0 or unknown.
 */
struct sockaddr *
rtlabel_id2sa(u_int16_t labelid, struct sockaddr_rtlabel *sa_rl)
{
	const char	*label;

	if (labelid == 0)
		return (NULL);

	/* Copy the name while the label cannot go away. */
	mtx_enter(&rtlabel_mtx);
	if ((label = rtlabel_id2name_locked(labelid)) != NULL) {
		bzero(sa_rl, sizeof(*sa_rl));
		sa_rl->sr_len = sizeof(*sa_rl);
		sa_rl->sr_family = AF_UNSPEC;
		strlcpy(sa_rl->sr_label, label, sizeof(sa_rl->sr_label));
	}
	mtx_leave(&rtlabel_mtx);

	if (label == NULL)
		return (NULL);

	return ((struct sockaddr *)sa_rl);
}
1707 
/*
 * Drop a reference on route label ``id''; the label is freed when the
 * last reference goes away.  id 0 means "no label" and is ignored.
 */
void
rtlabel_unref(u_int16_t id)
{
	struct rt_label	*p, *next;

	if (id == 0)
		return;

	mtx_enter(&rtlabel_mtx);
	TAILQ_FOREACH_SAFE(p, &rt_labels, rtl_entry, next) {
		if (id == p->rtl_id) {
			if (--p->rtl_ref == 0) {
				TAILQ_REMOVE(&rt_labels, p, rtl_entry);
				free(p, M_RTABLE, sizeof(*p));
			}
			break;
		}
	}
	mtx_leave(&rtlabel_mtx);
}
1728 
/*
 * Update all multipath-capable routing tables of ``ifp''s rdomain
 * after a link state change; routes the walker flags with EEXIST
 * (cloned/dynamic routes on a down interface) are deleted.
 */
int
rt_if_track(struct ifnet *ifp)
{
	unsigned int rtableid;
	struct rtentry *rt = NULL;
	int i, error = 0;

	for (rtableid = 0; rtableid < rtmap_limit; rtableid++) {
		/* skip rtables that are not in the rdomain of the ifp */
		if (rtable_l2(rtableid) != ifp->if_rdomain)
			continue;
		for (i = 1; i <= AF_MAX; i++) {
			if (!rtable_mpath_capable(rtableid, i))
				continue;

			/* Restart the walk (EAGAIN) after each deletion. */
			do {
				error = rtable_walk(rtableid, i, &rt,
				    rt_if_linkstate_change, ifp);
				if (rt != NULL && error == EEXIST) {
					error = rtdeletemsg(rt, ifp, rtableid);
					if (error == 0)
						error = EAGAIN;
				}
				rtfree(rt);
				rt = NULL;
			} while (error == EAGAIN);

			if (error == EAFNOSUPPORT)
				error = 0;

			if (error)
				break;
		}
	}

	return (error);
}
1766 
/*
 * rtable_walk() callback: bring routes of interface ``arg'' up or down
 * (adjusting their priority) to match its link state.  Returning
 * EEXIST asks the walker to delete the route.
 */
int
rt_if_linkstate_change(struct rtentry *rt, void *arg, u_int id)
{
	struct ifnet *ifp = arg;
	struct sockaddr_in6 sa_mask;
	int error;

	if (rt->rt_ifidx != ifp->if_index)
		return (0);

	/* Local routes are always usable. */
	if (rt->rt_flags & RTF_LOCAL) {
		rt->rt_flags |= RTF_UP;
		return (0);
	}

	if (LINK_STATE_IS_UP(ifp->if_link_state) && ifp->if_flags & IFF_UP) {
		if (ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* bring route up */
		rt->rt_flags |= RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority & RTP_MASK, rt);
	} else {
		/*
		 * Remove redirected and cloned routes (mainly ARP)
		 * from down interfaces so we have a chance to get
		 * new routes from a better source.
		 */
		if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) &&
		    !ISSET(rt->rt_flags, RTF_CACHED|RTF_BFD)) {
			return (EEXIST);
		}

		if (!ISSET(rt->rt_flags, RTF_UP))
			return (0);

		/* take route down */
		rt->rt_flags &= ~RTF_UP;
		error = rtable_mpath_reprio(id, rt_key(rt), rt_plen(rt),
		    rt->rt_priority | RTP_DOWN, rt);
	}
	if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));

	return (error);
}
1814 
/*
 * Build a netmask sockaddr for address family ``af'' and prefix
 * length ``plen'' in caller storage ``sa_mask''.  Returns NULL for
 * plen == -1 or an unsupported family.
 */
struct sockaddr *
rt_plentosa(sa_family_t af, int plen, struct sockaddr_in6 *sa_mask)
{
	/* sa_mask is large enough to alias either address family. */
	struct sockaddr_in	*sin = (struct sockaddr_in *)sa_mask;
#ifdef INET6
	struct sockaddr_in6	*sin6 = (struct sockaddr_in6 *)sa_mask;
#endif

	KASSERT(plen >= 0 || plen == -1);

	if (plen == -1)
		return (NULL);

	memset(sa_mask, 0, sizeof(*sa_mask));

	switch (af) {
	case AF_INET:
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(struct sockaddr_in);
		in_prefixlen2mask(&sin->sin_addr, plen);
		break;
#ifdef INET6
	case AF_INET6:
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(struct sockaddr_in6);
		in6_prefixlen2mask(&sin6->sin6_addr, plen);
		break;
#endif /* INET6 */
	default:
		return (NULL);
	}

	return ((struct sockaddr *)sa_mask);
}
1849 
/* Build the netmask sockaddr matching ``rt''s key and prefix length. */
struct sockaddr *
rt_plen2mask(struct rtentry *rt, struct sockaddr_in6 *sa_mask)
{
	return (rt_plentosa(rt_key(rt)->sa_family, rt_plen(rt), sa_mask));
}
1855 
1856 #ifdef DDB
1857 #include <machine/db_machdep.h>
1858 #include <ddb/db_output.h>
1859 
1860 void	db_print_sa(struct sockaddr *);
1861 void	db_print_ifa(struct ifaddr *);
1862 
1863 void
1864 db_print_sa(struct sockaddr *sa)
1865 {
1866 	int len;
1867 	u_char *p;
1868 
1869 	if (sa == NULL) {
1870 		db_printf("[NULL]");
1871 		return;
1872 	}
1873 
1874 	p = (u_char *)sa;
1875 	len = sa->sa_len;
1876 	db_printf("[");
1877 	while (len > 0) {
1878 		db_printf("%d", *p);
1879 		p++;
1880 		len--;
1881 		if (len)
1882 			db_printf(",");
1883 	}
1884 	db_printf("]\n");
1885 }
1886 
/* Dump one interface address: its sockaddrs, flags, refcount, metric. */
void
db_print_ifa(struct ifaddr *ifa)
{
	if (ifa == NULL)
		return;
	db_printf("  ifa_addr=");
	db_print_sa(ifa->ifa_addr);
	db_printf("  ifa_dsta=");
	db_print_sa(ifa->ifa_dstaddr);
	db_printf("  ifa_mask=");
	db_print_sa(ifa->ifa_netmask);
	db_printf("  flags=0x%x, refcnt=%u, metric=%d\n",
	    ifa->ifa_flags, ifa->ifa_refcnt.r_refs, ifa->ifa_metric);
}
1901 
1902 /*
1903  * Function to pass to rtable_walk().
1904  * Return non-zero error to abort walk.
1905  */
1906 int
1907 db_show_rtentry(struct rtentry *rt, void *w, unsigned int id)
1908 {
1909 	db_printf("rtentry=%p", rt);
1910 
1911 	db_printf(" flags=0x%x refcnt=%u use=%llu expire=%lld\n",
1912 	    rt->rt_flags, rt->rt_refcnt.r_refs, rt->rt_use, rt->rt_expire);
1913 
1914 	db_printf(" key="); db_print_sa(rt_key(rt));
1915 	db_printf(" plen=%d", rt_plen(rt));
1916 	db_printf(" gw="); db_print_sa(rt->rt_gateway);
1917 	db_printf(" ifidx=%u ", rt->rt_ifidx);
1918 	db_printf(" ifa=%p\n", rt->rt_ifa);
1919 	db_print_ifa(rt->rt_ifa);
1920 
1921 	db_printf(" gwroute=%p llinfo=%p priority=%d\n",
1922 	    rt->rt_gwroute, rt->rt_llinfo, rt->rt_priority);
1923 	return (0);
1924 }
1925 
1926 /*
1927  * Function to print all the route trees.
1928  */
1929 int
1930 db_show_rtable(int af, unsigned int rtableid)
1931 {
1932 	db_printf("Route tree for af %d, rtableid %u\n", af, rtableid);
1933 	rtable_walk(rtableid, af, NULL, db_show_rtentry, NULL);
1934 	return (0);
1935 }
1936 #endif /* DDB */
1937