xref: /openbsd-src/sys/net/rtsock.c (revision f1dd7b858388b4a23f4f67a4957ec5ff656ebbe8)
1 /*	$OpenBSD: rtsock.c,v 1.313 2021/05/16 13:09:39 mvs Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75 
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80 
81 #include <netinet/in.h>
82 
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93 
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97 
/*
 * Send and receive buffer sizes reserved with soreserve() when a
 * routing socket is attached (see route_attach()).
 */
98 #define	ROUTESNDQ	8192
99 #define	ROUTERCVQ	8192
100 
/*
 * Fixed address reported as the peer of every routing socket
 * (PRU_PEERADDR) and used as the source address when messages are
 * appended to a receive buffer with sbappendaddr().  The initializer
 * sets the first two members; code in this file reads the first one
 * back as sa_len, so the address is 2 bytes long with family PF_ROUTE.
 */
101 const struct sockaddr route_src = { 2, PF_ROUTE, };
102 
/*
 * Argument bundle passed to rtm_msg2() and to the sysctl helpers
 * sysctl_iflist() and sysctl_ifnames() declared below.
 * NOTE(review): presumably w_where/w_given describe the caller's
 * output buffer, w_needed accumulates the space required and
 * w_tmem/w_tmemsize are a scratch buffer -- confirm against the
 * sysctl code before relying on this.
 */
103 struct walkarg {
104 	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
105 	caddr_t	w_where, w_tmem;
106 };
107 
/* Routing socket protocol hooks, input path and desync handling. */
108 void	route_prinit(void);
109 void	rcb_ref(void *, void *);
110 void	rcb_unref(void *, void *);
111 int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
112 	    struct mbuf *);
113 int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
114 int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
115 	    struct mbuf *, struct proc *);
116 void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
117 int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
118 int	route_cleargateway(struct rtentry *, void *, unsigned int);
119 void	rtm_senddesync_timer(void *);
120 void	rtm_senddesync(struct socket *);
121 int	rtm_sendup(struct socket *, struct mbuf *);
122 
/* Routing message (rtm_*) execution, construction and parsing. */
123 int	rtm_getifa(struct rt_addrinfo *, unsigned int);
124 int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
125 	    uint8_t, unsigned int);
126 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
127 struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
128 int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
129 		     struct walkarg *);
130 int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
131 int		 rtm_validate_proposal(struct rt_addrinfo *);
132 void		 rtm_setmetrics(u_long, const struct rt_metrics *,
133 		     struct rt_kmetrics *);
134 void		 rtm_getmetrics(const struct rt_kmetrics *,
135 		     struct rt_metrics *);
136 
/* NOTE(review): presumably the sysctl(2) backends -- confirm. */
137 int		 sysctl_iflist(int, struct walkarg *);
138 int		 sysctl_ifnames(struct walkarg *);
139 int		 sysctl_rtable_rtstat(void *, size_t *, void *);
140 
/* Used by route_output() to handle RTM_SOURCE requests. */
141 int		 rt_setsource(unsigned int, struct sockaddr *);
142 
143 /*
144  * Locks used to protect struct members
145  *       I       immutable after creation
146  *       s       solock
147  */
/*
 * Per-socket state of a routing socket.  One is allocated from
 * rtpcb_pool in route_attach(), stored in so_pcb, linked on
 * rtptable.rtp_list and released again in route_detach().
 */
148 struct rtpcb {
149 	struct socket		*rop_socket;		/* [I] */
150 
151 	SRPL_ENTRY(rtpcb)	rop_list;	/* linkage on rtptable.rtp_list */
152 	struct refcnt		rop_refcnt;	/* see rcb_ref()/rcb_unref() */
153 	struct timeout		rop_timeout;	/* runs rtm_senddesync_timer() */
154 	unsigned int		rop_msgfilter;		/* [s] */
155 	unsigned int		rop_flagfilter;		/* [s] */
156 	unsigned int		rop_flags;		/* [s] */
157 	u_int			rop_rtableid;		/* [s] */
158 	unsigned short		rop_proto;		/* [I] */
159 	u_char			rop_priority;		/* [s] */
160 };
/* Fetch the routing control block hanging off a socket's so_pcb. */
161 #define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
162 
/*
 * Global registry of attached routing sockets.  route_attach() and
 * route_detach() modify the list and the counter while holding rtp_lk
 * for writing; route_input() walks the list with SRPL_FOREACH.
 */
163 struct rtptable {
164 	SRPL_HEAD(, rtpcb)	rtp_list;	/* every attached rtpcb */
165 	struct srpl_rc		rtp_rc;		/* rcb_ref()/rcb_unref() hooks */
166 	struct rwlock		rtp_lk;		/* taken for list updates */
167 	unsigned int		rtp_count;	/* number of attached rtpcbs */
168 };
169 
/* Pool backing struct rtpcb allocations, and the single global table. */
170 struct pool rtpcb_pool;
171 struct rtptable rtptable;
172 
173 /*
174  * These flags and timeout are used for indicating to userland (via a
175  * RTM_DESYNC msg) when the route socket has overflowed and messages
176  * have been lost.
177  */
/*
 * Both flags live in rtpcb.rop_flags and are set together by
 * rtm_sendup() when a message cannot be queued.  DESYNC is cleared by
 * rtm_senddesync() once the RTM_DESYNC message has been appended;
 * FLUSH is cleared on PRU_RCVD in route_usrreq().
 */
178 #define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
179 #define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
180 					   queueing more packets */
181 
/* Delay used by rtm_senddesync() when re-arming rop_timeout. */
182 #define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */
183 
184 void
185 route_prinit(void)
186 {
187 	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
188 	rw_init(&rtptable.rtp_lk, "rtsock");
189 	SRPL_INIT(&rtptable.rtp_list);
190 	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
191 	    IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
192 }
193 
194 void
195 rcb_ref(void *null, void *v)
196 {
197 	struct rtpcb *rop = v;
198 
199 	refcnt_take(&rop->rop_refcnt);
200 }
201 
202 void
203 rcb_unref(void *null, void *v)
204 {
205 	struct rtpcb *rop = v;
206 
207 	refcnt_rele_wake(&rop->rop_refcnt);
208 }
209 
210 int
211 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
212     struct mbuf *control, struct proc *p)
213 {
214 	struct rtpcb	*rop;
215 	int		 error = 0;
216 
217 	if (req == PRU_CONTROL)
218 		return (EOPNOTSUPP);
219 
220 	soassertlocked(so);
221 
222 	if (control && control->m_len) {
223 		error = EOPNOTSUPP;
224 		goto release;
225 	}
226 
227 	rop = sotortpcb(so);
228 	if (rop == NULL) {
229 		error = EINVAL;
230 		goto release;
231 	}
232 
233 	switch (req) {
234 	/* no connect, bind, accept. Socket is connected from the start */
235 	case PRU_CONNECT:
236 	case PRU_BIND:
237 	case PRU_CONNECT2:
238 	case PRU_LISTEN:
239 	case PRU_ACCEPT:
240 		error = EOPNOTSUPP;
241 		break;
242 
243 	case PRU_DISCONNECT:
244 	case PRU_ABORT:
245 		soisdisconnected(so);
246 		break;
247 	case PRU_SHUTDOWN:
248 		socantsendmore(so);
249 		break;
250 	case PRU_SENSE:
251 		/* stat: don't bother with a blocksize. */
252 		break;
253 
254 	/* minimal support, just implement a fake peer address */
255 	case PRU_SOCKADDR:
256 		error = EINVAL;
257 		break;
258 	case PRU_PEERADDR:
259 		bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
260 		nam->m_len = route_src.sa_len;
261 		break;
262 
263 	case PRU_RCVD:
264 		/*
265 		 * If we are in a FLUSH state, check if the buffer is
266 		 * empty so that we can clear the flag.
267 		 */
268 		if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
269 		    ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
270 		    rop->rop_socket->so_rcv.sb_hiwat)))
271 			rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
272 		break;
273 
274 	case PRU_RCVOOB:
275 	case PRU_SENDOOB:
276 		error = EOPNOTSUPP;
277 		break;
278 	case PRU_SEND:
279 		if (nam) {
280 			error = EISCONN;
281 			break;
282 		}
283 		error = (*so->so_proto->pr_output)(m, so, NULL, NULL);
284 		m = NULL;
285 		break;
286 	default:
287 		panic("route_usrreq");
288 	}
289 
290  release:
291 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
292 		m_freem(control);
293 		m_freem(m);
294 	}
295 	return (error);
296 }
297 
298 int
299 route_attach(struct socket *so, int proto)
300 {
301 	struct rtpcb	*rop;
302 	int		 error;
303 
304 	error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
305 	if (error)
306 		return (error);
307 	/*
308 	 * use the rawcb but allocate a rtpcb, this
309 	 * code does not care about the additional fields
310 	 * and works directly on the raw socket.
311 	 */
312 	rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO);
313 	so->so_pcb = rop;
314 	/* Init the timeout structure */
315 	timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so,
316 	    TIMEOUT_PROC);
317 	refcnt_init(&rop->rop_refcnt);
318 
319 	rop->rop_socket = so;
320 	rop->rop_proto = proto;
321 
322 	rop->rop_rtableid = curproc->p_p->ps_rtableid;
323 
324 	soisconnected(so);
325 	so->so_options |= SO_USELOOPBACK;
326 
327 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
328 	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
329 	    rop_list);
330 	rtptable.rtp_count++;
331 	rw_exit(&rtptable.rtp_lk);
332 
333 	return (0);
334 }
335 
336 int
337 route_detach(struct socket *so)
338 {
339 	struct rtpcb	*rop;
340 
341 	soassertlocked(so);
342 
343 	rop = sotortpcb(so);
344 	if (rop == NULL)
345 		return (EINVAL);
346 
347 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
348 
349 	rtptable.rtp_count--;
350 	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
351 	    rop_list);
352 	rw_exit(&rtptable.rtp_lk);
353 
354 	sounlock(so, SL_LOCKED);
355 
356 	/* wait for all references to drop */
357 	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
358 	timeout_del_barrier(&rop->rop_timeout);
359 
360 	solock(so);
361 
362 	so->so_pcb = NULL;
363 	KASSERT((so->so_state & SS_NOFDREF) == 0);
364 	pool_put(&rtpcb_pool, rop);
365 
366 	return (0);
367 }
368 
369 int
370 route_ctloutput(int op, struct socket *so, int level, int optname,
371     struct mbuf *m)
372 {
373 	struct rtpcb *rop = sotortpcb(so);
374 	int error = 0;
375 	unsigned int tid, prio;
376 
377 	if (level != AF_ROUTE)
378 		return (EINVAL);
379 
380 	switch (op) {
381 	case PRCO_SETOPT:
382 		switch (optname) {
383 		case ROUTE_MSGFILTER:
384 			if (m == NULL || m->m_len != sizeof(unsigned int))
385 				error = EINVAL;
386 			else
387 				rop->rop_msgfilter = *mtod(m, unsigned int *);
388 			break;
389 		case ROUTE_TABLEFILTER:
390 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
391 				error = EINVAL;
392 				break;
393 			}
394 			tid = *mtod(m, unsigned int *);
395 			if (tid != RTABLE_ANY && !rtable_exists(tid))
396 				error = ENOENT;
397 			else
398 				rop->rop_rtableid = tid;
399 			break;
400 		case ROUTE_PRIOFILTER:
401 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
402 				error = EINVAL;
403 				break;
404 			}
405 			prio = *mtod(m, unsigned int *);
406 			if (prio > RTP_MAX)
407 				error = EINVAL;
408 			else
409 				rop->rop_priority = prio;
410 			break;
411 		case ROUTE_FLAGFILTER:
412 			if (m == NULL || m->m_len != sizeof(unsigned int))
413 				error = EINVAL;
414 			else
415 				rop->rop_flagfilter = *mtod(m, unsigned int *);
416 			break;
417 		default:
418 			error = ENOPROTOOPT;
419 			break;
420 		}
421 		break;
422 	case PRCO_GETOPT:
423 		switch (optname) {
424 		case ROUTE_MSGFILTER:
425 			m->m_len = sizeof(unsigned int);
426 			*mtod(m, unsigned int *) = rop->rop_msgfilter;
427 			break;
428 		case ROUTE_TABLEFILTER:
429 			m->m_len = sizeof(unsigned int);
430 			*mtod(m, unsigned int *) = rop->rop_rtableid;
431 			break;
432 		case ROUTE_PRIOFILTER:
433 			m->m_len = sizeof(unsigned int);
434 			*mtod(m, unsigned int *) = rop->rop_priority;
435 			break;
436 		case ROUTE_FLAGFILTER:
437 			m->m_len = sizeof(unsigned int);
438 			*mtod(m, unsigned int *) = rop->rop_flagfilter;
439 			break;
440 		default:
441 			error = ENOPROTOOPT;
442 			break;
443 		}
444 	}
445 	return (error);
446 }
447 
/*
 * Timeout handler armed by rtm_senddesync(): retry the RTM_DESYNC
 * delivery for the socket passed as `xso', with that socket locked.
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*sock = xso;
	int		 lockstate;

	lockstate = solock(sock);
	rtm_senddesync(sock);
	sounlock(sock, lockstate);
}
458 
459 void
460 rtm_senddesync(struct socket *so)
461 {
462 	struct rtpcb	*rop = sotortpcb(so);
463 	struct mbuf	*desync_mbuf;
464 
465 	soassertlocked(so);
466 
467 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
468 	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
469 		return;
470 
471 	/*
472 	 * If we fail to alloc memory or if sbappendaddr()
473 	 * fails, re-add timeout and try again.
474 	 */
475 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
476 	if (desync_mbuf != NULL) {
477 		if (sbappendaddr(so, &so->so_rcv, &route_src,
478 		    desync_mbuf, NULL) != 0) {
479 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
480 			sorwakeup(rop->rop_socket);
481 			return;
482 		}
483 		m_freem(desync_mbuf);
484 	}
485 	/* Re-add timeout to try sending msg again */
486 	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
487 }
488 
489 void
490 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
491 {
492 	struct socket *so;
493 	struct rtpcb *rop;
494 	struct rt_msghdr *rtm;
495 	struct mbuf *m = m0;
496 	struct srp_ref sr;
497 	int s;
498 
499 	/* ensure that we can access the rtm_type via mtod() */
500 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
501 		m_freem(m);
502 		return;
503 	}
504 
505 	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
506 		/*
507 		 * If route socket is bound to an address family only send
508 		 * messages that match the address family. Address family
509 		 * agnostic messages are always sent.
510 		 */
511 		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
512 		    rop->rop_proto != sa_family)
513 			continue;
514 
515 
516 		so = rop->rop_socket;
517 		s = solock(so);
518 
519 		/*
520 		 * Check to see if we don't want our own messages and
521 		 * if we can receive anything.
522 		 */
523 		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
524 		    !(so->so_state & SS_ISCONNECTED) ||
525 		    (so->so_state & SS_CANTRCVMORE))
526 			goto next;
527 
528 		/* filter messages that the process does not want */
529 		rtm = mtod(m, struct rt_msghdr *);
530 		/* but RTM_DESYNC can't be filtered */
531 		if (rtm->rtm_type != RTM_DESYNC) {
532 			if (rop->rop_msgfilter != 0 &&
533 			    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
534 				goto next;
535 			if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
536 				goto next;
537 		}
538 		switch (rtm->rtm_type) {
539 		case RTM_IFANNOUNCE:
540 		case RTM_DESYNC:
541 			/* no tableid */
542 			break;
543 		case RTM_RESOLVE:
544 		case RTM_NEWADDR:
545 		case RTM_DELADDR:
546 		case RTM_IFINFO:
547 		case RTM_80211INFO:
548 		case RTM_BFD:
549 			/* check against rdomain id */
550 			if (rop->rop_rtableid != RTABLE_ANY &&
551 			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
552 				goto next;
553 			break;
554 		default:
555 			if (rop->rop_priority != 0 &&
556 			    rop->rop_priority < rtm->rtm_priority)
557 				goto next;
558 			/* check against rtable id */
559 			if (rop->rop_rtableid != RTABLE_ANY &&
560 			    rop->rop_rtableid != rtm->rtm_tableid)
561 				goto next;
562 			break;
563 		}
564 
565 		/*
566 		 * Check to see if the flush flag is set. If so, don't queue
567 		 * any more messages until the flag is cleared.
568 		 */
569 		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
570 			goto next;
571 
572 		rtm_sendup(so, m);
573 next:
574 		sounlock(so, s);
575 	}
576 	SRPL_LEAVE(&sr);
577 
578 	m_freem(m);
579 }
580 
581 int
582 rtm_sendup(struct socket *so, struct mbuf *m0)
583 {
584 	struct rtpcb *rop = sotortpcb(so);
585 	struct mbuf *m;
586 
587 	soassertlocked(so);
588 
589 	m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
590 	if (m == NULL)
591 		return (ENOMEM);
592 
593 	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
594 	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
595 		/* Flag socket as desync'ed and flush required */
596 		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
597 		rtm_senddesync(so);
598 		m_freem(m);
599 		return (ENOBUFS);
600 	}
601 
602 	sorwakeup(so);
603 	return (0);
604 }
605 
606 struct rt_msghdr *
607 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
608 {
609 	struct rt_msghdr	*rtm;
610 	struct rt_addrinfo	 info;
611 	struct sockaddr_rtlabel	 sa_rl;
612 	struct sockaddr_in6	 sa_mask;
613 #ifdef BFD
614 	struct sockaddr_bfd	 sa_bfd;
615 #endif
616 	struct ifnet		*ifp = NULL;
617 	int			 len;
618 
619 	bzero(&info, sizeof(info));
620 	info.rti_info[RTAX_DST] = rt_key(rt);
621 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
622 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
623 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
624 #ifdef BFD
625 	if (rt->rt_flags & RTF_BFD)
626 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
627 #endif
628 #ifdef MPLS
629 	if (rt->rt_flags & RTF_MPLS) {
630 		struct sockaddr_mpls	 sa_mpls;
631 
632 		bzero(&sa_mpls, sizeof(sa_mpls));
633 		sa_mpls.smpls_family = AF_MPLS;
634 		sa_mpls.smpls_len = sizeof(sa_mpls);
635 		sa_mpls.smpls_label = ((struct rt_mpls *)
636 		    rt->rt_llinfo)->mpls_label;
637 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
638 		info.rti_mpls = ((struct rt_mpls *)
639 		    rt->rt_llinfo)->mpls_operation;
640 	}
641 #endif
642 	ifp = if_get(rt->rt_ifidx);
643 	if (ifp != NULL) {
644 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
645 		info.rti_info[RTAX_IFA] =
646 		    rtable_getsource(tableid, info.rti_info[RTAX_DST]->sa_family);
647 		if (info.rti_info[RTAX_IFA] == NULL)
648 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
649 		if (ifp->if_flags & IFF_POINTOPOINT)
650 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
651 	}
652 	if_put(ifp);
653 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
654 
655 	/* build new route message */
656 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
657 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
658 
659 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
660 	rtm->rtm_type = type;
661 	rtm->rtm_index = rt->rt_ifidx;
662 	rtm->rtm_tableid = tableid;
663 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
664 	rtm->rtm_flags = rt->rt_flags;
665 	rtm->rtm_pid = curproc->p_p->ps_pid;
666 	rtm->rtm_seq = seq;
667 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
668 	rtm->rtm_addrs = info.rti_addrs;
669 #ifdef MPLS
670 	rtm->rtm_mpls = info.rti_mpls;
671 #endif
672 	return rtm;
673 }
674 
675 int
676 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
677     struct mbuf *control)
678 {
679 	struct rt_msghdr	*rtm = NULL;
680 	struct rtentry		*rt = NULL;
681 	struct rt_addrinfo	 info;
682 	struct ifnet		*ifp;
683 	int			 len, seq, useloopback, error = 0;
684 	u_int			 tableid;
685 	u_int8_t		 prio;
686 	u_char			 vers, type;
687 
688 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
689 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
690 		return (ENOBUFS);
691 	if ((m->m_flags & M_PKTHDR) == 0)
692 		panic("route_output");
693 
694 	useloopback = so->so_options & SO_USELOOPBACK;
695 
696 	/*
697 	 * The socket can't be closed concurrently because the file
698 	 * descriptor reference is still held.
699 	 */
700 
701 	sounlock(so, SL_LOCKED);
702 
703 	len = m->m_pkthdr.len;
704 	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 ||
705 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
706 		error = EINVAL;
707 		goto fail;
708 	}
709 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
710 	switch (vers) {
711 	case RTM_VERSION:
712 		if (len < sizeof(struct rt_msghdr)) {
713 			error = EINVAL;
714 			goto fail;
715 		}
716 		if (len > RTM_MAXSIZE) {
717 			error = EMSGSIZE;
718 			goto fail;
719 		}
720 		rtm = malloc(len, M_RTABLE, M_WAITOK);
721 		m_copydata(m, 0, len, rtm);
722 		break;
723 	default:
724 		error = EPROTONOSUPPORT;
725 		goto fail;
726 	}
727 
728 	/* Verify that the caller is sending an appropriate message early */
729 	switch (rtm->rtm_type) {
730 	case RTM_ADD:
731 	case RTM_DELETE:
732 	case RTM_GET:
733 	case RTM_CHANGE:
734 	case RTM_PROPOSAL:
735 	case RTM_SOURCE:
736 		break;
737 	default:
738 		error = EOPNOTSUPP;
739 		goto fail;
740 	}
741 	/*
742 	 * Verify that the header length is valid.
743 	 * All messages from userland start with a struct rt_msghdr.
744 	 */
745 	if (rtm->rtm_hdrlen == 0)	/* old client */
746 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
747 	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
748 	    len < rtm->rtm_hdrlen) {
749 		error = EINVAL;
750 		goto fail;
751 	}
752 
753 	rtm->rtm_pid = curproc->p_p->ps_pid;
754 
755 	/*
756 	 * Verify that the caller has the appropriate privilege; RTM_GET
757 	 * is the only operation the non-superuser is allowed.
758 	 */
759 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
760 		error = EACCES;
761 		goto fail;
762 	}
763 	tableid = rtm->rtm_tableid;
764 	if (!rtable_exists(tableid)) {
765 		if (rtm->rtm_type == RTM_ADD) {
766 			if ((error = rtable_add(tableid)) != 0)
767 				goto fail;
768 		} else {
769 			error = EINVAL;
770 			goto fail;
771 		}
772 	}
773 
774 	/* Do not let userland play with kernel-only flags. */
775 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
776 		error = EINVAL;
777 		goto fail;
778 	}
779 
780 	/* make sure that kernel-only bits are not set */
781 	rtm->rtm_priority &= RTP_MASK;
782 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
783 	rtm->rtm_fmask &= RTF_FMASK;
784 
785 	if (rtm->rtm_priority != 0) {
786 		if (rtm->rtm_priority > RTP_MAX ||
787 		    rtm->rtm_priority == RTP_LOCAL) {
788 			error = EINVAL;
789 			goto fail;
790 		}
791 		prio = rtm->rtm_priority;
792 	} else if (rtm->rtm_type != RTM_ADD)
793 		prio = RTP_ANY;
794 	else if (rtm->rtm_flags & RTF_STATIC)
795 		prio = 0;
796 	else
797 		prio = RTP_DEFAULT;
798 
799 	bzero(&info, sizeof(info));
800 	info.rti_addrs = rtm->rtm_addrs;
801 	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
802 	    len + (caddr_t)rtm, &info)) != 0)
803 		goto fail;
804 
805 	info.rti_flags = rtm->rtm_flags;
806 
807 	if (rtm->rtm_type != RTM_SOURCE &&
808 	    rtm->rtm_type != RTM_PROPOSAL &&
809 	    (info.rti_info[RTAX_DST] == NULL ||
810 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
811 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
812 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
813 	    info.rti_info[RTAX_GENMASK] != NULL)) {
814 		error = EINVAL;
815 		goto fail;
816 	}
817 #ifdef MPLS
818 	info.rti_mpls = rtm->rtm_mpls;
819 #endif
820 
821 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
822 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
823 	    (info.rti_flags & RTF_CLONING) == 0) {
824 		info.rti_flags |= RTF_LLINFO;
825 	}
826 
827 	/*
828 	 * Validate RTM_PROPOSAL and pass it along or error out.
829 	 */
830 	if (rtm->rtm_type == RTM_PROPOSAL) {
831 		if (rtm_validate_proposal(&info) == -1) {
832 			error = EINVAL;
833 			goto fail;
834 		}
835 		/*
836 		 * If this is a solicitation proposal forward request to
837 		 * all interfaces. Most handlers will ignore it but at least
838 		 * umb(4) will send a response to this event.
839 		 */
840 		if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
841 			NET_LOCK();
842 			TAILQ_FOREACH(ifp, &ifnet, if_list) {
843 				ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
844 			}
845 			NET_UNLOCK();
846 		}
847 	} else if (rtm->rtm_type == RTM_SOURCE) {
848 		if (info.rti_info[RTAX_IFA] == NULL) {
849 			error = EINVAL;
850 			goto fail;
851 		}
852 		if ((error =
853 		    rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0)
854 			goto fail;
855 	} else {
856 		error = rtm_output(rtm, &rt, &info, prio, tableid);
857 		if (!error) {
858 			type = rtm->rtm_type;
859 			seq = rtm->rtm_seq;
860 			free(rtm, M_RTABLE, len);
861 			rtm = rtm_report(rt, type, seq, tableid);
862 			len = rtm->rtm_msglen;
863 		}
864 	}
865 
866 	rtfree(rt);
867 	if (error) {
868 		rtm->rtm_errno = error;
869 	} else {
870 		rtm->rtm_flags |= RTF_DONE;
871 	}
872 
873 	/*
874 	 * Check to see if we don't want our own messages.
875 	 */
876 	if (!useloopback) {
877 		if (rtptable.rtp_count == 0) {
878 			/* no other listener and no loopback of messages */
879 			goto fail;
880 		}
881 	}
882 	if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
883 		m_freem(m);
884 		m = NULL;
885 	} else if (m->m_pkthdr.len > len)
886 		m_adj(m, len - m->m_pkthdr.len);
887 	free(rtm, M_RTABLE, len);
888 	if (m)
889 		route_input(m, so, info.rti_info[RTAX_DST] ?
890 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
891 	solock(so);
892 
893 	return (error);
894 fail:
895 	free(rtm, M_RTABLE, len);
896 	m_freem(m);
897 	solock(so);
898 
899 	return (error);
900 }
901 
902 int
903 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
904     struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
905 {
906 	struct rtentry		*rt = *prt;
907 	struct ifnet		*ifp = NULL;
908 	int			 plen, newgate = 0, error = 0;
909 
910 	switch (rtm->rtm_type) {
911 	case RTM_ADD:
912 		if (info->rti_info[RTAX_GATEWAY] == NULL) {
913 			error = EINVAL;
914 			break;
915 		}
916 
917 		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
918 		if ((error = route_arp_conflict(rt, info))) {
919 			rtfree(rt);
920 			rt = NULL;
921 			break;
922 		}
923 
924 		/*
925 		 * We cannot go through a delete/create/insert cycle for
926 		 * cached route because this can lead to races in the
927 		 * receive path.  Instead we update the L2 cache.
928 		 */
929 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
930 			goto change;
931 
932 		rtfree(rt);
933 		rt = NULL;
934 
935 		NET_LOCK();
936 		if ((error = rtm_getifa(info, tableid)) != 0) {
937 			NET_UNLOCK();
938 			break;
939 		}
940 		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
941 		NET_UNLOCK();
942 		if (error == 0)
943 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
944 			    &rt->rt_rmx);
945 		break;
946 	case RTM_DELETE:
947 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
948 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
949 		    prio);
950 		if (rt == NULL) {
951 			error = ESRCH;
952 			break;
953 		}
954 
955 		/*
956 		 * If we got multipath routes, we require users to specify
957 		 * a matching gateway.
958 		 */
959 		if (ISSET(rt->rt_flags, RTF_MPATH) &&
960 		    info->rti_info[RTAX_GATEWAY] == NULL) {
961 			error = ESRCH;
962 			break;
963 		}
964 
965 		/* Detaching an interface requires the KERNEL_LOCK(). */
966 		ifp = if_get(rt->rt_ifidx);
967 		KASSERT(ifp != NULL);
968 
969 		/*
970 		 * Invalidate the cache of automagically created and
971 		 * referenced L2 entries to make sure that ``rt_gwroute''
972 		 * pointer stays valid for other CPUs.
973 		 */
974 		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
975 			NET_LOCK();
976 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
977 			/* Reset the MTU of the gateway route. */
978 			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
979 			    route_cleargateway, rt);
980 			NET_UNLOCK();
981 			if_put(ifp);
982 			break;
983 		}
984 
985 		/*
986 		 * Make sure that local routes are only modified by the
987 		 * kernel.
988 		 */
989 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
990 			if_put(ifp);
991 			error = EINVAL;
992 			break;
993 		}
994 
995 		rtfree(rt);
996 		rt = NULL;
997 
998 		NET_LOCK();
999 		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
1000 		NET_UNLOCK();
1001 		if_put(ifp);
1002 		break;
1003 	case RTM_CHANGE:
1004 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1005 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1006 		    prio);
1007 		/*
1008 		 * If we got multipath routes, we require users to specify
1009 		 * a matching gateway.
1010 		 */
1011 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
1012 		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
1013 			rtfree(rt);
1014 			rt = NULL;
1015 		}
1016 		/*
1017 		 * If RTAX_GATEWAY is the argument we're trying to
1018 		 * change, try to find a compatible route.
1019 		 */
1020 		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
1021 			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1022 			    info->rti_info[RTAX_NETMASK], NULL, prio);
1023 			/* Ensure we don't pick a multipath one. */
1024 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
1025 				rtfree(rt);
1026 				rt = NULL;
1027 			}
1028 		}
1029 
1030 		if (rt == NULL) {
1031 			error = ESRCH;
1032 			break;
1033 		}
1034 
1035 		/*
1036 		 * Make sure that local routes are only modified by the
1037 		 * kernel.
1038 		 */
1039 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1040 			error = EINVAL;
1041 			break;
1042 		}
1043 
1044 		/*
1045 		 * RTM_CHANGE needs a perfect match.
1046 		 */
1047 		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
1048 		    info->rti_info[RTAX_NETMASK]);
1049 		if (rt_plen(rt) != plen) {
1050 			error = ESRCH;
1051 			break;
1052 		}
1053 
1054 		if (info->rti_info[RTAX_GATEWAY] != NULL)
1055 			if (rt->rt_gateway == NULL ||
1056 			    bcmp(rt->rt_gateway,
1057 			    info->rti_info[RTAX_GATEWAY],
1058 			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
1059 				newgate = 1;
1060 			}
1061 		/*
1062 		 * Check reachable gateway before changing the route.
1063 		 * New gateway could require new ifaddr, ifp;
1064 		 * flags may also be different; ifp may be specified
1065 		 * by ll sockaddr when protocol address is ambiguous.
1066 		 */
1067 		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1068 		    info->rti_info[RTAX_IFA] != NULL) {
1069 			struct ifaddr	*ifa = NULL;
1070 
1071 			NET_LOCK();
1072 			if ((error = rtm_getifa(info, tableid)) != 0) {
1073 				NET_UNLOCK();
1074 				break;
1075 			}
1076 			ifa = info->rti_ifa;
1077 			if (rt->rt_ifa != ifa) {
1078 				ifp = if_get(rt->rt_ifidx);
1079 				KASSERT(ifp != NULL);
1080 				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1081 				ifafree(rt->rt_ifa);
1082 				if_put(ifp);
1083 
1084 				ifa->ifa_refcnt++;
1085 				rt->rt_ifa = ifa;
1086 				rt->rt_ifidx = ifa->ifa_ifp->if_index;
1087 				/* recheck link state after ifp change */
1088 				rt_if_linkstate_change(rt, ifa->ifa_ifp,
1089 				    tableid);
1090 			}
1091 			NET_UNLOCK();
1092 		}
1093 change:
1094 		if (info->rti_info[RTAX_GATEWAY] != NULL) {
1095 			/* When updating the gateway, make sure it is valid. */
1096 			if (!newgate && rt->rt_gateway->sa_family !=
1097 			    info->rti_info[RTAX_GATEWAY]->sa_family) {
1098 				error = EINVAL;
1099 				break;
1100 			}
1101 
1102 			NET_LOCK();
1103 			error = rt_setgate(rt,
1104 			    info->rti_info[RTAX_GATEWAY], tableid);
1105 			NET_UNLOCK();
1106 			if (error)
1107 				break;
1108 		}
1109 #ifdef MPLS
1110 		if (rtm->rtm_flags & RTF_MPLS) {
1111 			NET_LOCK();
1112 			error = rt_mpls_set(rt,
1113 			    info->rti_info[RTAX_SRC], info->rti_mpls);
1114 			NET_UNLOCK();
1115 			if (error)
1116 				break;
1117 		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
1118 			NET_LOCK();
1119 			/* if gateway changed remove MPLS information */
1120 			rt_mpls_clear(rt);
1121 			NET_UNLOCK();
1122 		}
1123 #endif
1124 
1125 #ifdef BFD
1126 		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1127 			if ((error = bfdset(rt)))
1128 				break;
1129 		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1130 		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
1131 			bfdclear(rt);
1132 		}
1133 #endif
1134 
1135 		NET_LOCK();
1136 		/* Hack to allow some flags to be toggled */
1137 		if (rtm->rtm_fmask) {
1138 			/* MPLS flag it is set by rt_mpls_set() */
1139 			rtm->rtm_fmask &= ~RTF_MPLS;
1140 			rtm->rtm_flags &= ~RTF_MPLS;
1141 			rt->rt_flags =
1142 			    (rt->rt_flags & ~rtm->rtm_fmask) |
1143 			    (rtm->rtm_flags & rtm->rtm_fmask);
1144 		}
1145 		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
1146 
1147 		ifp = if_get(rt->rt_ifidx);
1148 		KASSERT(ifp != NULL);
1149 		ifp->if_rtrequest(ifp, RTM_ADD, rt);
1150 		if_put(ifp);
1151 
1152 		if (info->rti_info[RTAX_LABEL] != NULL) {
1153 			char *rtlabel = ((struct sockaddr_rtlabel *)
1154 			    info->rti_info[RTAX_LABEL])->sr_label;
1155 			rtlabel_unref(rt->rt_labelid);
1156 			rt->rt_labelid = rtlabel_name2id(rtlabel);
1157 		}
1158 		if_group_routechange(info->rti_info[RTAX_DST],
1159 		    info->rti_info[RTAX_NETMASK]);
1160 		rt->rt_locks &= ~(rtm->rtm_inits);
1161 		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1162 		NET_UNLOCK();
1163 		break;
1164 	case RTM_GET:
1165 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1166 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1167 		    prio);
1168 		if (rt == NULL)
1169 			error = ESRCH;
1170 		break;
1171 	}
1172 
1173 	*prt = rt;
1174 	return (error);
1175 }
1176 
1177 struct ifaddr *
1178 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1179     unsigned int rtableid)
1180 {
1181 	struct ifaddr	*ifa;
1182 
1183 	if ((flags & RTF_GATEWAY) == 0) {
1184 		/*
1185 		 * If we are adding a route to an interface,
1186 		 * and the interface is a pt to pt link
1187 		 * we should search for the destination
1188 		 * as our clue to the interface.  Otherwise
1189 		 * we can use the local address.
1190 		 */
1191 		ifa = NULL;
1192 		if (flags & RTF_HOST)
1193 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1194 		if (ifa == NULL)
1195 			ifa = ifa_ifwithaddr(gateway, rtableid);
1196 	} else {
1197 		/*
1198 		 * If we are adding a route to a remote net
1199 		 * or host, the gateway may still be on the
1200 		 * other end of a pt to pt link.
1201 		 */
1202 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1203 	}
1204 	if (ifa == NULL) {
1205 		if (gateway->sa_family == AF_LINK) {
1206 			struct sockaddr_dl *sdl = satosdl(gateway);
1207 			struct ifnet *ifp = if_get(sdl->sdl_index);
1208 
1209 			if (ifp != NULL)
1210 				ifa = ifaof_ifpforaddr(dst, ifp);
1211 			if_put(ifp);
1212 		} else {
1213 			struct rtentry *rt;
1214 
1215 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1216 			if (rt != NULL)
1217 				ifa = rt->rt_ifa;
1218 			rtfree(rt);
1219 		}
1220 	}
1221 	if (ifa == NULL)
1222 		return (NULL);
1223 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1224 		struct ifaddr	*oifa = ifa;
1225 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1226 		if (ifa == NULL)
1227 			ifa = oifa;
1228 	}
1229 	return (ifa);
1230 }
1231 
1232 int
1233 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1234 {
1235 	struct ifnet	*ifp = NULL;
1236 
1237 	/*
1238 	 * The "returned" `ifa' is guaranteed to be alive only if
1239 	 * the NET_LOCK() is held.
1240 	 */
1241 	NET_ASSERT_LOCKED();
1242 
1243 	/*
1244 	 * ifp may be specified by sockaddr_dl when protocol address
1245 	 * is ambiguous
1246 	 */
1247 	if (info->rti_info[RTAX_IFP] != NULL) {
1248 		struct sockaddr_dl *sdl;
1249 
1250 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1251 		ifp = if_get(sdl->sdl_index);
1252 	}
1253 
1254 #ifdef IPSEC
1255 	/*
1256 	 * If the destination is a PF_KEY address, we'll look
1257 	 * for the existence of a encap interface number or address
1258 	 * in the options list of the gateway. By default, we'll return
1259 	 * enc0.
1260 	 */
1261 	if (info->rti_info[RTAX_DST] &&
1262 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1263 		info->rti_ifa = enc_getifa(rtid, 0);
1264 #endif
1265 
1266 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1267 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1268 
1269 	if (info->rti_ifa == NULL) {
1270 		struct sockaddr	*sa;
1271 
1272 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1273 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1274 				sa = info->rti_info[RTAX_DST];
1275 
1276 		if (sa != NULL && ifp != NULL)
1277 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1278 		else if (info->rti_info[RTAX_DST] != NULL &&
1279 		    info->rti_info[RTAX_GATEWAY] != NULL)
1280 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1281 			    info->rti_info[RTAX_DST],
1282 			    info->rti_info[RTAX_GATEWAY],
1283 			    rtid);
1284 		else if (sa != NULL)
1285 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1286 			    sa, sa, rtid);
1287 	}
1288 
1289 	if_put(ifp);
1290 
1291 	if (info->rti_ifa == NULL)
1292 		return (ENETUNREACH);
1293 
1294 	return (0);
1295 }
1296 
1297 int
1298 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1299 {
1300 	struct rtentry *nhrt = arg;
1301 
1302 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1303 	    !ISSET(rt->rt_locks, RTV_MTU))
1304 		rt->rt_mtu = 0;
1305 
1306 	return (0);
1307 }
1308 
1309 /*
1310  * Check if the user request to insert an ARP entry does not conflict
1311  * with existing ones.
1312  *
1313  * Only two entries are allowed for a given IP address: a private one
1314  * (priv) and a public one (pub).
1315  */
1316 int
1317 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1318 {
1319 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1320 
1321 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1322 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1323 		return (0);
1324 
1325 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1326 		return (0);
1327 
1328 	/* If the entry is cached, it can be updated. */
1329 	if (ISSET(rt->rt_flags, RTF_CACHED))
1330 		return (0);
1331 
1332 	/*
1333 	 * Same destination, not cached and both "priv" or "pub" conflict.
1334 	 * If a second entry exists, it always conflict.
1335 	 */
1336 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1337 	    ISSET(rt->rt_flags, RTF_MPATH))
1338 		return (EEXIST);
1339 
1340 	/* No conflict but an entry exist so we need to force mpath. */
1341 	info->rti_flags |= RTF_MPATH;
1342 	return (0);
1343 }
1344 
1345 void
1346 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1347     struct rt_kmetrics *out)
1348 {
1349 	int64_t expire;
1350 
1351 	if (which & RTV_MTU)
1352 		out->rmx_mtu = in->rmx_mtu;
1353 	if (which & RTV_EXPIRE) {
1354 		expire = in->rmx_expire;
1355 		if (expire != 0) {
1356 			expire -= gettime();
1357 			expire += getuptime();
1358 		}
1359 
1360 		out->rmx_expire = expire;
1361 	}
1362 }
1363 
1364 void
1365 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1366 {
1367 	int64_t expire;
1368 
1369 	expire = in->rmx_expire;
1370 	if (expire != 0) {
1371 		expire -= getuptime();
1372 		expire += gettime();
1373 	}
1374 
1375 	bzero(out, sizeof(*out));
1376 	out->rmx_locks = in->rmx_locks;
1377 	out->rmx_mtu = in->rmx_mtu;
1378 	out->rmx_expire = expire;
1379 	out->rmx_pksent = in->rmx_pksent;
1380 }
1381 
/*
 * Round a length up to the next multiple of sizeof(long); a length of
 * zero (or less) still occupies one long.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
/*
 * Step pointer `x' past sockaddr `n', alignment padding included.
 * `x' is parenthesized so that any lvalue expression may be passed.
 */
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
1385 
1386 int
1387 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1388 {
1389 	struct sockaddr	*sa;
1390 	int		 i;
1391 
1392 	/*
1393 	 * Parse address bits, split address storage in chunks, and
1394 	 * set info pointers.  Use sa_len for traversing the memory
1395 	 * and check that we stay within in the limit.
1396 	 */
1397 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1398 	for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1399 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1400 			continue;
1401 		if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1402 			return (EINVAL);
1403 		sa = (struct sockaddr *)cp;
1404 		if (cp + sa->sa_len > cplim)
1405 			return (EINVAL);
1406 		rtinfo->rti_info[i] = sa;
1407 		ADVANCE(cp, sa);
1408 	}
1409 	/*
1410 	 * Check that the address family is suitable for the route address
1411 	 * type.  Check that each address has a size that fits its family
1412 	 * and its length is within the size.  Strings within addresses must
1413 	 * be NUL terminated.
1414 	 */
1415 	for (i = 0; i < RTAX_MAX; i++) {
1416 		size_t len, maxlen, size;
1417 
1418 		sa = rtinfo->rti_info[i];
1419 		if (sa == NULL)
1420 			continue;
1421 		maxlen = size = 0;
1422 		switch (i) {
1423 		case RTAX_DST:
1424 		case RTAX_GATEWAY:
1425 		case RTAX_SRC:
1426 			switch (sa->sa_family) {
1427 			case AF_INET:
1428 				size = sizeof(struct sockaddr_in);
1429 				break;
1430 			case AF_LINK:
1431 				size = sizeof(struct sockaddr_dl);
1432 				break;
1433 #ifdef INET6
1434 			case AF_INET6:
1435 				size = sizeof(struct sockaddr_in6);
1436 				break;
1437 #endif
1438 #ifdef MPLS
1439 			case AF_MPLS:
1440 				size = sizeof(struct sockaddr_mpls);
1441 				break;
1442 #endif
1443 			}
1444 			break;
1445 		case RTAX_IFP:
1446 			if (sa->sa_family != AF_LINK)
1447 				return (EAFNOSUPPORT);
1448 			/*
1449 			 * XXX Should be sizeof(struct sockaddr_dl), but
1450 			 * route(8) has a bug and provides less memory.
1451 			 * arp(8) has another bug and uses sizeof pointer.
1452 			 */
1453 			size = 4;
1454 			break;
1455 		case RTAX_IFA:
1456 			switch (sa->sa_family) {
1457 			case AF_INET:
1458 				size = sizeof(struct sockaddr_in);
1459 				break;
1460 #ifdef INET6
1461 			case AF_INET6:
1462 				size = sizeof(struct sockaddr_in6);
1463 				break;
1464 #endif
1465 			default:
1466 				return (EAFNOSUPPORT);
1467 			}
1468 			break;
1469 		case RTAX_LABEL:
1470 			sa->sa_family = AF_UNSPEC;
1471 			maxlen = RTLABEL_LEN;
1472 			size = sizeof(struct sockaddr_rtlabel);
1473 			break;
1474 #ifdef BFD
1475 		case RTAX_BFD:
1476 			sa->sa_family = AF_UNSPEC;
1477 			size = sizeof(struct sockaddr_bfd);
1478 			break;
1479 #endif
1480 		case RTAX_DNS:
1481 			/* more validation in rtm_validate_proposal */
1482 			if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1483 				return (EINVAL);
1484 			if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1485 			    sr_dns))
1486 				return (EINVAL);
1487 			switch (sa->sa_family) {
1488 			case AF_INET:
1489 #ifdef INET6
1490 			case AF_INET6:
1491 #endif
1492 				break;
1493 			default:
1494 				return (EAFNOSUPPORT);
1495 			}
1496 			break;
1497 		case RTAX_STATIC:
1498 			sa->sa_family = AF_UNSPEC;
1499 			maxlen = RTSTATIC_LEN;
1500 			size = sizeof(struct sockaddr_rtstatic);
1501 			break;
1502 		case RTAX_SEARCH:
1503 			sa->sa_family = AF_UNSPEC;
1504 			maxlen = RTSEARCH_LEN;
1505 			size = sizeof(struct sockaddr_rtsearch);
1506 			break;
1507 		}
1508 		if (size) {
1509 			/* memory for the full struct must be provided */
1510 			if (sa->sa_len < size)
1511 				return (EINVAL);
1512 		}
1513 		if (maxlen) {
1514 			/* this should not happen */
1515 			if (2 + maxlen > size)
1516 				return (EINVAL);
1517 			/* strings must be NUL terminated within the struct */
1518 			len = strnlen(sa->sa_data, maxlen);
1519 			if (len >= maxlen || 2 + len >= sa->sa_len)
1520 				return (EINVAL);
1521 			break;
1522 		}
1523 	}
1524 	return (0);
1525 }
1526 
1527 struct mbuf *
1528 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1529 {
1530 	struct rt_msghdr	*rtm;
1531 	struct mbuf		*m;
1532 	int			 i;
1533 	struct sockaddr		*sa;
1534 	int			 len, dlen, hlen;
1535 
1536 	switch (type) {
1537 	case RTM_DELADDR:
1538 	case RTM_NEWADDR:
1539 		len = sizeof(struct ifa_msghdr);
1540 		break;
1541 	case RTM_IFINFO:
1542 		len = sizeof(struct if_msghdr);
1543 		break;
1544 	case RTM_IFANNOUNCE:
1545 		len = sizeof(struct if_announcemsghdr);
1546 		break;
1547 #ifdef BFD
1548 	case RTM_BFD:
1549 		len = sizeof(struct bfd_msghdr);
1550 		break;
1551 #endif
1552 	case RTM_80211INFO:
1553 		len = sizeof(struct if_ieee80211_msghdr);
1554 		break;
1555 	default:
1556 		len = sizeof(struct rt_msghdr);
1557 		break;
1558 	}
1559 	if (len > MCLBYTES)
1560 		panic("rtm_msg1");
1561 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1562 	if (m && len > MHLEN) {
1563 		MCLGET(m, M_DONTWAIT);
1564 		if ((m->m_flags & M_EXT) == 0) {
1565 			m_free(m);
1566 			m = NULL;
1567 		}
1568 	}
1569 	if (m == NULL)
1570 		return (m);
1571 	m->m_pkthdr.len = m->m_len = hlen = len;
1572 	m->m_pkthdr.ph_ifidx = 0;
1573 	rtm = mtod(m, struct rt_msghdr *);
1574 	bzero(rtm, len);
1575 	for (i = 0; i < RTAX_MAX; i++) {
1576 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1577 			continue;
1578 		rtinfo->rti_addrs |= (1 << i);
1579 		dlen = ROUNDUP(sa->sa_len);
1580 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1581 			m_freem(m);
1582 			return (NULL);
1583 		}
1584 		len += dlen;
1585 	}
1586 	rtm->rtm_msglen = len;
1587 	rtm->rtm_hdrlen = hlen;
1588 	rtm->rtm_version = RTM_VERSION;
1589 	rtm->rtm_type = type;
1590 	return (m);
1591 }
1592 
1593 int
1594 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1595     struct walkarg *w)
1596 {
1597 	int		i;
1598 	int		len, dlen, hlen, second_time = 0;
1599 	caddr_t		cp0;
1600 
1601 	rtinfo->rti_addrs = 0;
1602 again:
1603 	switch (type) {
1604 	case RTM_DELADDR:
1605 	case RTM_NEWADDR:
1606 		len = sizeof(struct ifa_msghdr);
1607 		break;
1608 	case RTM_IFINFO:
1609 		len = sizeof(struct if_msghdr);
1610 		break;
1611 	default:
1612 		len = sizeof(struct rt_msghdr);
1613 		break;
1614 	}
1615 	hlen = len;
1616 	if ((cp0 = cp) != NULL)
1617 		cp += len;
1618 	for (i = 0; i < RTAX_MAX; i++) {
1619 		struct sockaddr *sa;
1620 
1621 		if ((sa = rtinfo->rti_info[i]) == NULL)
1622 			continue;
1623 		rtinfo->rti_addrs |= (1 << i);
1624 		dlen = ROUNDUP(sa->sa_len);
1625 		if (cp) {
1626 			bcopy(sa, cp, (size_t)dlen);
1627 			cp += dlen;
1628 		}
1629 		len += dlen;
1630 	}
1631 	/* align message length to the next natural boundary */
1632 	len = ALIGN(len);
1633 	if (cp == 0 && w != NULL && !second_time) {
1634 		w->w_needed += len;
1635 		if (w->w_needed <= 0 && w->w_where) {
1636 			if (w->w_tmemsize < len) {
1637 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1638 				w->w_tmem = malloc(len, M_RTABLE,
1639 				    M_NOWAIT | M_ZERO);
1640 				if (w->w_tmem)
1641 					w->w_tmemsize = len;
1642 			}
1643 			if (w->w_tmem) {
1644 				cp = w->w_tmem;
1645 				second_time = 1;
1646 				goto again;
1647 			} else
1648 				w->w_where = 0;
1649 		}
1650 	}
1651 	if (cp && w)		/* clear the message header */
1652 		bzero(cp0, hlen);
1653 
1654 	if (cp) {
1655 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1656 
1657 		rtm->rtm_version = RTM_VERSION;
1658 		rtm->rtm_type = type;
1659 		rtm->rtm_msglen = len;
1660 		rtm->rtm_hdrlen = hlen;
1661 	}
1662 	return (len);
1663 }
1664 
1665 void
1666 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1667 {
1668 	struct rt_addrinfo	 info;
1669 	struct ifnet		*ifp;
1670 	struct sockaddr_rtlabel	 sa_rl;
1671 	struct sockaddr_in6	 sa_mask;
1672 
1673 	memset(&info, 0, sizeof(info));
1674 	info.rti_info[RTAX_DST] = rt_key(rt);
1675 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1676 	if (!ISSET(rt->rt_flags, RTF_HOST))
1677 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1678 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1679 	ifp = if_get(rt->rt_ifidx);
1680 	if (ifp != NULL) {
1681 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1682 		info.rti_info[RTAX_IFA] =
1683 		    rtable_getsource(rtableid, info.rti_info[RTAX_DST]->sa_family);
1684 		if (info.rti_info[RTAX_IFA] == NULL)
1685 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1686 	}
1687 
1688 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1689 	    rtableid);
1690 	if_put(ifp);
1691 }
1692 
1693 /*
1694  * This routine is called to generate a message from the routing
1695  * socket indicating that a redirect has occurred, a routing lookup
1696  * has failed, or that a protocol has detected timeouts to a particular
1697  * destination.
1698  */
1699 void
1700 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1701     u_int ifidx, int error, u_int tableid)
1702 {
1703 	struct rt_msghdr	*rtm;
1704 	struct mbuf		*m;
1705 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1706 
1707 	if (rtptable.rtp_count == 0)
1708 		return;
1709 	m = rtm_msg1(type, rtinfo);
1710 	if (m == NULL)
1711 		return;
1712 	rtm = mtod(m, struct rt_msghdr *);
1713 	rtm->rtm_flags = RTF_DONE | flags;
1714 	rtm->rtm_priority = prio;
1715 	rtm->rtm_errno = error;
1716 	rtm->rtm_tableid = tableid;
1717 	rtm->rtm_addrs = rtinfo->rti_addrs;
1718 	rtm->rtm_index = ifidx;
1719 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1720 }
1721 
1722 /*
1723  * This routine is called to generate a message from the routing
1724  * socket indicating that the status of a network interface has changed.
1725  */
1726 void
1727 rtm_ifchg(struct ifnet *ifp)
1728 {
1729 	struct rt_addrinfo	 info;
1730 	struct if_msghdr	*ifm;
1731 	struct mbuf		*m;
1732 
1733 	if (rtptable.rtp_count == 0)
1734 		return;
1735 	memset(&info, 0, sizeof(info));
1736 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1737 	m = rtm_msg1(RTM_IFINFO, &info);
1738 	if (m == NULL)
1739 		return;
1740 	ifm = mtod(m, struct if_msghdr *);
1741 	ifm->ifm_index = ifp->if_index;
1742 	ifm->ifm_tableid = ifp->if_rdomain;
1743 	ifm->ifm_flags = ifp->if_flags;
1744 	ifm->ifm_xflags = ifp->if_xflags;
1745 	if_getdata(ifp, &ifm->ifm_data);
1746 	ifm->ifm_addrs = info.rti_addrs;
1747 	route_input(m, NULL, AF_UNSPEC);
1748 }
1749 
1750 /*
1751  * This is called to generate messages from the routing socket
1752  * indicating a network interface has had addresses associated with it.
1753  * if we ever reverse the logic and replace messages TO the routing
1754  * socket indicate a request to configure interfaces, then it will
1755  * be unnecessary as the routing socket will automatically generate
1756  * copies of it.
1757  */
1758 void
1759 rtm_addr(int cmd, struct ifaddr *ifa)
1760 {
1761 	struct ifnet		*ifp = ifa->ifa_ifp;
1762 	struct mbuf		*m;
1763 	struct rt_addrinfo	 info;
1764 	struct ifa_msghdr	*ifam;
1765 
1766 	if (rtptable.rtp_count == 0)
1767 		return;
1768 
1769 	memset(&info, 0, sizeof(info));
1770 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1771 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1772 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1773 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1774 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1775 		return;
1776 	ifam = mtod(m, struct ifa_msghdr *);
1777 	ifam->ifam_index = ifp->if_index;
1778 	ifam->ifam_metric = ifa->ifa_metric;
1779 	ifam->ifam_flags = ifa->ifa_flags;
1780 	ifam->ifam_addrs = info.rti_addrs;
1781 	ifam->ifam_tableid = ifp->if_rdomain;
1782 
1783 	route_input(m, NULL,
1784 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1785 }
1786 
1787 /*
1788  * This is called to generate routing socket messages indicating
1789  * network interface arrival and departure.
1790  */
1791 void
1792 rtm_ifannounce(struct ifnet *ifp, int what)
1793 {
1794 	struct if_announcemsghdr	*ifan;
1795 	struct mbuf			*m;
1796 
1797 	if (rtptable.rtp_count == 0)
1798 		return;
1799 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1800 	if (m == NULL)
1801 		return;
1802 	ifan = mtod(m, struct if_announcemsghdr *);
1803 	ifan->ifan_index = ifp->if_index;
1804 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1805 	ifan->ifan_what = what;
1806 	route_input(m, NULL, AF_UNSPEC);
1807 }
1808 
1809 #ifdef BFD
1810 /*
1811  * This is used to generate routing socket messages indicating
1812  * the state of a BFD session.
1813  */
1814 void
1815 rtm_bfd(struct bfd_config *bfd)
1816 {
1817 	struct bfd_msghdr	*bfdm;
1818 	struct sockaddr_bfd	 sa_bfd;
1819 	struct mbuf		*m;
1820 	struct rt_addrinfo	 info;
1821 
1822 	if (rtptable.rtp_count == 0)
1823 		return;
1824 	memset(&info, 0, sizeof(info));
1825 	info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt);
1826 	info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr;
1827 
1828 	m = rtm_msg1(RTM_BFD, &info);
1829 	if (m == NULL)
1830 		return;
1831 	bfdm = mtod(m, struct bfd_msghdr *);
1832 	bfdm->bm_addrs = info.rti_addrs;
1833 
1834 	bfd2sa(bfd->bc_rt, &sa_bfd);
1835 	memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd));
1836 
1837 	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
1838 }
1839 #endif /* BFD */
1840 
1841 /*
1842  * This is used to generate routing socket messages indicating
1843  * the state of an ieee80211 interface.
1844  */
1845 void
1846 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1847 {
1848 	struct if_ieee80211_msghdr	*ifim;
1849 	struct mbuf			*m;
1850 
1851 	if (rtptable.rtp_count == 0)
1852 		return;
1853 	m = rtm_msg1(RTM_80211INFO, NULL);
1854 	if (m == NULL)
1855 		return;
1856 	ifim = mtod(m, struct if_ieee80211_msghdr *);
1857 	ifim->ifim_index = ifp->if_index;
1858 	ifim->ifim_tableid = ifp->if_rdomain;
1859 
1860 	memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1861 	route_input(m, NULL, AF_UNSPEC);
1862 }
1863 
1864 /*
1865  * This is used to generate routing socket messages indicating
1866  * the address selection proposal from an interface.
1867  */
1868 void
1869 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1870     uint8_t prio)
1871 {
1872 	struct rt_msghdr	*rtm;
1873 	struct mbuf		*m;
1874 
1875 	m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1876 	if (m == NULL)
1877 		return;
1878 	rtm = mtod(m, struct rt_msghdr *);
1879 	rtm->rtm_flags = RTF_DONE | flags;
1880 	rtm->rtm_priority = prio;
1881 	rtm->rtm_tableid = ifp->if_rdomain;
1882 	rtm->rtm_index = ifp->if_index;
1883 	rtm->rtm_addrs = rtinfo->rti_addrs;
1884 
1885 	route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1886 }
1887 
1888 /*
1889  * This is used in dumping the kernel table via sysctl().
1890  */
1891 int
1892 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1893 {
1894 	struct walkarg		*w = v;
1895 	int			 error = 0, size;
1896 	struct rt_addrinfo	 info;
1897 	struct ifnet		*ifp;
1898 #ifdef BFD
1899 	struct sockaddr_bfd	 sa_bfd;
1900 #endif
1901 	struct sockaddr_rtlabel	 sa_rl;
1902 	struct sockaddr_in6	 sa_mask;
1903 
1904 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1905 		return 0;
1906 	if (w->w_op == NET_RT_DUMP && w->w_arg) {
1907 		u_int8_t prio = w->w_arg & RTP_MASK;
1908 		if (w->w_arg < 0) {
1909 			prio = (-w->w_arg) & RTP_MASK;
1910 			/* Show all routes that are not this priority */
1911 			if (prio == (rt->rt_priority & RTP_MASK))
1912 				return 0;
1913 		} else {
1914 			if (prio != (rt->rt_priority & RTP_MASK) &&
1915 			    prio != RTP_ANY)
1916 				return 0;
1917 		}
1918 	}
1919 	bzero(&info, sizeof(info));
1920 	info.rti_info[RTAX_DST] = rt_key(rt);
1921 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1922 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1923 	ifp = if_get(rt->rt_ifidx);
1924 	if (ifp != NULL) {
1925 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1926 		info.rti_info[RTAX_IFA] =
1927 		    rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
1928 		if (info.rti_info[RTAX_IFA] == NULL)
1929 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1930 		if (ifp->if_flags & IFF_POINTOPOINT)
1931 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1932 	}
1933 	if_put(ifp);
1934 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1935 #ifdef BFD
1936 	if (rt->rt_flags & RTF_BFD)
1937 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
1938 #endif
1939 #ifdef MPLS
1940 	if (rt->rt_flags & RTF_MPLS) {
1941 		struct sockaddr_mpls	 sa_mpls;
1942 
1943 		bzero(&sa_mpls, sizeof(sa_mpls));
1944 		sa_mpls.smpls_family = AF_MPLS;
1945 		sa_mpls.smpls_len = sizeof(sa_mpls);
1946 		sa_mpls.smpls_label = ((struct rt_mpls *)
1947 		    rt->rt_llinfo)->mpls_label;
1948 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1949 		info.rti_mpls = ((struct rt_mpls *)
1950 		    rt->rt_llinfo)->mpls_operation;
1951 	}
1952 #endif
1953 
1954 	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1955 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1956 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1957 
1958 		rtm->rtm_pid = curproc->p_p->ps_pid;
1959 		rtm->rtm_flags = rt->rt_flags;
1960 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
1961 		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1962 		/* Do not account the routing table's reference. */
1963 		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
1964 		rtm->rtm_index = rt->rt_ifidx;
1965 		rtm->rtm_addrs = info.rti_addrs;
1966 		rtm->rtm_tableid = id;
1967 #ifdef MPLS
1968 		rtm->rtm_mpls = info.rti_mpls;
1969 #endif
1970 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1971 			w->w_where = NULL;
1972 		else
1973 			w->w_where += size;
1974 	}
1975 	return (error);
1976 }
1977 
1978 int
1979 sysctl_iflist(int af, struct walkarg *w)
1980 {
1981 	struct ifnet		*ifp;
1982 	struct ifaddr		*ifa;
1983 	struct rt_addrinfo	 info;
1984 	int			 len, error = 0;
1985 
1986 	bzero(&info, sizeof(info));
1987 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1988 		if (w->w_arg && w->w_arg != ifp->if_index)
1989 			continue;
1990 		/* Copy the link-layer address first */
1991 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1992 		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
1993 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1994 			struct if_msghdr *ifm;
1995 
1996 			ifm = (struct if_msghdr *)w->w_tmem;
1997 			ifm->ifm_index = ifp->if_index;
1998 			ifm->ifm_tableid = ifp->if_rdomain;
1999 			ifm->ifm_flags = ifp->if_flags;
2000 			if_getdata(ifp, &ifm->ifm_data);
2001 			ifm->ifm_addrs = info.rti_addrs;
2002 			error = copyout(ifm, w->w_where, len);
2003 			if (error)
2004 				return (error);
2005 			w->w_where += len;
2006 		}
2007 		info.rti_info[RTAX_IFP] = NULL;
2008 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2009 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
2010 			if (af && af != ifa->ifa_addr->sa_family)
2011 				continue;
2012 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2013 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2014 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2015 			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
2016 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
2017 				struct ifa_msghdr *ifam;
2018 
2019 				ifam = (struct ifa_msghdr *)w->w_tmem;
2020 				ifam->ifam_index = ifa->ifa_ifp->if_index;
2021 				ifam->ifam_flags = ifa->ifa_flags;
2022 				ifam->ifam_metric = ifa->ifa_metric;
2023 				ifam->ifam_addrs = info.rti_addrs;
2024 				error = copyout(w->w_tmem, w->w_where, len);
2025 				if (error)
2026 					return (error);
2027 				w->w_where += len;
2028 			}
2029 		}
2030 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2031 		    info.rti_info[RTAX_BRD] = NULL;
2032 	}
2033 	return (0);
2034 }
2035 
2036 int
2037 sysctl_ifnames(struct walkarg *w)
2038 {
2039 	struct if_nameindex_msg ifn;
2040 	struct ifnet *ifp;
2041 	int error = 0;
2042 
2043 	/* XXX ignore tableid for now */
2044 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2045 		if (w->w_arg && w->w_arg != ifp->if_index)
2046 			continue;
2047 		w->w_needed += sizeof(ifn);
2048 		if (w->w_where && w->w_needed <= 0) {
2049 
2050 			memset(&ifn, 0, sizeof(ifn));
2051 			ifn.if_index = ifp->if_index;
2052 			strlcpy(ifn.if_name, ifp->if_xname,
2053 			    sizeof(ifn.if_name));
2054 			error = copyout(&ifn, w->w_where, sizeof(ifn));
2055 			if (error)
2056 				return (error);
2057 			w->w_where += sizeof(ifn);
2058 		}
2059 	}
2060 
2061 	return (0);
2062 }
2063 
2064 int
2065 sysctl_source(int af, u_int tableid, struct walkarg *w)
2066 {
2067 	struct sockaddr	*sa;
2068 	int		 size, error = 0;
2069 
2070 	sa = rtable_getsource(tableid, af);
2071 	if (sa) {
2072 		switch (sa->sa_family) {
2073 		case AF_INET:
2074 			size = sizeof(struct sockaddr_in);
2075 			break;
2076 #ifdef INET6
2077 		case AF_INET6:
2078 			size = sizeof(struct sockaddr_in6);
2079 			break;
2080 #endif
2081 		default:
2082 			return (0);
2083 		}
2084 		w->w_needed += size;
2085 		if (w->w_where && w->w_needed <= 0) {
2086 			if ((error = copyout(sa, w->w_where, size)))
2087 				return (error);
2088 			w->w_where += size;
2089 		}
2090 	}
2091 	return (0);
2092 }
2093 
2094 int
2095 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2096     size_t newlen)
2097 {
2098 	int			 i, error = EINVAL;
2099 	u_char			 af;
2100 	struct walkarg		 w;
2101 	struct rt_tableinfo	 tableinfo;
2102 	u_int			 tableid = 0;
2103 
2104 	if (new)
2105 		return (EPERM);
2106 	if (namelen < 3 || namelen > 4)
2107 		return (EINVAL);
2108 	af = name[0];
2109 	bzero(&w, sizeof(w));
2110 	w.w_where = where;
2111 	w.w_given = *given;
2112 	w.w_needed = 0 - w.w_given;
2113 	w.w_op = name[1];
2114 	w.w_arg = name[2];
2115 
2116 	if (namelen == 4) {
2117 		tableid = name[3];
2118 		if (!rtable_exists(tableid))
2119 			return (ENOENT);
2120 	} else
2121 		tableid = curproc->p_p->ps_rtableid;
2122 
2123 	switch (w.w_op) {
2124 	case NET_RT_DUMP:
2125 	case NET_RT_FLAGS:
2126 		NET_LOCK();
2127 		for (i = 1; i <= AF_MAX; i++) {
2128 			if (af != 0 && af != i)
2129 				continue;
2130 
2131 			error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2132 			    &w);
2133 			if (error == EAFNOSUPPORT)
2134 				error = 0;
2135 			if (error)
2136 				break;
2137 		}
2138 		NET_UNLOCK();
2139 		break;
2140 
2141 	case NET_RT_IFLIST:
2142 		NET_LOCK();
2143 		error = sysctl_iflist(af, &w);
2144 		NET_UNLOCK();
2145 		break;
2146 
2147 	case NET_RT_STATS:
2148 		return (sysctl_rtable_rtstat(where, given, new));
2149 	case NET_RT_TABLE:
2150 		tableid = w.w_arg;
2151 		if (!rtable_exists(tableid))
2152 			return (ENOENT);
2153 		memset(&tableinfo, 0, sizeof tableinfo);
2154 		tableinfo.rti_tableid = tableid;
2155 		tableinfo.rti_domainid = rtable_l2(tableid);
2156 		error = sysctl_rdstruct(where, given, new,
2157 		    &tableinfo, sizeof(tableinfo));
2158 		return (error);
2159 	case NET_RT_IFNAMES:
2160 		NET_LOCK();
2161 		error = sysctl_ifnames(&w);
2162 		NET_UNLOCK();
2163 		break;
2164 	case NET_RT_SOURCE:
2165 		tableid = w.w_arg;
2166 		if (!rtable_exists(tableid))
2167 			return (ENOENT);
2168 		NET_LOCK();
2169 		for (i = 1; i <= AF_MAX; i++) {
2170 			if (af != 0 && af != i)
2171 				continue;
2172 
2173 			error = sysctl_source(i, tableid, &w);
2174 			if (error == EAFNOSUPPORT)
2175 				error = 0;
2176 			if (error)
2177 				break;
2178 		}
2179 		NET_UNLOCK();
2180 		break;
2181 	}
2182 	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2183 	w.w_needed += w.w_given;
2184 	if (where) {
2185 		*given = w.w_where - (caddr_t)where;
2186 		if (*given < w.w_needed)
2187 			return (ENOMEM);
2188 	} else
2189 		*given = (11 * w.w_needed) / 10;
2190 
2191 	return (error);
2192 }
2193 
2194 int
2195 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2196 {
2197 	extern struct cpumem *rtcounters;
2198 	uint64_t counters[rts_ncounters];
2199 	struct rtstat rtstat;
2200 	uint32_t *words = (uint32_t *)&rtstat;
2201 	int i;
2202 
2203 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2204 	memset(&rtstat, 0, sizeof rtstat);
2205 	counters_read(rtcounters, counters, nitems(counters));
2206 
2207 	for (i = 0; i < nitems(counters); i++)
2208 		words[i] = (uint32_t)counters[i];
2209 
2210 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2211 }
2212 
2213 int
2214 rtm_validate_proposal(struct rt_addrinfo *info)
2215 {
2216 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2217 	    RTA_SEARCH)) {
2218 		return -1;
2219 	}
2220 
2221 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2222 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2223 		if (sa == NULL)
2224 			return -1;
2225 		switch (sa->sa_family) {
2226 		case AF_INET:
2227 			if (sa->sa_len != sizeof(struct sockaddr_in))
2228 				return -1;
2229 			break;
2230 		case AF_INET6:
2231 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2232 				return -1;
2233 			break;
2234 		default:
2235 			return -1;
2236 		}
2237 	}
2238 
2239 	if (ISSET(info->rti_addrs, RTA_IFA)) {
2240 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
2241 		if (sa == NULL)
2242 			return -1;
2243 		switch (sa->sa_family) {
2244 		case AF_INET:
2245 			if (sa->sa_len != sizeof(struct sockaddr_in))
2246 				return -1;
2247 			break;
2248 		case AF_INET6:
2249 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2250 				return -1;
2251 			break;
2252 		default:
2253 			return -1;
2254 		}
2255 	}
2256 
2257 	if (ISSET(info->rti_addrs, RTA_DNS)) {
2258 		struct sockaddr_rtdns *rtdns =
2259 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2260 		if (rtdns == NULL)
2261 			return -1;
2262 		if (rtdns->sr_len > sizeof(*rtdns))
2263 			return -1;
2264 		if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2265 			return -1;
2266 		switch (rtdns->sr_family) {
2267 		case AF_INET:
2268 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2269 			    sr_dns)) % sizeof(struct in_addr) != 0)
2270 				return -1;
2271 			break;
2272 #ifdef INET6
2273 		case AF_INET6:
2274 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2275 			    sr_dns)) % sizeof(struct in6_addr) != 0)
2276 				return -1;
2277 			break;
2278 #endif
2279 		default:
2280 			return -1;
2281 		}
2282 	}
2283 
2284 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
2285 		struct sockaddr_rtstatic *rtstatic =
2286 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2287 		if (rtstatic == NULL)
2288 			return -1;
2289 		if (rtstatic->sr_len > sizeof(*rtstatic))
2290 			return -1;
2291 		if (rtstatic->sr_len <=
2292 		    offsetof(struct sockaddr_rtstatic, sr_static))
2293 			return -1;
2294 	}
2295 
2296 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2297 		struct sockaddr_rtsearch *rtsearch =
2298 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2299 		if (rtsearch == NULL)
2300 			return -1;
2301 		if (rtsearch->sr_len > sizeof(*rtsearch))
2302 			return -1;
2303 		if (rtsearch->sr_len <=
2304 		    offsetof(struct sockaddr_rtsearch, sr_search))
2305 			return -1;
2306 	}
2307 
2308 	return 0;
2309 }
2310 
2311 int
2312 rt_setsource(unsigned int rtableid, struct sockaddr *src)
2313 {
2314 	struct ifaddr	*ifa;
2315 	int		error;
2316 	/*
2317 	 * If source address is 0.0.0.0 or ::
2318 	 * use automatic source selection
2319 	 */
2320 	switch(src->sa_family) {
2321 	case AF_INET:
2322 		if(satosin(src)->sin_addr.s_addr == INADDR_ANY) {
2323 			rtable_setsource(rtableid, AF_INET, NULL);
2324 			return (0);
2325 		}
2326 		break;
2327 #ifdef INET6
2328 	case AF_INET6:
2329 		if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
2330 			rtable_setsource(rtableid, AF_INET6, NULL);
2331 			return (0);
2332 		}
2333 		break;
2334 #endif
2335 	default:
2336 		return (EAFNOSUPPORT);
2337 	}
2338 
2339 	KERNEL_LOCK();
2340 	/*
2341 	 * Check if source address is assigned to an interface in the
2342 	 * same rdomain
2343 	 */
2344 	if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) {
2345 		KERNEL_UNLOCK();
2346 		return (EINVAL);
2347 	}
2348 
2349 	error = rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr);
2350 	KERNEL_UNLOCK();
2351 
2352 	return (error);
2353 }
2354 
2355 /*
2356  * Definitions of protocols supported in the ROUTE domain.
2357  */
2358 
2359 struct domain routedomain;
2360 
2361 struct protosw routesw[] = {
2362 {
2363   .pr_type	= SOCK_RAW,
2364   .pr_domain	= &routedomain,
2365   .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2366   .pr_output	= route_output,
2367   .pr_ctloutput	= route_ctloutput,
2368   .pr_usrreq	= route_usrreq,
2369   .pr_attach	= route_attach,
2370   .pr_detach	= route_detach,
2371   .pr_init	= route_prinit,
2372   .pr_sysctl	= sysctl_rtable
2373 }
2374 };
2375 
2376 struct domain routedomain = {
2377   .dom_family = PF_ROUTE,
2378   .dom_name = "route",
2379   .dom_init = route_init,
2380   .dom_protosw = routesw,
2381   .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2382 };
2383