xref: /openbsd-src/sys/net/rtsock.c (revision 097a140d792de8b2bbe59ad827d39eabf9b4280a)
1 /*	$OpenBSD: rtsock.c,v 1.311 2021/04/26 08:21:36 claudio Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75 
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80 
81 #include <netinet/in.h>
82 
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93 
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97 
/*
 * Send and receive buffer reservations handed to soreserve() when a
 * routing socket is attached (see route_attach()).
 */
#define	ROUTESNDQ	8192
#define	ROUTERCVQ	8192

/*
 * Fixed address used as the peer address of every routing socket and as
 * the source address of every message queued to one.  Only the first
 * two members are given: the length (2) and the PF_ROUTE family.
 */
const struct sockaddr route_src = { 2, PF_ROUTE, };
102 
/*
 * Argument block passed to rtm_msg2(), sysctl_iflist() and
 * sysctl_ifnames() (see the prototypes in this file).
 * NOTE(review): the users of these fields are not visible in this part
 * of the file; judging by the names they look like the sysctl operation
 * and its argument, the bytes supplied/needed, the user buffer and a
 * temporary kernel buffer with its size -- confirm against the sysctl
 * code before relying on this.
 */
struct walkarg {
	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
	caddr_t	w_where, w_tmem;
};
107 
/*
 * Forward declarations for the routing socket code.
 */

/* protocol glue, pcb reference counting and message delivery */
void	route_prinit(void);
void	rcb_ref(void *, void *);
void	rcb_unref(void *, void *);
int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
	    struct mbuf *);
int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
	    struct mbuf *, struct proc *);
void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int	route_cleargateway(struct rtentry *, void *, unsigned int);
void	rtm_senddesync_timer(void *);
void	rtm_senddesync(struct socket *);
int	rtm_sendup(struct socket *, struct mbuf *);

/* routing message construction and processing */
int	rtm_getifa(struct rt_addrinfo *, unsigned int);
int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
	    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
		     struct walkarg *);
int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
int		 rtm_validate_proposal(struct rt_addrinfo *);
void		 rtm_setmetrics(u_long, const struct rt_metrics *,
		     struct rt_kmetrics *);
void		 rtm_getmetrics(const struct rt_kmetrics *,
		     struct rt_metrics *);

/* sysctl helpers */
int		 sysctl_iflist(int, struct walkarg *);
int		 sysctl_ifnames(struct walkarg *);
int		 sysctl_rtable_rtstat(void *, size_t *, void *);

/* source address selection (used for RTM_SOURCE in route_output()) */
int		 rt_setsource(unsigned int, struct sockaddr *);
142 
143 /*
144  * Locks used to protect struct members
145  *       I       immutable after creation
146  *       sK      solock (kernel lock)
147  */
/*
 * Per-socket control block of a routing socket.  One is allocated from
 * rtpcb_pool in route_attach() and linked into rtptable.rtp_list.
 */
struct rtpcb {
	/* back pointer to the owning socket */
	struct socket		*rop_socket;		/* [I] */

	/* linkage in rtptable.rtp_list */
	SRPL_ENTRY(rtpcb)	rop_list;
	/* taken/released by rcb_ref()/rcb_unref(), drained in route_detach() */
	struct refcnt		rop_refcnt;
	/* fires rtm_senddesync_timer() to retry a pending RTM_DESYNC */
	struct timeout		rop_timeout;
	/* ROUTE_MSGFILTER: bitmask of wanted message types, 0 = no filter */
	unsigned int		rop_msgfilter;		/* [sK] */
	/* ROUTE_FLAGFILTER: messages with any of these rtm_flags are dropped */
	unsigned int		rop_flagfilter;		/* [sK] */
	/* ROUTECB_FLAG_* state bits */
	unsigned int		rop_flags;		/* [sK] */
	/* ROUTE_TABLEFILTER: routing table of interest, or RTABLE_ANY */
	u_int			rop_rtableid;		/* [sK] */
	/* protocol given at attach time; compared to the address family */
	unsigned short		rop_proto;		/* [I] */
	/* ROUTE_PRIOFILTER: highest priority value delivered, 0 = no filter */
	u_char			rop_priority;		/* [sK] */
};
/* Fetch the routing pcb hanging off a socket. */
#define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
162 
/*
 * Table of all attached routing sockets.
 */
struct rtptable {
	/* list of rtpcbs, walked with SRPL_FOREACH() in route_input() */
	SRPL_HEAD(, rtpcb)	rtp_list;
	/* reference callbacks for the list (rcb_ref()/rcb_unref()) */
	struct srpl_rc		rtp_rc;
	/* write-held while inserting into or removing from rtp_list */
	struct rwlock		rtp_lk;
	/* number of attached routing sockets */
	unsigned int		rtp_count;
};
169 
/* Allocator for struct rtpcb, set up in route_prinit(). */
struct pool rtpcb_pool;
/* The single, global table of routing sockets. */
struct rtptable rtptable;
172 
173 /*
174  * These flags and timeout are used for indicating to userland (via a
175  * RTM_DESYNC msg) when the route socket has overflowed and messages
176  * have been lost.
177  */
/* Bits kept in rtpcb.rop_flags. */
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
					   queueing more packets */

/* Delay before rtm_senddesync() is retried after a failed attempt. */
#define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */
183 
184 void
185 route_prinit(void)
186 {
187 	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
188 	rw_init(&rtptable.rtp_lk, "rtsock");
189 	SRPL_INIT(&rtptable.rtp_list);
190 	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
191 	    IPL_NONE, PR_WAITOK, "rtpcb", NULL);
192 }
193 
194 void
195 rcb_ref(void *null, void *v)
196 {
197 	struct rtpcb *rop = v;
198 
199 	refcnt_take(&rop->rop_refcnt);
200 }
201 
202 void
203 rcb_unref(void *null, void *v)
204 {
205 	struct rtpcb *rop = v;
206 
207 	refcnt_rele_wake(&rop->rop_refcnt);
208 }
209 
210 int
211 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
212     struct mbuf *control, struct proc *p)
213 {
214 	struct rtpcb	*rop;
215 	int		 error = 0;
216 
217 	if (req == PRU_CONTROL)
218 		return (EOPNOTSUPP);
219 
220 	soassertlocked(so);
221 
222 	if (control && control->m_len) {
223 		error = EOPNOTSUPP;
224 		goto release;
225 	}
226 
227 	rop = sotortpcb(so);
228 	if (rop == NULL) {
229 		error = EINVAL;
230 		goto release;
231 	}
232 
233 	switch (req) {
234 	/* no connect, bind, accept. Socket is connected from the start */
235 	case PRU_CONNECT:
236 	case PRU_BIND:
237 	case PRU_CONNECT2:
238 	case PRU_LISTEN:
239 	case PRU_ACCEPT:
240 		error = EOPNOTSUPP;
241 		break;
242 
243 	case PRU_DISCONNECT:
244 	case PRU_ABORT:
245 		soisdisconnected(so);
246 		break;
247 	case PRU_SHUTDOWN:
248 		socantsendmore(so);
249 		break;
250 	case PRU_SENSE:
251 		/* stat: don't bother with a blocksize. */
252 		break;
253 
254 	/* minimal support, just implement a fake peer address */
255 	case PRU_SOCKADDR:
256 		error = EINVAL;
257 		break;
258 	case PRU_PEERADDR:
259 		bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
260 		nam->m_len = route_src.sa_len;
261 		break;
262 
263 	case PRU_RCVD:
264 		/*
265 		 * If we are in a FLUSH state, check if the buffer is
266 		 * empty so that we can clear the flag.
267 		 */
268 		if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
269 		    ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
270 		    rop->rop_socket->so_rcv.sb_hiwat)))
271 			rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
272 		break;
273 
274 	case PRU_RCVOOB:
275 	case PRU_SENDOOB:
276 		error = EOPNOTSUPP;
277 		break;
278 	case PRU_SEND:
279 		if (nam) {
280 			error = EISCONN;
281 			break;
282 		}
283 		error = (*so->so_proto->pr_output)(m, so, NULL, NULL);
284 		m = NULL;
285 		break;
286 	default:
287 		panic("route_usrreq");
288 	}
289 
290  release:
291 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
292 		m_freem(control);
293 		m_freem(m);
294 	}
295 	return (error);
296 }
297 
298 int
299 route_attach(struct socket *so, int proto)
300 {
301 	struct rtpcb	*rop;
302 	int		 error;
303 
304 	error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
305 	if (error)
306 		return (error);
307 	/*
308 	 * use the rawcb but allocate a rtpcb, this
309 	 * code does not care about the additional fields
310 	 * and works directly on the raw socket.
311 	 */
312 	rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO);
313 	so->so_pcb = rop;
314 	/* Init the timeout structure */
315 	timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
316 	refcnt_init(&rop->rop_refcnt);
317 
318 	rop->rop_socket = so;
319 	rop->rop_proto = proto;
320 
321 	rop->rop_rtableid = curproc->p_p->ps_rtableid;
322 
323 	soisconnected(so);
324 	so->so_options |= SO_USELOOPBACK;
325 
326 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
327 	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
328 	    rop_list);
329 	rtptable.rtp_count++;
330 	rw_exit(&rtptable.rtp_lk);
331 
332 	return (0);
333 }
334 
335 int
336 route_detach(struct socket *so)
337 {
338 	struct rtpcb	*rop;
339 
340 	soassertlocked(so);
341 
342 	rop = sotortpcb(so);
343 	if (rop == NULL)
344 		return (EINVAL);
345 
346 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
347 
348 	timeout_del(&rop->rop_timeout);
349 	rtptable.rtp_count--;
350 
351 	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
352 	    rop_list);
353 	rw_exit(&rtptable.rtp_lk);
354 
355 	/* wait for all references to drop */
356 	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
357 
358 	so->so_pcb = NULL;
359 	KASSERT((so->so_state & SS_NOFDREF) == 0);
360 	pool_put(&rtpcb_pool, rop);
361 
362 	return (0);
363 }
364 
365 int
366 route_ctloutput(int op, struct socket *so, int level, int optname,
367     struct mbuf *m)
368 {
369 	struct rtpcb *rop = sotortpcb(so);
370 	int error = 0;
371 	unsigned int tid, prio;
372 
373 	if (level != AF_ROUTE)
374 		return (EINVAL);
375 
376 	switch (op) {
377 	case PRCO_SETOPT:
378 		switch (optname) {
379 		case ROUTE_MSGFILTER:
380 			if (m == NULL || m->m_len != sizeof(unsigned int))
381 				error = EINVAL;
382 			else
383 				rop->rop_msgfilter = *mtod(m, unsigned int *);
384 			break;
385 		case ROUTE_TABLEFILTER:
386 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
387 				error = EINVAL;
388 				break;
389 			}
390 			tid = *mtod(m, unsigned int *);
391 			if (tid != RTABLE_ANY && !rtable_exists(tid))
392 				error = ENOENT;
393 			else
394 				rop->rop_rtableid = tid;
395 			break;
396 		case ROUTE_PRIOFILTER:
397 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
398 				error = EINVAL;
399 				break;
400 			}
401 			prio = *mtod(m, unsigned int *);
402 			if (prio > RTP_MAX)
403 				error = EINVAL;
404 			else
405 				rop->rop_priority = prio;
406 			break;
407 		case ROUTE_FLAGFILTER:
408 			if (m == NULL || m->m_len != sizeof(unsigned int))
409 				error = EINVAL;
410 			else
411 				rop->rop_flagfilter = *mtod(m, unsigned int *);
412 			break;
413 		default:
414 			error = ENOPROTOOPT;
415 			break;
416 		}
417 		break;
418 	case PRCO_GETOPT:
419 		switch (optname) {
420 		case ROUTE_MSGFILTER:
421 			m->m_len = sizeof(unsigned int);
422 			*mtod(m, unsigned int *) = rop->rop_msgfilter;
423 			break;
424 		case ROUTE_TABLEFILTER:
425 			m->m_len = sizeof(unsigned int);
426 			*mtod(m, unsigned int *) = rop->rop_rtableid;
427 			break;
428 		case ROUTE_PRIOFILTER:
429 			m->m_len = sizeof(unsigned int);
430 			*mtod(m, unsigned int *) = rop->rop_priority;
431 			break;
432 		case ROUTE_FLAGFILTER:
433 			m->m_len = sizeof(unsigned int);
434 			*mtod(m, unsigned int *) = rop->rop_flagfilter;
435 			break;
436 		default:
437 			error = ENOPROTOOPT;
438 			break;
439 		}
440 	}
441 	return (error);
442 }
443 
/*
 * Timeout handler armed by rtm_senddesync(): retry the delivery of the
 * RTM_DESYNC message with the socket locked.  xso is the socket that
 * was registered with timeout_set() in route_attach().
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*sock = xso;
	int		 lockstate;

	lockstate = solock(sock);
	rtm_senddesync(sock);
	sounlock(sock, lockstate);
}
454 
455 void
456 rtm_senddesync(struct socket *so)
457 {
458 	struct rtpcb	*rop = sotortpcb(so);
459 	struct mbuf	*desync_mbuf;
460 
461 	soassertlocked(so);
462 
463 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
464 	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
465 		return;
466 
467 	/*
468 	 * If we fail to alloc memory or if sbappendaddr()
469 	 * fails, re-add timeout and try again.
470 	 */
471 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
472 	if (desync_mbuf != NULL) {
473 		if (sbappendaddr(so, &so->so_rcv, &route_src,
474 		    desync_mbuf, NULL) != 0) {
475 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
476 			sorwakeup(rop->rop_socket);
477 			return;
478 		}
479 		m_freem(desync_mbuf);
480 	}
481 	/* Re-add timeout to try sending msg again */
482 	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
483 }
484 
485 void
486 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
487 {
488 	struct socket *so;
489 	struct rtpcb *rop;
490 	struct rt_msghdr *rtm;
491 	struct mbuf *m = m0;
492 	struct srp_ref sr;
493 	int s;
494 
495 	/* ensure that we can access the rtm_type via mtod() */
496 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
497 		m_freem(m);
498 		return;
499 	}
500 
501 	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
502 		/*
503 		 * If route socket is bound to an address family only send
504 		 * messages that match the address family. Address family
505 		 * agnostic messages are always sent.
506 		 */
507 		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
508 		    rop->rop_proto != sa_family)
509 			continue;
510 
511 
512 		so = rop->rop_socket;
513 		s = solock(so);
514 
515 		/*
516 		 * Check to see if we don't want our own messages and
517 		 * if we can receive anything.
518 		 */
519 		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
520 		    !(so->so_state & SS_ISCONNECTED) ||
521 		    (so->so_state & SS_CANTRCVMORE))
522 			goto next;
523 
524 		/* filter messages that the process does not want */
525 		rtm = mtod(m, struct rt_msghdr *);
526 		/* but RTM_DESYNC can't be filtered */
527 		if (rtm->rtm_type != RTM_DESYNC) {
528 			if (rop->rop_msgfilter != 0 &&
529 			    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
530 				goto next;
531 			if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
532 				goto next;
533 		}
534 		switch (rtm->rtm_type) {
535 		case RTM_IFANNOUNCE:
536 		case RTM_DESYNC:
537 			/* no tableid */
538 			break;
539 		case RTM_RESOLVE:
540 		case RTM_NEWADDR:
541 		case RTM_DELADDR:
542 		case RTM_IFINFO:
543 		case RTM_80211INFO:
544 		case RTM_BFD:
545 			/* check against rdomain id */
546 			if (rop->rop_rtableid != RTABLE_ANY &&
547 			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
548 				goto next;
549 			break;
550 		default:
551 			if (rop->rop_priority != 0 &&
552 			    rop->rop_priority < rtm->rtm_priority)
553 				goto next;
554 			/* check against rtable id */
555 			if (rop->rop_rtableid != RTABLE_ANY &&
556 			    rop->rop_rtableid != rtm->rtm_tableid)
557 				goto next;
558 			break;
559 		}
560 
561 		/*
562 		 * Check to see if the flush flag is set. If so, don't queue
563 		 * any more messages until the flag is cleared.
564 		 */
565 		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
566 			goto next;
567 
568 		rtm_sendup(so, m);
569 next:
570 		sounlock(so, s);
571 	}
572 	SRPL_LEAVE(&sr);
573 
574 	m_freem(m);
575 }
576 
577 int
578 rtm_sendup(struct socket *so, struct mbuf *m0)
579 {
580 	struct rtpcb *rop = sotortpcb(so);
581 	struct mbuf *m;
582 
583 	soassertlocked(so);
584 
585 	m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
586 	if (m == NULL)
587 		return (ENOMEM);
588 
589 	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
590 	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
591 		/* Flag socket as desync'ed and flush required */
592 		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
593 		rtm_senddesync(so);
594 		m_freem(m);
595 		return (ENOBUFS);
596 	}
597 
598 	sorwakeup(so);
599 	return (0);
600 }
601 
602 struct rt_msghdr *
603 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
604 {
605 	struct rt_msghdr	*rtm;
606 	struct rt_addrinfo	 info;
607 	struct sockaddr_rtlabel	 sa_rl;
608 	struct sockaddr_in6	 sa_mask;
609 #ifdef BFD
610 	struct sockaddr_bfd	 sa_bfd;
611 #endif
612 	struct ifnet		*ifp = NULL;
613 	int			 len;
614 
615 	bzero(&info, sizeof(info));
616 	info.rti_info[RTAX_DST] = rt_key(rt);
617 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
618 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
619 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
620 #ifdef BFD
621 	if (rt->rt_flags & RTF_BFD)
622 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
623 #endif
624 #ifdef MPLS
625 	if (rt->rt_flags & RTF_MPLS) {
626 		struct sockaddr_mpls	 sa_mpls;
627 
628 		bzero(&sa_mpls, sizeof(sa_mpls));
629 		sa_mpls.smpls_family = AF_MPLS;
630 		sa_mpls.smpls_len = sizeof(sa_mpls);
631 		sa_mpls.smpls_label = ((struct rt_mpls *)
632 		    rt->rt_llinfo)->mpls_label;
633 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
634 		info.rti_mpls = ((struct rt_mpls *)
635 		    rt->rt_llinfo)->mpls_operation;
636 	}
637 #endif
638 	ifp = if_get(rt->rt_ifidx);
639 	if (ifp != NULL) {
640 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
641 		info.rti_info[RTAX_IFA] =
642 		    rtable_getsource(tableid, info.rti_info[RTAX_DST]->sa_family);
643 		if (info.rti_info[RTAX_IFA] == NULL)
644 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
645 		if (ifp->if_flags & IFF_POINTOPOINT)
646 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
647 	}
648 	if_put(ifp);
649 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
650 
651 	/* build new route message */
652 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
653 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
654 
655 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
656 	rtm->rtm_type = type;
657 	rtm->rtm_index = rt->rt_ifidx;
658 	rtm->rtm_tableid = tableid;
659 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
660 	rtm->rtm_flags = rt->rt_flags;
661 	rtm->rtm_pid = curproc->p_p->ps_pid;
662 	rtm->rtm_seq = seq;
663 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
664 	rtm->rtm_addrs = info.rti_addrs;
665 #ifdef MPLS
666 	rtm->rtm_mpls = info.rti_mpls;
667 #endif
668 	return rtm;
669 }
670 
671 int
672 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
673     struct mbuf *control)
674 {
675 	struct rt_msghdr	*rtm = NULL;
676 	struct rtentry		*rt = NULL;
677 	struct rt_addrinfo	 info;
678 	struct ifnet		*ifp;
679 	int			 len, seq, error = 0;
680 	u_int			 tableid;
681 	u_int8_t		 prio;
682 	u_char			 vers, type;
683 
684 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
685 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
686 		return (ENOBUFS);
687 	if ((m->m_flags & M_PKTHDR) == 0)
688 		panic("route_output");
689 	len = m->m_pkthdr.len;
690 	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 ||
691 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
692 		error = EINVAL;
693 		goto fail;
694 	}
695 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
696 	switch (vers) {
697 	case RTM_VERSION:
698 		if (len < sizeof(struct rt_msghdr)) {
699 			error = EINVAL;
700 			goto fail;
701 		}
702 		if (len > RTM_MAXSIZE) {
703 			error = EMSGSIZE;
704 			goto fail;
705 		}
706 		rtm = malloc(len, M_RTABLE, M_WAITOK);
707 		m_copydata(m, 0, len, rtm);
708 		break;
709 	default:
710 		error = EPROTONOSUPPORT;
711 		goto fail;
712 	}
713 
714 	/* Verify that the caller is sending an appropriate message early */
715 	switch (rtm->rtm_type) {
716 	case RTM_ADD:
717 	case RTM_DELETE:
718 	case RTM_GET:
719 	case RTM_CHANGE:
720 	case RTM_PROPOSAL:
721 	case RTM_SOURCE:
722 		break;
723 	default:
724 		error = EOPNOTSUPP;
725 		goto fail;
726 	}
727 	/*
728 	 * Verify that the header length is valid.
729 	 * All messages from userland start with a struct rt_msghdr.
730 	 */
731 	if (rtm->rtm_hdrlen == 0)	/* old client */
732 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
733 	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
734 	    len < rtm->rtm_hdrlen) {
735 		error = EINVAL;
736 		goto fail;
737 	}
738 
739 	rtm->rtm_pid = curproc->p_p->ps_pid;
740 
741 	/*
742 	 * Verify that the caller has the appropriate privilege; RTM_GET
743 	 * is the only operation the non-superuser is allowed.
744 	 */
745 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
746 		error = EACCES;
747 		goto fail;
748 	}
749 	tableid = rtm->rtm_tableid;
750 	if (!rtable_exists(tableid)) {
751 		if (rtm->rtm_type == RTM_ADD) {
752 			if ((error = rtable_add(tableid)) != 0)
753 				goto fail;
754 		} else {
755 			error = EINVAL;
756 			goto fail;
757 		}
758 	}
759 
760 	/* Do not let userland play with kernel-only flags. */
761 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
762 		error = EINVAL;
763 		goto fail;
764 	}
765 
766 	/* make sure that kernel-only bits are not set */
767 	rtm->rtm_priority &= RTP_MASK;
768 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
769 	rtm->rtm_fmask &= RTF_FMASK;
770 
771 	if (rtm->rtm_priority != 0) {
772 		if (rtm->rtm_priority > RTP_MAX ||
773 		    rtm->rtm_priority == RTP_LOCAL) {
774 			error = EINVAL;
775 			goto fail;
776 		}
777 		prio = rtm->rtm_priority;
778 	} else if (rtm->rtm_type != RTM_ADD)
779 		prio = RTP_ANY;
780 	else if (rtm->rtm_flags & RTF_STATIC)
781 		prio = 0;
782 	else
783 		prio = RTP_DEFAULT;
784 
785 	bzero(&info, sizeof(info));
786 	info.rti_addrs = rtm->rtm_addrs;
787 	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
788 	    len + (caddr_t)rtm, &info)) != 0)
789 		goto fail;
790 
791 	info.rti_flags = rtm->rtm_flags;
792 
793 	if (rtm->rtm_type != RTM_SOURCE &&
794 	    rtm->rtm_type != RTM_PROPOSAL &&
795 	    (info.rti_info[RTAX_DST] == NULL ||
796 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
797 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
798 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
799 	    info.rti_info[RTAX_GENMASK] != NULL)) {
800 		error = EINVAL;
801 		goto fail;
802 	}
803 #ifdef MPLS
804 	info.rti_mpls = rtm->rtm_mpls;
805 #endif
806 
807 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
808 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
809 	    (info.rti_flags & RTF_CLONING) == 0) {
810 		info.rti_flags |= RTF_LLINFO;
811 	}
812 
813 	/*
814 	 * Validate RTM_PROPOSAL and pass it along or error out.
815 	 */
816 	if (rtm->rtm_type == RTM_PROPOSAL) {
817 		if (rtm_validate_proposal(&info) == -1) {
818 			error = EINVAL;
819 			goto fail;
820 		}
821 		/*
822 		 * If this is a solicitation proposal forward request to
823 		 * all interfaces. Most handlers will ignore it but at least
824 		 * umb(4) will send a response to this event.
825 		 */
826 		if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
827 			NET_LOCK();
828 			TAILQ_FOREACH(ifp, &ifnet, if_list) {
829 				ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
830 			}
831 			NET_UNLOCK();
832 		}
833 	} else if (rtm->rtm_type == RTM_SOURCE) {
834 		if (info.rti_info[RTAX_IFA] == NULL) {
835 			error = EINVAL;
836 			goto fail;
837 		}
838 		if ((error =
839 		    rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0)
840 			goto fail;
841 	} else {
842 		error = rtm_output(rtm, &rt, &info, prio, tableid);
843 		if (!error) {
844 			type = rtm->rtm_type;
845 			seq = rtm->rtm_seq;
846 			free(rtm, M_RTABLE, len);
847 			rtm = rtm_report(rt, type, seq, tableid);
848 			len = rtm->rtm_msglen;
849 		}
850 	}
851 
852 	rtfree(rt);
853 	if (error) {
854 		rtm->rtm_errno = error;
855 	} else {
856 		rtm->rtm_flags |= RTF_DONE;
857 	}
858 
859 	/*
860 	 * Check to see if we don't want our own messages.
861 	 */
862 	if (!(so->so_options & SO_USELOOPBACK)) {
863 		if (rtptable.rtp_count <= 1) {
864 			/* no other listener and no loopback of messages */
865 fail:
866 			free(rtm, M_RTABLE, len);
867 			m_freem(m);
868 			return (error);
869 		}
870 	}
871 	if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
872 		m_freem(m);
873 		m = NULL;
874 	} else if (m->m_pkthdr.len > len)
875 		m_adj(m, len - m->m_pkthdr.len);
876 	free(rtm, M_RTABLE, len);
877 	if (m)
878 		route_input(m, so, info.rti_info[RTAX_DST] ?
879 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
880 
881 	return (error);
882 }
883 
884 int
885 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
886     struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
887 {
888 	struct rtentry		*rt = *prt;
889 	struct ifnet		*ifp = NULL;
890 	int			 plen, newgate = 0, error = 0;
891 
892 	switch (rtm->rtm_type) {
893 	case RTM_ADD:
894 		if (info->rti_info[RTAX_GATEWAY] == NULL) {
895 			error = EINVAL;
896 			break;
897 		}
898 
899 		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
900 		if ((error = route_arp_conflict(rt, info))) {
901 			rtfree(rt);
902 			rt = NULL;
903 			break;
904 		}
905 
906 		/*
907 		 * We cannot go through a delete/create/insert cycle for
908 		 * cached route because this can lead to races in the
909 		 * receive path.  Instead we update the L2 cache.
910 		 */
911 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
912 			goto change;
913 
914 		rtfree(rt);
915 		rt = NULL;
916 
917 		NET_LOCK();
918 		if ((error = rtm_getifa(info, tableid)) != 0) {
919 			NET_UNLOCK();
920 			break;
921 		}
922 		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
923 		NET_UNLOCK();
924 		if (error == 0)
925 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
926 			    &rt->rt_rmx);
927 		break;
928 	case RTM_DELETE:
929 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
930 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
931 		    prio);
932 		if (rt == NULL) {
933 			error = ESRCH;
934 			break;
935 		}
936 
937 		/*
938 		 * If we got multipath routes, we require users to specify
939 		 * a matching gateway.
940 		 */
941 		if (ISSET(rt->rt_flags, RTF_MPATH) &&
942 		    info->rti_info[RTAX_GATEWAY] == NULL) {
943 			error = ESRCH;
944 			break;
945 		}
946 
947 		/* Detaching an interface requires the KERNEL_LOCK(). */
948 		ifp = if_get(rt->rt_ifidx);
949 		KASSERT(ifp != NULL);
950 
951 		/*
952 		 * Invalidate the cache of automagically created and
953 		 * referenced L2 entries to make sure that ``rt_gwroute''
954 		 * pointer stays valid for other CPUs.
955 		 */
956 		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
957 			NET_LOCK();
958 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
959 			/* Reset the MTU of the gateway route. */
960 			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
961 			    route_cleargateway, rt);
962 			NET_UNLOCK();
963 			if_put(ifp);
964 			break;
965 		}
966 
967 		/*
968 		 * Make sure that local routes are only modified by the
969 		 * kernel.
970 		 */
971 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
972 			if_put(ifp);
973 			error = EINVAL;
974 			break;
975 		}
976 
977 		rtfree(rt);
978 		rt = NULL;
979 
980 		NET_LOCK();
981 		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
982 		NET_UNLOCK();
983 		if_put(ifp);
984 		break;
985 	case RTM_CHANGE:
986 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
987 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
988 		    prio);
989 		/*
990 		 * If we got multipath routes, we require users to specify
991 		 * a matching gateway.
992 		 */
993 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
994 		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
995 			rtfree(rt);
996 			rt = NULL;
997 		}
998 		/*
999 		 * If RTAX_GATEWAY is the argument we're trying to
1000 		 * change, try to find a compatible route.
1001 		 */
1002 		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
1003 			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1004 			    info->rti_info[RTAX_NETMASK], NULL, prio);
1005 			/* Ensure we don't pick a multipath one. */
1006 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
1007 				rtfree(rt);
1008 				rt = NULL;
1009 			}
1010 		}
1011 
1012 		if (rt == NULL) {
1013 			error = ESRCH;
1014 			break;
1015 		}
1016 
1017 		/*
1018 		 * Make sure that local routes are only modified by the
1019 		 * kernel.
1020 		 */
1021 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1022 			error = EINVAL;
1023 			break;
1024 		}
1025 
1026 		/*
1027 		 * RTM_CHANGE needs a perfect match.
1028 		 */
1029 		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
1030 		    info->rti_info[RTAX_NETMASK]);
1031 		if (rt_plen(rt) != plen) {
1032 			error = ESRCH;
1033 			break;
1034 		}
1035 
1036 		if (info->rti_info[RTAX_GATEWAY] != NULL)
1037 			if (rt->rt_gateway == NULL ||
1038 			    bcmp(rt->rt_gateway,
1039 			    info->rti_info[RTAX_GATEWAY],
1040 			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
1041 				newgate = 1;
1042 			}
1043 		/*
1044 		 * Check reachable gateway before changing the route.
1045 		 * New gateway could require new ifaddr, ifp;
1046 		 * flags may also be different; ifp may be specified
1047 		 * by ll sockaddr when protocol address is ambiguous.
1048 		 */
1049 		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1050 		    info->rti_info[RTAX_IFA] != NULL) {
1051 			struct ifaddr	*ifa = NULL;
1052 
1053 			NET_LOCK();
1054 			if ((error = rtm_getifa(info, tableid)) != 0) {
1055 				NET_UNLOCK();
1056 				break;
1057 			}
1058 			ifa = info->rti_ifa;
1059 			if (rt->rt_ifa != ifa) {
1060 				ifp = if_get(rt->rt_ifidx);
1061 				KASSERT(ifp != NULL);
1062 				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1063 				ifafree(rt->rt_ifa);
1064 				if_put(ifp);
1065 
1066 				ifa->ifa_refcnt++;
1067 				rt->rt_ifa = ifa;
1068 				rt->rt_ifidx = ifa->ifa_ifp->if_index;
1069 				/* recheck link state after ifp change */
1070 				rt_if_linkstate_change(rt, ifa->ifa_ifp,
1071 				    tableid);
1072 			}
1073 			NET_UNLOCK();
1074 		}
1075 change:
1076 		if (info->rti_info[RTAX_GATEWAY] != NULL) {
1077 			/* When updating the gateway, make sure it is valid. */
1078 			if (!newgate && rt->rt_gateway->sa_family !=
1079 			    info->rti_info[RTAX_GATEWAY]->sa_family) {
1080 				error = EINVAL;
1081 				break;
1082 			}
1083 
1084 			NET_LOCK();
1085 			error = rt_setgate(rt,
1086 			    info->rti_info[RTAX_GATEWAY], tableid);
1087 			NET_UNLOCK();
1088 			if (error)
1089 				break;
1090 		}
1091 #ifdef MPLS
1092 		if (rtm->rtm_flags & RTF_MPLS) {
1093 			NET_LOCK();
1094 			error = rt_mpls_set(rt,
1095 			    info->rti_info[RTAX_SRC], info->rti_mpls);
1096 			NET_UNLOCK();
1097 			if (error)
1098 				break;
1099 		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
1100 			NET_LOCK();
1101 			/* if gateway changed remove MPLS information */
1102 			rt_mpls_clear(rt);
1103 			NET_UNLOCK();
1104 		}
1105 #endif
1106 
1107 #ifdef BFD
1108 		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1109 			if ((error = bfdset(rt)))
1110 				break;
1111 		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1112 		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
1113 			bfdclear(rt);
1114 		}
1115 #endif
1116 
1117 		NET_LOCK();
1118 		/* Hack to allow some flags to be toggled */
1119 		if (rtm->rtm_fmask) {
1120 			/* MPLS flag it is set by rt_mpls_set() */
1121 			rtm->rtm_fmask &= ~RTF_MPLS;
1122 			rtm->rtm_flags &= ~RTF_MPLS;
1123 			rt->rt_flags =
1124 			    (rt->rt_flags & ~rtm->rtm_fmask) |
1125 			    (rtm->rtm_flags & rtm->rtm_fmask);
1126 		}
1127 		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
1128 
1129 		ifp = if_get(rt->rt_ifidx);
1130 		KASSERT(ifp != NULL);
1131 		ifp->if_rtrequest(ifp, RTM_ADD, rt);
1132 		if_put(ifp);
1133 
1134 		if (info->rti_info[RTAX_LABEL] != NULL) {
1135 			char *rtlabel = ((struct sockaddr_rtlabel *)
1136 			    info->rti_info[RTAX_LABEL])->sr_label;
1137 			rtlabel_unref(rt->rt_labelid);
1138 			rt->rt_labelid = rtlabel_name2id(rtlabel);
1139 		}
1140 		if_group_routechange(info->rti_info[RTAX_DST],
1141 		    info->rti_info[RTAX_NETMASK]);
1142 		rt->rt_locks &= ~(rtm->rtm_inits);
1143 		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1144 		NET_UNLOCK();
1145 		break;
1146 	case RTM_GET:
1147 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1148 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1149 		    prio);
1150 		if (rt == NULL)
1151 			error = ESRCH;
1152 		break;
1153 	}
1154 
1155 	*prt = rt;
1156 	return (error);
1157 }
1158 
1159 struct ifaddr *
1160 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1161     unsigned int rtableid)
1162 {
1163 	struct ifaddr	*ifa;
1164 
1165 	if ((flags & RTF_GATEWAY) == 0) {
1166 		/*
1167 		 * If we are adding a route to an interface,
1168 		 * and the interface is a pt to pt link
1169 		 * we should search for the destination
1170 		 * as our clue to the interface.  Otherwise
1171 		 * we can use the local address.
1172 		 */
1173 		ifa = NULL;
1174 		if (flags & RTF_HOST)
1175 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1176 		if (ifa == NULL)
1177 			ifa = ifa_ifwithaddr(gateway, rtableid);
1178 	} else {
1179 		/*
1180 		 * If we are adding a route to a remote net
1181 		 * or host, the gateway may still be on the
1182 		 * other end of a pt to pt link.
1183 		 */
1184 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1185 	}
1186 	if (ifa == NULL) {
1187 		if (gateway->sa_family == AF_LINK) {
1188 			struct sockaddr_dl *sdl = satosdl(gateway);
1189 			struct ifnet *ifp = if_get(sdl->sdl_index);
1190 
1191 			if (ifp != NULL)
1192 				ifa = ifaof_ifpforaddr(dst, ifp);
1193 			if_put(ifp);
1194 		} else {
1195 			struct rtentry *rt;
1196 
1197 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1198 			if (rt != NULL)
1199 				ifa = rt->rt_ifa;
1200 			rtfree(rt);
1201 		}
1202 	}
1203 	if (ifa == NULL)
1204 		return (NULL);
1205 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1206 		struct ifaddr	*oifa = ifa;
1207 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1208 		if (ifa == NULL)
1209 			ifa = oifa;
1210 	}
1211 	return (ifa);
1212 }
1213 
1214 int
1215 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1216 {
1217 	struct ifnet	*ifp = NULL;
1218 
1219 	/*
1220 	 * The "returned" `ifa' is guaranteed to be alive only if
1221 	 * the NET_LOCK() is held.
1222 	 */
1223 	NET_ASSERT_LOCKED();
1224 
1225 	/*
1226 	 * ifp may be specified by sockaddr_dl when protocol address
1227 	 * is ambiguous
1228 	 */
1229 	if (info->rti_info[RTAX_IFP] != NULL) {
1230 		struct sockaddr_dl *sdl;
1231 
1232 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1233 		ifp = if_get(sdl->sdl_index);
1234 	}
1235 
1236 #ifdef IPSEC
1237 	/*
1238 	 * If the destination is a PF_KEY address, we'll look
1239 	 * for the existence of a encap interface number or address
1240 	 * in the options list of the gateway. By default, we'll return
1241 	 * enc0.
1242 	 */
1243 	if (info->rti_info[RTAX_DST] &&
1244 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1245 		info->rti_ifa = enc_getifa(rtid, 0);
1246 #endif
1247 
1248 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1249 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1250 
1251 	if (info->rti_ifa == NULL) {
1252 		struct sockaddr	*sa;
1253 
1254 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1255 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1256 				sa = info->rti_info[RTAX_DST];
1257 
1258 		if (sa != NULL && ifp != NULL)
1259 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1260 		else if (info->rti_info[RTAX_DST] != NULL &&
1261 		    info->rti_info[RTAX_GATEWAY] != NULL)
1262 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1263 			    info->rti_info[RTAX_DST],
1264 			    info->rti_info[RTAX_GATEWAY],
1265 			    rtid);
1266 		else if (sa != NULL)
1267 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1268 			    sa, sa, rtid);
1269 	}
1270 
1271 	if_put(ifp);
1272 
1273 	if (info->rti_ifa == NULL)
1274 		return (ENETUNREACH);
1275 
1276 	return (0);
1277 }
1278 
1279 int
1280 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1281 {
1282 	struct rtentry *nhrt = arg;
1283 
1284 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1285 	    !ISSET(rt->rt_locks, RTV_MTU))
1286 		rt->rt_mtu = 0;
1287 
1288 	return (0);
1289 }
1290 
1291 /*
1292  * Check if the user request to insert an ARP entry does not conflict
1293  * with existing ones.
1294  *
1295  * Only two entries are allowed for a given IP address: a private one
1296  * (priv) and a public one (pub).
1297  */
1298 int
1299 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1300 {
1301 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1302 
1303 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1304 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1305 		return (0);
1306 
1307 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1308 		return (0);
1309 
1310 	/* If the entry is cached, it can be updated. */
1311 	if (ISSET(rt->rt_flags, RTF_CACHED))
1312 		return (0);
1313 
1314 	/*
1315 	 * Same destination, not cached and both "priv" or "pub" conflict.
1316 	 * If a second entry exists, it always conflict.
1317 	 */
1318 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1319 	    ISSET(rt->rt_flags, RTF_MPATH))
1320 		return (EEXIST);
1321 
1322 	/* No conflict but an entry exist so we need to force mpath. */
1323 	info->rti_flags |= RTF_MPATH;
1324 	return (0);
1325 }
1326 
1327 void
1328 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1329     struct rt_kmetrics *out)
1330 {
1331 	int64_t expire;
1332 
1333 	if (which & RTV_MTU)
1334 		out->rmx_mtu = in->rmx_mtu;
1335 	if (which & RTV_EXPIRE) {
1336 		expire = in->rmx_expire;
1337 		if (expire != 0) {
1338 			expire -= gettime();
1339 			expire += getuptime();
1340 		}
1341 
1342 		out->rmx_expire = expire;
1343 	}
1344 }
1345 
1346 void
1347 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1348 {
1349 	int64_t expire;
1350 
1351 	expire = in->rmx_expire;
1352 	if (expire != 0) {
1353 		expire -= getuptime();
1354 		expire += gettime();
1355 	}
1356 
1357 	bzero(out, sizeof(*out));
1358 	out->rmx_locks = in->rmx_locks;
1359 	out->rmx_mtu = in->rmx_mtu;
1360 	out->rmx_expire = expire;
1361 	out->rmx_pksent = in->rmx_pksent;
1362 }
1363 
/*
 * ROUNDUP(a): round `a' up to the next multiple of sizeof(long); values
 * that are not positive yield sizeof(long).  `a' is evaluated twice.
 * ADVANCE(x, n): move `x' past the sockaddr `n', using its padded length.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (((a) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) : \
	    sizeof(long))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
1367 
1368 int
1369 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1370 {
1371 	struct sockaddr	*sa;
1372 	int		 i;
1373 
1374 	/*
1375 	 * Parse address bits, split address storage in chunks, and
1376 	 * set info pointers.  Use sa_len for traversing the memory
1377 	 * and check that we stay within in the limit.
1378 	 */
1379 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1380 	for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1381 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1382 			continue;
1383 		if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1384 			return (EINVAL);
1385 		sa = (struct sockaddr *)cp;
1386 		if (cp + sa->sa_len > cplim)
1387 			return (EINVAL);
1388 		rtinfo->rti_info[i] = sa;
1389 		ADVANCE(cp, sa);
1390 	}
1391 	/*
1392 	 * Check that the address family is suitable for the route address
1393 	 * type.  Check that each address has a size that fits its family
1394 	 * and its length is within the size.  Strings within addresses must
1395 	 * be NUL terminated.
1396 	 */
1397 	for (i = 0; i < RTAX_MAX; i++) {
1398 		size_t len, maxlen, size;
1399 
1400 		sa = rtinfo->rti_info[i];
1401 		if (sa == NULL)
1402 			continue;
1403 		maxlen = size = 0;
1404 		switch (i) {
1405 		case RTAX_DST:
1406 		case RTAX_GATEWAY:
1407 		case RTAX_SRC:
1408 			switch (sa->sa_family) {
1409 			case AF_INET:
1410 				size = sizeof(struct sockaddr_in);
1411 				break;
1412 			case AF_LINK:
1413 				size = sizeof(struct sockaddr_dl);
1414 				break;
1415 #ifdef INET6
1416 			case AF_INET6:
1417 				size = sizeof(struct sockaddr_in6);
1418 				break;
1419 #endif
1420 #ifdef MPLS
1421 			case AF_MPLS:
1422 				size = sizeof(struct sockaddr_mpls);
1423 				break;
1424 #endif
1425 			}
1426 			break;
1427 		case RTAX_IFP:
1428 			if (sa->sa_family != AF_LINK)
1429 				return (EAFNOSUPPORT);
1430 			/*
1431 			 * XXX Should be sizeof(struct sockaddr_dl), but
1432 			 * route(8) has a bug and provides less memory.
1433 			 * arp(8) has another bug and uses sizeof pointer.
1434 			 */
1435 			size = 4;
1436 			break;
1437 		case RTAX_IFA:
1438 			switch (sa->sa_family) {
1439 			case AF_INET:
1440 				size = sizeof(struct sockaddr_in);
1441 				break;
1442 #ifdef INET6
1443 			case AF_INET6:
1444 				size = sizeof(struct sockaddr_in6);
1445 				break;
1446 #endif
1447 			default:
1448 				return (EAFNOSUPPORT);
1449 			}
1450 			break;
1451 		case RTAX_LABEL:
1452 			sa->sa_family = AF_UNSPEC;
1453 			maxlen = RTLABEL_LEN;
1454 			size = sizeof(struct sockaddr_rtlabel);
1455 			break;
1456 #ifdef BFD
1457 		case RTAX_BFD:
1458 			sa->sa_family = AF_UNSPEC;
1459 			size = sizeof(struct sockaddr_bfd);
1460 			break;
1461 #endif
1462 		case RTAX_DNS:
1463 			/* more validation in rtm_validate_proposal */
1464 			if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1465 				return (EINVAL);
1466 			if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1467 			    sr_dns))
1468 				return (EINVAL);
1469 			switch (sa->sa_family) {
1470 			case AF_INET:
1471 #ifdef INET6
1472 			case AF_INET6:
1473 #endif
1474 				break;
1475 			default:
1476 				return (EAFNOSUPPORT);
1477 			}
1478 			break;
1479 		case RTAX_STATIC:
1480 			sa->sa_family = AF_UNSPEC;
1481 			maxlen = RTSTATIC_LEN;
1482 			size = sizeof(struct sockaddr_rtstatic);
1483 			break;
1484 		case RTAX_SEARCH:
1485 			sa->sa_family = AF_UNSPEC;
1486 			maxlen = RTSEARCH_LEN;
1487 			size = sizeof(struct sockaddr_rtsearch);
1488 			break;
1489 		}
1490 		if (size) {
1491 			/* memory for the full struct must be provided */
1492 			if (sa->sa_len < size)
1493 				return (EINVAL);
1494 		}
1495 		if (maxlen) {
1496 			/* this should not happen */
1497 			if (2 + maxlen > size)
1498 				return (EINVAL);
1499 			/* strings must be NUL terminated within the struct */
1500 			len = strnlen(sa->sa_data, maxlen);
1501 			if (len >= maxlen || 2 + len >= sa->sa_len)
1502 				return (EINVAL);
1503 			break;
1504 		}
1505 	}
1506 	return (0);
1507 }
1508 
1509 struct mbuf *
1510 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1511 {
1512 	struct rt_msghdr	*rtm;
1513 	struct mbuf		*m;
1514 	int			 i;
1515 	struct sockaddr		*sa;
1516 	int			 len, dlen, hlen;
1517 
1518 	switch (type) {
1519 	case RTM_DELADDR:
1520 	case RTM_NEWADDR:
1521 		len = sizeof(struct ifa_msghdr);
1522 		break;
1523 	case RTM_IFINFO:
1524 		len = sizeof(struct if_msghdr);
1525 		break;
1526 	case RTM_IFANNOUNCE:
1527 		len = sizeof(struct if_announcemsghdr);
1528 		break;
1529 #ifdef BFD
1530 	case RTM_BFD:
1531 		len = sizeof(struct bfd_msghdr);
1532 		break;
1533 #endif
1534 	case RTM_80211INFO:
1535 		len = sizeof(struct if_ieee80211_msghdr);
1536 		break;
1537 	default:
1538 		len = sizeof(struct rt_msghdr);
1539 		break;
1540 	}
1541 	if (len > MCLBYTES)
1542 		panic("rtm_msg1");
1543 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1544 	if (m && len > MHLEN) {
1545 		MCLGET(m, M_DONTWAIT);
1546 		if ((m->m_flags & M_EXT) == 0) {
1547 			m_free(m);
1548 			m = NULL;
1549 		}
1550 	}
1551 	if (m == NULL)
1552 		return (m);
1553 	m->m_pkthdr.len = m->m_len = hlen = len;
1554 	m->m_pkthdr.ph_ifidx = 0;
1555 	rtm = mtod(m, struct rt_msghdr *);
1556 	bzero(rtm, len);
1557 	for (i = 0; i < RTAX_MAX; i++) {
1558 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1559 			continue;
1560 		rtinfo->rti_addrs |= (1 << i);
1561 		dlen = ROUNDUP(sa->sa_len);
1562 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1563 			m_freem(m);
1564 			return (NULL);
1565 		}
1566 		len += dlen;
1567 	}
1568 	rtm->rtm_msglen = len;
1569 	rtm->rtm_hdrlen = hlen;
1570 	rtm->rtm_version = RTM_VERSION;
1571 	rtm->rtm_type = type;
1572 	return (m);
1573 }
1574 
1575 int
1576 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1577     struct walkarg *w)
1578 {
1579 	int		i;
1580 	int		len, dlen, hlen, second_time = 0;
1581 	caddr_t		cp0;
1582 
1583 	rtinfo->rti_addrs = 0;
1584 again:
1585 	switch (type) {
1586 	case RTM_DELADDR:
1587 	case RTM_NEWADDR:
1588 		len = sizeof(struct ifa_msghdr);
1589 		break;
1590 	case RTM_IFINFO:
1591 		len = sizeof(struct if_msghdr);
1592 		break;
1593 	default:
1594 		len = sizeof(struct rt_msghdr);
1595 		break;
1596 	}
1597 	hlen = len;
1598 	if ((cp0 = cp) != NULL)
1599 		cp += len;
1600 	for (i = 0; i < RTAX_MAX; i++) {
1601 		struct sockaddr *sa;
1602 
1603 		if ((sa = rtinfo->rti_info[i]) == NULL)
1604 			continue;
1605 		rtinfo->rti_addrs |= (1 << i);
1606 		dlen = ROUNDUP(sa->sa_len);
1607 		if (cp) {
1608 			bcopy(sa, cp, (size_t)dlen);
1609 			cp += dlen;
1610 		}
1611 		len += dlen;
1612 	}
1613 	/* align message length to the next natural boundary */
1614 	len = ALIGN(len);
1615 	if (cp == 0 && w != NULL && !second_time) {
1616 		w->w_needed += len;
1617 		if (w->w_needed <= 0 && w->w_where) {
1618 			if (w->w_tmemsize < len) {
1619 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1620 				w->w_tmem = malloc(len, M_RTABLE,
1621 				    M_NOWAIT | M_ZERO);
1622 				if (w->w_tmem)
1623 					w->w_tmemsize = len;
1624 			}
1625 			if (w->w_tmem) {
1626 				cp = w->w_tmem;
1627 				second_time = 1;
1628 				goto again;
1629 			} else
1630 				w->w_where = 0;
1631 		}
1632 	}
1633 	if (cp && w)		/* clear the message header */
1634 		bzero(cp0, hlen);
1635 
1636 	if (cp) {
1637 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1638 
1639 		rtm->rtm_version = RTM_VERSION;
1640 		rtm->rtm_type = type;
1641 		rtm->rtm_msglen = len;
1642 		rtm->rtm_hdrlen = hlen;
1643 	}
1644 	return (len);
1645 }
1646 
1647 void
1648 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1649 {
1650 	struct rt_addrinfo	 info;
1651 	struct ifnet		*ifp;
1652 	struct sockaddr_rtlabel	 sa_rl;
1653 	struct sockaddr_in6	 sa_mask;
1654 
1655 	memset(&info, 0, sizeof(info));
1656 	info.rti_info[RTAX_DST] = rt_key(rt);
1657 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1658 	if (!ISSET(rt->rt_flags, RTF_HOST))
1659 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1660 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1661 	ifp = if_get(rt->rt_ifidx);
1662 	if (ifp != NULL) {
1663 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1664 		info.rti_info[RTAX_IFA] =
1665 		    rtable_getsource(rtableid, info.rti_info[RTAX_DST]->sa_family);
1666 		if (info.rti_info[RTAX_IFA] == NULL)
1667 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1668 	}
1669 
1670 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1671 	    rtableid);
1672 	if_put(ifp);
1673 }
1674 
1675 /*
1676  * This routine is called to generate a message from the routing
1677  * socket indicating that a redirect has occurred, a routing lookup
1678  * has failed, or that a protocol has detected timeouts to a particular
1679  * destination.
1680  */
1681 void
1682 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1683     u_int ifidx, int error, u_int tableid)
1684 {
1685 	struct rt_msghdr	*rtm;
1686 	struct mbuf		*m;
1687 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1688 
1689 	if (rtptable.rtp_count == 0)
1690 		return;
1691 	m = rtm_msg1(type, rtinfo);
1692 	if (m == NULL)
1693 		return;
1694 	rtm = mtod(m, struct rt_msghdr *);
1695 	rtm->rtm_flags = RTF_DONE | flags;
1696 	rtm->rtm_priority = prio;
1697 	rtm->rtm_errno = error;
1698 	rtm->rtm_tableid = tableid;
1699 	rtm->rtm_addrs = rtinfo->rti_addrs;
1700 	rtm->rtm_index = ifidx;
1701 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1702 }
1703 
1704 /*
1705  * This routine is called to generate a message from the routing
1706  * socket indicating that the status of a network interface has changed.
1707  */
1708 void
1709 rtm_ifchg(struct ifnet *ifp)
1710 {
1711 	struct rt_addrinfo	 info;
1712 	struct if_msghdr	*ifm;
1713 	struct mbuf		*m;
1714 
1715 	if (rtptable.rtp_count == 0)
1716 		return;
1717 	memset(&info, 0, sizeof(info));
1718 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1719 	m = rtm_msg1(RTM_IFINFO, &info);
1720 	if (m == NULL)
1721 		return;
1722 	ifm = mtod(m, struct if_msghdr *);
1723 	ifm->ifm_index = ifp->if_index;
1724 	ifm->ifm_tableid = ifp->if_rdomain;
1725 	ifm->ifm_flags = ifp->if_flags;
1726 	ifm->ifm_xflags = ifp->if_xflags;
1727 	if_getdata(ifp, &ifm->ifm_data);
1728 	ifm->ifm_addrs = info.rti_addrs;
1729 	route_input(m, NULL, AF_UNSPEC);
1730 }
1731 
1732 /*
1733  * This is called to generate messages from the routing socket
1734  * indicating a network interface has had addresses associated with it.
1735  * if we ever reverse the logic and replace messages TO the routing
1736  * socket indicate a request to configure interfaces, then it will
1737  * be unnecessary as the routing socket will automatically generate
1738  * copies of it.
1739  */
1740 void
1741 rtm_addr(int cmd, struct ifaddr *ifa)
1742 {
1743 	struct ifnet		*ifp = ifa->ifa_ifp;
1744 	struct mbuf		*m;
1745 	struct rt_addrinfo	 info;
1746 	struct ifa_msghdr	*ifam;
1747 
1748 	if (rtptable.rtp_count == 0)
1749 		return;
1750 
1751 	memset(&info, 0, sizeof(info));
1752 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1753 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1754 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1755 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1756 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1757 		return;
1758 	ifam = mtod(m, struct ifa_msghdr *);
1759 	ifam->ifam_index = ifp->if_index;
1760 	ifam->ifam_metric = ifa->ifa_metric;
1761 	ifam->ifam_flags = ifa->ifa_flags;
1762 	ifam->ifam_addrs = info.rti_addrs;
1763 	ifam->ifam_tableid = ifp->if_rdomain;
1764 
1765 	route_input(m, NULL,
1766 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1767 }
1768 
1769 /*
1770  * This is called to generate routing socket messages indicating
1771  * network interface arrival and departure.
1772  */
1773 void
1774 rtm_ifannounce(struct ifnet *ifp, int what)
1775 {
1776 	struct if_announcemsghdr	*ifan;
1777 	struct mbuf			*m;
1778 
1779 	if (rtptable.rtp_count == 0)
1780 		return;
1781 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1782 	if (m == NULL)
1783 		return;
1784 	ifan = mtod(m, struct if_announcemsghdr *);
1785 	ifan->ifan_index = ifp->if_index;
1786 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1787 	ifan->ifan_what = what;
1788 	route_input(m, NULL, AF_UNSPEC);
1789 }
1790 
#ifdef BFD
/*
 * This is used to generate routing socket messages indicating
 * the state of a BFD session.
 *
 * The RTM_BFD message carries the destination and interface address of
 * the route bound to `bfd' plus the sockaddr_bfd produced by bfd2sa().
 * Nothing is done while rtptable.rtp_count is zero.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct sockaddr_bfd	 sa_bfd;
	struct rt_addrinfo	 info;
	struct bfd_msghdr	*hdr;
	struct rtentry		*rt;
	struct mbuf		*m;

	if (rtptable.rtp_count == 0)
		return;

	rt = bfd->bc_rt;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;

	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = info.rti_addrs;

	bfd2sa(bfd->bc_rt, &sa_bfd);
	memcpy(&hdr->bm_sa, &sa_bfd, sizeof(sa_bfd));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1822 
1823 /*
1824  * This is used to generate routing socket messages indicating
1825  * the state of an ieee80211 interface.
1826  */
1827 void
1828 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1829 {
1830 	struct if_ieee80211_msghdr	*ifim;
1831 	struct mbuf			*m;
1832 
1833 	if (rtptable.rtp_count == 0)
1834 		return;
1835 	m = rtm_msg1(RTM_80211INFO, NULL);
1836 	if (m == NULL)
1837 		return;
1838 	ifim = mtod(m, struct if_ieee80211_msghdr *);
1839 	ifim->ifim_index = ifp->if_index;
1840 	ifim->ifim_tableid = ifp->if_rdomain;
1841 
1842 	memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1843 	route_input(m, NULL, AF_UNSPEC);
1844 }
1845 
1846 /*
1847  * This is used to generate routing socket messages indicating
1848  * the address selection proposal from an interface.
1849  */
1850 void
1851 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1852     uint8_t prio)
1853 {
1854 	struct rt_msghdr	*rtm;
1855 	struct mbuf		*m;
1856 
1857 	m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1858 	if (m == NULL)
1859 		return;
1860 	rtm = mtod(m, struct rt_msghdr *);
1861 	rtm->rtm_flags = RTF_DONE | flags;
1862 	rtm->rtm_priority = prio;
1863 	rtm->rtm_tableid = ifp->if_rdomain;
1864 	rtm->rtm_index = ifp->if_index;
1865 	rtm->rtm_addrs = rtinfo->rti_addrs;
1866 
1867 	route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1868 }
1869 
1870 /*
1871  * This is used in dumping the kernel table via sysctl().
1872  */
1873 int
1874 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1875 {
1876 	struct walkarg		*w = v;
1877 	int			 error = 0, size;
1878 	struct rt_addrinfo	 info;
1879 	struct ifnet		*ifp;
1880 #ifdef BFD
1881 	struct sockaddr_bfd	 sa_bfd;
1882 #endif
1883 	struct sockaddr_rtlabel	 sa_rl;
1884 	struct sockaddr_in6	 sa_mask;
1885 
1886 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1887 		return 0;
1888 	if (w->w_op == NET_RT_DUMP && w->w_arg) {
1889 		u_int8_t prio = w->w_arg & RTP_MASK;
1890 		if (w->w_arg < 0) {
1891 			prio = (-w->w_arg) & RTP_MASK;
1892 			/* Show all routes that are not this priority */
1893 			if (prio == (rt->rt_priority & RTP_MASK))
1894 				return 0;
1895 		} else {
1896 			if (prio != (rt->rt_priority & RTP_MASK) &&
1897 			    prio != RTP_ANY)
1898 				return 0;
1899 		}
1900 	}
1901 	bzero(&info, sizeof(info));
1902 	info.rti_info[RTAX_DST] = rt_key(rt);
1903 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1904 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1905 	ifp = if_get(rt->rt_ifidx);
1906 	if (ifp != NULL) {
1907 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1908 		info.rti_info[RTAX_IFA] =
1909 		    rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
1910 		if (info.rti_info[RTAX_IFA] == NULL)
1911 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1912 		if (ifp->if_flags & IFF_POINTOPOINT)
1913 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1914 	}
1915 	if_put(ifp);
1916 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1917 #ifdef BFD
1918 	if (rt->rt_flags & RTF_BFD)
1919 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
1920 #endif
1921 #ifdef MPLS
1922 	if (rt->rt_flags & RTF_MPLS) {
1923 		struct sockaddr_mpls	 sa_mpls;
1924 
1925 		bzero(&sa_mpls, sizeof(sa_mpls));
1926 		sa_mpls.smpls_family = AF_MPLS;
1927 		sa_mpls.smpls_len = sizeof(sa_mpls);
1928 		sa_mpls.smpls_label = ((struct rt_mpls *)
1929 		    rt->rt_llinfo)->mpls_label;
1930 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1931 		info.rti_mpls = ((struct rt_mpls *)
1932 		    rt->rt_llinfo)->mpls_operation;
1933 	}
1934 #endif
1935 
1936 	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1937 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1938 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1939 
1940 		rtm->rtm_pid = curproc->p_p->ps_pid;
1941 		rtm->rtm_flags = rt->rt_flags;
1942 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
1943 		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1944 		/* Do not account the routing table's reference. */
1945 		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
1946 		rtm->rtm_index = rt->rt_ifidx;
1947 		rtm->rtm_addrs = info.rti_addrs;
1948 		rtm->rtm_tableid = id;
1949 #ifdef MPLS
1950 		rtm->rtm_mpls = info.rti_mpls;
1951 #endif
1952 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1953 			w->w_where = NULL;
1954 		else
1955 			w->w_where += size;
1956 	}
1957 	return (error);
1958 }
1959 
1960 int
1961 sysctl_iflist(int af, struct walkarg *w)
1962 {
1963 	struct ifnet		*ifp;
1964 	struct ifaddr		*ifa;
1965 	struct rt_addrinfo	 info;
1966 	int			 len, error = 0;
1967 
1968 	bzero(&info, sizeof(info));
1969 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1970 		if (w->w_arg && w->w_arg != ifp->if_index)
1971 			continue;
1972 		/* Copy the link-layer address first */
1973 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1974 		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
1975 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1976 			struct if_msghdr *ifm;
1977 
1978 			ifm = (struct if_msghdr *)w->w_tmem;
1979 			ifm->ifm_index = ifp->if_index;
1980 			ifm->ifm_tableid = ifp->if_rdomain;
1981 			ifm->ifm_flags = ifp->if_flags;
1982 			if_getdata(ifp, &ifm->ifm_data);
1983 			ifm->ifm_addrs = info.rti_addrs;
1984 			error = copyout(ifm, w->w_where, len);
1985 			if (error)
1986 				return (error);
1987 			w->w_where += len;
1988 		}
1989 		info.rti_info[RTAX_IFP] = NULL;
1990 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1991 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
1992 			if (af && af != ifa->ifa_addr->sa_family)
1993 				continue;
1994 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1995 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1996 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1997 			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
1998 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1999 				struct ifa_msghdr *ifam;
2000 
2001 				ifam = (struct ifa_msghdr *)w->w_tmem;
2002 				ifam->ifam_index = ifa->ifa_ifp->if_index;
2003 				ifam->ifam_flags = ifa->ifa_flags;
2004 				ifam->ifam_metric = ifa->ifa_metric;
2005 				ifam->ifam_addrs = info.rti_addrs;
2006 				error = copyout(w->w_tmem, w->w_where, len);
2007 				if (error)
2008 					return (error);
2009 				w->w_where += len;
2010 			}
2011 		}
2012 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2013 		    info.rti_info[RTAX_BRD] = NULL;
2014 	}
2015 	return (0);
2016 }
2017 
2018 int
2019 sysctl_ifnames(struct walkarg *w)
2020 {
2021 	struct if_nameindex_msg ifn;
2022 	struct ifnet *ifp;
2023 	int error = 0;
2024 
2025 	/* XXX ignore tableid for now */
2026 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2027 		if (w->w_arg && w->w_arg != ifp->if_index)
2028 			continue;
2029 		w->w_needed += sizeof(ifn);
2030 		if (w->w_where && w->w_needed <= 0) {
2031 
2032 			memset(&ifn, 0, sizeof(ifn));
2033 			ifn.if_index = ifp->if_index;
2034 			strlcpy(ifn.if_name, ifp->if_xname,
2035 			    sizeof(ifn.if_name));
2036 			error = copyout(&ifn, w->w_where, sizeof(ifn));
2037 			if (error)
2038 				return (error);
2039 			w->w_where += sizeof(ifn);
2040 		}
2041 	}
2042 
2043 	return (0);
2044 }
2045 
2046 int
2047 sysctl_source(int af, u_int tableid, struct walkarg *w)
2048 {
2049 	struct sockaddr	*sa;
2050 	int		 size, error = 0;
2051 
2052 	sa = rtable_getsource(tableid, af);
2053 	if (sa) {
2054 		switch (sa->sa_family) {
2055 		case AF_INET:
2056 			size = sizeof(struct sockaddr_in);
2057 			break;
2058 #ifdef INET6
2059 		case AF_INET6:
2060 			size = sizeof(struct sockaddr_in6);
2061 			break;
2062 #endif
2063 		default:
2064 			return (0);
2065 		}
2066 		w->w_needed += size;
2067 		if (w->w_where && w->w_needed <= 0) {
2068 			if ((error = copyout(sa, w->w_where, size)))
2069 				return (error);
2070 			w->w_where += size;
2071 		}
2072 	}
2073 	return (0);
2074 }
2075 
2076 int
2077 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2078     size_t newlen)
2079 {
2080 	int			 i, error = EINVAL;
2081 	u_char			 af;
2082 	struct walkarg		 w;
2083 	struct rt_tableinfo	 tableinfo;
2084 	u_int			 tableid = 0;
2085 
2086 	if (new)
2087 		return (EPERM);
2088 	if (namelen < 3 || namelen > 4)
2089 		return (EINVAL);
2090 	af = name[0];
2091 	bzero(&w, sizeof(w));
2092 	w.w_where = where;
2093 	w.w_given = *given;
2094 	w.w_needed = 0 - w.w_given;
2095 	w.w_op = name[1];
2096 	w.w_arg = name[2];
2097 
2098 	if (namelen == 4) {
2099 		tableid = name[3];
2100 		if (!rtable_exists(tableid))
2101 			return (ENOENT);
2102 	} else
2103 		tableid = curproc->p_p->ps_rtableid;
2104 
2105 	switch (w.w_op) {
2106 	case NET_RT_DUMP:
2107 	case NET_RT_FLAGS:
2108 		NET_LOCK();
2109 		for (i = 1; i <= AF_MAX; i++) {
2110 			if (af != 0 && af != i)
2111 				continue;
2112 
2113 			error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2114 			    &w);
2115 			if (error == EAFNOSUPPORT)
2116 				error = 0;
2117 			if (error)
2118 				break;
2119 		}
2120 		NET_UNLOCK();
2121 		break;
2122 
2123 	case NET_RT_IFLIST:
2124 		NET_LOCK();
2125 		error = sysctl_iflist(af, &w);
2126 		NET_UNLOCK();
2127 		break;
2128 
2129 	case NET_RT_STATS:
2130 		return (sysctl_rtable_rtstat(where, given, new));
2131 	case NET_RT_TABLE:
2132 		tableid = w.w_arg;
2133 		if (!rtable_exists(tableid))
2134 			return (ENOENT);
2135 		memset(&tableinfo, 0, sizeof tableinfo);
2136 		tableinfo.rti_tableid = tableid;
2137 		tableinfo.rti_domainid = rtable_l2(tableid);
2138 		error = sysctl_rdstruct(where, given, new,
2139 		    &tableinfo, sizeof(tableinfo));
2140 		return (error);
2141 	case NET_RT_IFNAMES:
2142 		NET_LOCK();
2143 		error = sysctl_ifnames(&w);
2144 		NET_UNLOCK();
2145 		break;
2146 	case NET_RT_SOURCE:
2147 		tableid = w.w_arg;
2148 		if (!rtable_exists(tableid))
2149 			return (ENOENT);
2150 		NET_LOCK();
2151 		for (i = 1; i <= AF_MAX; i++) {
2152 			if (af != 0 && af != i)
2153 				continue;
2154 
2155 			error = sysctl_source(i, tableid, &w);
2156 			if (error == EAFNOSUPPORT)
2157 				error = 0;
2158 			if (error)
2159 				break;
2160 		}
2161 		NET_UNLOCK();
2162 		break;
2163 	}
2164 	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2165 	w.w_needed += w.w_given;
2166 	if (where) {
2167 		*given = w.w_where - (caddr_t)where;
2168 		if (*given < w.w_needed)
2169 			return (ENOMEM);
2170 	} else
2171 		*given = (11 * w.w_needed) / 10;
2172 
2173 	return (error);
2174 }
2175 
2176 int
2177 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2178 {
2179 	extern struct cpumem *rtcounters;
2180 	uint64_t counters[rts_ncounters];
2181 	struct rtstat rtstat;
2182 	uint32_t *words = (uint32_t *)&rtstat;
2183 	int i;
2184 
2185 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2186 	memset(&rtstat, 0, sizeof rtstat);
2187 	counters_read(rtcounters, counters, nitems(counters));
2188 
2189 	for (i = 0; i < nitems(counters); i++)
2190 		words[i] = (uint32_t)counters[i];
2191 
2192 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2193 }
2194 
2195 int
2196 rtm_validate_proposal(struct rt_addrinfo *info)
2197 {
2198 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2199 	    RTA_SEARCH)) {
2200 		return -1;
2201 	}
2202 
2203 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2204 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2205 		if (sa == NULL)
2206 			return -1;
2207 		switch (sa->sa_family) {
2208 		case AF_INET:
2209 			if (sa->sa_len != sizeof(struct sockaddr_in))
2210 				return -1;
2211 			break;
2212 		case AF_INET6:
2213 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2214 				return -1;
2215 			break;
2216 		default:
2217 			return -1;
2218 		}
2219 	}
2220 
2221 	if (ISSET(info->rti_addrs, RTA_IFA)) {
2222 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
2223 		if (sa == NULL)
2224 			return -1;
2225 		switch (sa->sa_family) {
2226 		case AF_INET:
2227 			if (sa->sa_len != sizeof(struct sockaddr_in))
2228 				return -1;
2229 			break;
2230 		case AF_INET6:
2231 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2232 				return -1;
2233 			break;
2234 		default:
2235 			return -1;
2236 		}
2237 	}
2238 
2239 	if (ISSET(info->rti_addrs, RTA_DNS)) {
2240 		struct sockaddr_rtdns *rtdns =
2241 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2242 		if (rtdns == NULL)
2243 			return -1;
2244 		if (rtdns->sr_len > sizeof(*rtdns))
2245 			return -1;
2246 		if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2247 			return -1;
2248 		switch (rtdns->sr_family) {
2249 		case AF_INET:
2250 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2251 			    sr_dns)) % sizeof(struct in_addr) != 0)
2252 				return -1;
2253 			break;
2254 #ifdef INET6
2255 		case AF_INET6:
2256 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2257 			    sr_dns)) % sizeof(struct in6_addr) != 0)
2258 				return -1;
2259 			break;
2260 #endif
2261 		default:
2262 			return -1;
2263 		}
2264 	}
2265 
2266 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
2267 		struct sockaddr_rtstatic *rtstatic =
2268 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2269 		if (rtstatic == NULL)
2270 			return -1;
2271 		if (rtstatic->sr_len > sizeof(*rtstatic))
2272 			return -1;
2273 		if (rtstatic->sr_len <=
2274 		    offsetof(struct sockaddr_rtstatic, sr_static))
2275 			return -1;
2276 	}
2277 
2278 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2279 		struct sockaddr_rtsearch *rtsearch =
2280 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2281 		if (rtsearch == NULL)
2282 			return -1;
2283 		if (rtsearch->sr_len > sizeof(*rtsearch))
2284 			return -1;
2285 		if (rtsearch->sr_len <=
2286 		    offsetof(struct sockaddr_rtsearch, sr_search))
2287 			return -1;
2288 	}
2289 
2290 	return 0;
2291 }
2292 
2293 int
2294 rt_setsource(unsigned int rtableid, struct sockaddr *src)
2295 {
2296 	struct ifaddr	*ifa;
2297 	int		error;
2298 	/*
2299 	 * If source address is 0.0.0.0 or ::
2300 	 * use automatic source selection
2301 	 */
2302 	switch(src->sa_family) {
2303 	case AF_INET:
2304 		if(satosin(src)->sin_addr.s_addr == INADDR_ANY) {
2305 			rtable_setsource(rtableid, AF_INET, NULL);
2306 			return (0);
2307 		}
2308 		break;
2309 #ifdef INET6
2310 	case AF_INET6:
2311 		if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
2312 			rtable_setsource(rtableid, AF_INET6, NULL);
2313 			return (0);
2314 		}
2315 		break;
2316 #endif
2317 	default:
2318 		return (EAFNOSUPPORT);
2319 	}
2320 
2321 	KERNEL_LOCK();
2322 	/*
2323 	 * Check if source address is assigned to an interface in the
2324 	 * same rdomain
2325 	 */
2326 	if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) {
2327 		KERNEL_UNLOCK();
2328 		return (EINVAL);
2329 	}
2330 
2331 	error = rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr);
2332 	KERNEL_UNLOCK();
2333 
2334 	return (error);
2335 }
2336 
2337 /*
2338  * Definitions of protocols supported in the ROUTE domain.
2339  */
2340 
2341 struct domain routedomain;
2342 
2343 struct protosw routesw[] = {
2344 {
2345   .pr_type	= SOCK_RAW,
2346   .pr_domain	= &routedomain,
2347   .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2348   .pr_output	= route_output,
2349   .pr_ctloutput	= route_ctloutput,
2350   .pr_usrreq	= route_usrreq,
2351   .pr_attach	= route_attach,
2352   .pr_detach	= route_detach,
2353   .pr_init	= route_prinit,
2354   .pr_sysctl	= sysctl_rtable
2355 }
2356 };
2357 
2358 struct domain routedomain = {
2359   .dom_family = PF_ROUTE,
2360   .dom_name = "route",
2361   .dom_init = route_init,
2362   .dom_protosw = routesw,
2363   .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2364 };
2365