xref: /openbsd-src/sys/net/rtsock.c (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1 /*	$OpenBSD: rtsock.c,v 1.304 2020/11/07 09:51:40 denis Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75 
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80 
81 #include <netinet/in.h>
82 
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93 
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97 
98 #define	ROUTESNDQ	8192	/* passed to soreserve() in route_attach() */
99 #define	ROUTERCVQ	8192	/* passed to soreserve() in route_attach() */
100 
101 const struct sockaddr route_src = { 2, PF_ROUTE, };	/* from-address given to sbappendaddr(); also the PRU_PEERADDR reply */
102 
103 struct walkarg {	/* handed to rtm_msg2(), sysctl_iflist() and sysctl_ifnames() */
104 	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
105 	caddr_t	w_where, w_tmem;	/* NOTE(review): field usage lives in the sysctl code outside this chunk */
106 };
107 
108 void	route_prinit(void);	/* sets up rtptable and rtpcb_pool */
109 void	rcb_ref(void *, void *);	/* registered with srpl_rc_init(): takes rop_refcnt */
110 void	rcb_unref(void *, void *);	/* registered with srpl_rc_init(): releases rop_refcnt */
111 int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
112 	    struct mbuf *);
113 int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
114 int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
115 	    struct mbuf *, struct proc *);
116 void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
117 int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
118 int	route_cleargateway(struct rtentry *, void *, unsigned int);
119 void	rtm_senddesync_timer(void *);	/* timeout handler: locks the socket, calls rtm_senddesync() */
120 void	rtm_senddesync(struct socket *);	/* tries to queue RTM_DESYNC, re-arms the timeout on failure */
121 int	rtm_sendup(struct socket *, struct mbuf *, int);	/* queues a message on one socket */
122 
123 int	rtm_getifa(struct rt_addrinfo *, unsigned int);
124 int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
125 	    uint8_t, unsigned int);
126 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);	/* returns a malloc'd M_RTABLE message */
127 struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
128 int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
129 		     struct walkarg *);
130 int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
131 int		 rtm_validate_proposal(struct rt_addrinfo *);
132 void		 rtm_setmetrics(u_long, const struct rt_metrics *,
133 		     struct rt_kmetrics *);
134 void		 rtm_getmetrics(const struct rt_kmetrics *,
135 		     struct rt_metrics *);
136 
137 int		 sysctl_iflist(int, struct walkarg *);
138 int		 sysctl_ifnames(struct walkarg *);
139 int		 sysctl_rtable_rtstat(void *, size_t *, void *);
140 
141 int		 rt_setsource(unsigned int, struct sockaddr *);
142 
143 /*
144  * Locks used to protect struct members
145  *       I       immutable after creation
146  *       sK      solock (kernel lock)
147  */
148 struct rtpcb {
149 	struct socket		*rop_socket;		/* [I] */
150 
151 	SRPL_ENTRY(rtpcb)	rop_list;		/* linkage on rtptable.rtp_list */
152 	struct refcnt		rop_refcnt;		/* drained by route_detach() before the pcb is freed */
153 	struct timeout		rop_timeout;		/* runs rtm_senddesync_timer() */
154 	unsigned int		rop_msgfilter;		/* [sK] set via ROUTE_MSGFILTER */
155 	unsigned int		rop_flagfilter;		/* [sK] set via ROUTE_FLAGFILTER */
156 	unsigned int		rop_flags;		/* [sK] ROUTECB_FLAG_* */
157 	u_int			rop_rtableid;		/* [sK] set via ROUTE_TABLEFILTER */
158 	unsigned short		rop_proto;		/* [I] */
159 	u_char			rop_priority;		/* [sK] set via ROUTE_PRIOFILTER */
160 };
161 #define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
162 
163 struct rtptable {
164 	SRPL_HEAD(, rtpcb)	rtp_list;	/* every attached rtpcb */
165 	struct srpl_rc		rtp_rc;		/* initialised with rcb_ref()/rcb_unref() */
166 	struct rwlock		rtp_lk;		/* held for list insert/remove and rtp_count updates */
167 	unsigned int		rtp_count;	/* incremented in route_attach(), decremented in route_detach() */
168 };
169 
170 struct pool rtpcb_pool;		/* backing store for struct rtpcb */
171 struct rtptable rtptable;	/* the single global table of routing sockets */
172 
173 /*
174  * These flags and timeout are used for indicating to userland (via a
175  * RTM_DESYNC msg) when the route socket has overflowed and messages
176  * have been lost.
177  */
178 #define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
179 #define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
180 					   queueing more packets */
181 
182 #define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */
183 
184 void
185 route_prinit(void)
186 {
187 	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
188 	rw_init(&rtptable.rtp_lk, "rtsock");
189 	SRPL_INIT(&rtptable.rtp_list);
190 	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
191 	    IPL_NONE, PR_WAITOK, "rtpcb", NULL);
192 }
193 
194 void
195 rcb_ref(void *null, void *v)
196 {
197 	struct rtpcb *rop = v;
198 
199 	refcnt_take(&rop->rop_refcnt);
200 }
201 
202 void
203 rcb_unref(void *null, void *v)
204 {
205 	struct rtpcb *rop = v;
206 
207 	refcnt_rele_wake(&rop->rop_refcnt);
208 }
209 
210 int
211 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
212     struct mbuf *control, struct proc *p)
213 {
214 	struct rtpcb	*rop;
215 	int		 error = 0;
216 
217 	if (req == PRU_CONTROL)
218 		return (EOPNOTSUPP);
219 
220 	soassertlocked(so);
221 
222 	if (control && control->m_len) {
223 		error = EOPNOTSUPP;
224 		goto release;
225 	}
226 
227 	rop = sotortpcb(so);
228 	if (rop == NULL) {
229 		error = EINVAL;
230 		goto release;
231 	}
232 
233 	switch (req) {
234 	/* no connect, bind, accept. Socket is connected from the start */
235 	case PRU_CONNECT:
236 	case PRU_BIND:
237 	case PRU_CONNECT2:
238 	case PRU_LISTEN:
239 	case PRU_ACCEPT:
240 		error = EOPNOTSUPP;
241 		break;
242 
243 	case PRU_DISCONNECT:
244 	case PRU_ABORT:
245 		soisdisconnected(so);
246 		break;
247 	case PRU_SHUTDOWN:
248 		socantsendmore(so);
249 		break;
250 	case PRU_SENSE:
251 		/* stat: don't bother with a blocksize. */
252 		break;
253 
254 	/* minimal support, just implement a fake peer address */
255 	case PRU_SOCKADDR:
256 		error = EINVAL;
257 		break;
258 	case PRU_PEERADDR:
259 		bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
260 		nam->m_len = route_src.sa_len;
261 		break;
262 
263 	case PRU_RCVD:
264 		/*
265 		 * If we are in a FLUSH state, check if the buffer is
266 		 * empty so that we can clear the flag.
267 		 */
268 		if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
269 		    ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
270 		    rop->rop_socket->so_rcv.sb_hiwat)))
271 			rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
272 		break;
273 
274 	case PRU_RCVOOB:
275 	case PRU_SENDOOB:
276 		error = EOPNOTSUPP;
277 		break;
278 	case PRU_SEND:
279 		if (nam) {
280 			error = EISCONN;
281 			break;
282 		}
283 		error = (*so->so_proto->pr_output)(m, so, NULL, NULL);
284 		m = NULL;
285 		break;
286 	default:
287 		panic("route_usrreq");
288 	}
289 
290  release:
291 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
292 		m_freem(control);
293 		m_freem(m);
294 	}
295 	return (error);
296 }
297 
298 int
299 route_attach(struct socket *so, int proto)
300 {
301 	struct rtpcb	*rop;
302 	int		 error;
303 
304 	/*
305 	 * use the rawcb but allocate a rtpcb, this
306 	 * code does not care about the additional fields
307 	 * and works directly on the raw socket.
308 	 */
309 	rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO);
310 	so->so_pcb = rop;
311 	/* Init the timeout structure */
312 	timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
313 	refcnt_init(&rop->rop_refcnt);
314 
315 	if (curproc == NULL)
316 		error = EACCES;
317 	else
318 		error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
319 	if (error) {
320 		pool_put(&rtpcb_pool, rop);
321 		return (error);
322 	}
323 
324 	rop->rop_socket = so;
325 	rop->rop_proto = proto;
326 
327 	rop->rop_rtableid = curproc->p_p->ps_rtableid;
328 
329 	soisconnected(so);
330 	so->so_options |= SO_USELOOPBACK;
331 
332 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
333 	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
334 	    rop_list);
335 	rtptable.rtp_count++;
336 	rw_exit(&rtptable.rtp_lk);
337 
338 	return (0);
339 }
340 
341 int
342 route_detach(struct socket *so)
343 {
344 	struct rtpcb	*rop;
345 
346 	soassertlocked(so);
347 
348 	rop = sotortpcb(so);
349 	if (rop == NULL)
350 		return (EINVAL);
351 
352 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
353 
354 	timeout_del(&rop->rop_timeout);
355 	rtptable.rtp_count--;
356 
357 	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
358 	    rop_list);
359 	rw_exit(&rtptable.rtp_lk);
360 
361 	/* wait for all references to drop */
362 	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
363 
364 	so->so_pcb = NULL;
365 	KASSERT((so->so_state & SS_NOFDREF) == 0);
366 	pool_put(&rtpcb_pool, rop);
367 
368 	return (0);
369 }
370 
371 int
372 route_ctloutput(int op, struct socket *so, int level, int optname,
373     struct mbuf *m)
374 {
375 	struct rtpcb *rop = sotortpcb(so);
376 	int error = 0;
377 	unsigned int tid, prio;
378 
379 	if (level != AF_ROUTE)
380 		return (EINVAL);
381 
382 	switch (op) {
383 	case PRCO_SETOPT:
384 		switch (optname) {
385 		case ROUTE_MSGFILTER:
386 			if (m == NULL || m->m_len != sizeof(unsigned int))
387 				error = EINVAL;
388 			else
389 				rop->rop_msgfilter = *mtod(m, unsigned int *);
390 			break;
391 		case ROUTE_TABLEFILTER:
392 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
393 				error = EINVAL;
394 				break;
395 			}
396 			tid = *mtod(m, unsigned int *);
397 			if (tid != RTABLE_ANY && !rtable_exists(tid))
398 				error = ENOENT;
399 			else
400 				rop->rop_rtableid = tid;
401 			break;
402 		case ROUTE_PRIOFILTER:
403 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
404 				error = EINVAL;
405 				break;
406 			}
407 			prio = *mtod(m, unsigned int *);
408 			if (prio > RTP_MAX)
409 				error = EINVAL;
410 			else
411 				rop->rop_priority = prio;
412 			break;
413 		case ROUTE_FLAGFILTER:
414 			if (m == NULL || m->m_len != sizeof(unsigned int))
415 				error = EINVAL;
416 			else
417 				rop->rop_flagfilter = *mtod(m, unsigned int *);
418 			break;
419 		default:
420 			error = ENOPROTOOPT;
421 			break;
422 		}
423 		break;
424 	case PRCO_GETOPT:
425 		switch (optname) {
426 		case ROUTE_MSGFILTER:
427 			m->m_len = sizeof(unsigned int);
428 			*mtod(m, unsigned int *) = rop->rop_msgfilter;
429 			break;
430 		case ROUTE_TABLEFILTER:
431 			m->m_len = sizeof(unsigned int);
432 			*mtod(m, unsigned int *) = rop->rop_rtableid;
433 			break;
434 		case ROUTE_PRIOFILTER:
435 			m->m_len = sizeof(unsigned int);
436 			*mtod(m, unsigned int *) = rop->rop_priority;
437 			break;
438 		case ROUTE_FLAGFILTER:
439 			m->m_len = sizeof(unsigned int);
440 			*mtod(m, unsigned int *) = rop->rop_flagfilter;
441 			break;
442 		default:
443 			error = ENOPROTOOPT;
444 			break;
445 		}
446 	}
447 	return (error);
448 }
449 
/*
 * Timeout handler installed by route_attach(): `xso' is the socket.
 * Takes the socket lock and retries delivery of the RTM_DESYNC message.
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*sock = xso;
	int		 lk;

	lk = solock(sock);
	rtm_senddesync(sock);
	sounlock(sock, lk);
}
460 
461 void
462 rtm_senddesync(struct socket *so)
463 {
464 	struct rtpcb	*rop = sotortpcb(so);
465 	struct mbuf	*desync_mbuf;
466 
467 	soassertlocked(so);
468 
469 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
470 	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
471 		return;
472 
473 	/*
474 	 * If we fail to alloc memory or if sbappendaddr()
475 	 * fails, re-add timeout and try again.
476 	 */
477 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
478 	if (desync_mbuf != NULL) {
479 		if (sbappendaddr(so, &so->so_rcv, &route_src,
480 		    desync_mbuf, NULL) != 0) {
481 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
482 			sorwakeup(rop->rop_socket);
483 			return;
484 		}
485 		m_freem(desync_mbuf);
486 	}
487 	/* Re-add timeout to try sending msg again */
488 	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
489 }
490 
491 void
492 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
493 {
494 	struct socket *so;
495 	struct rtpcb *rop;
496 	struct rt_msghdr *rtm;
497 	struct mbuf *m = m0;
498 	struct socket *last = NULL;
499 	struct srp_ref sr;
500 	int s;
501 
502 	/* ensure that we can access the rtm_type via mtod() */
503 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
504 		m_freem(m);
505 		return;
506 	}
507 
508 	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
509 		/*
510 		 * If route socket is bound to an address family only send
511 		 * messages that match the address family. Address family
512 		 * agnostic messages are always sent.
513 		 */
514 		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
515 		    rop->rop_proto != sa_family)
516 			continue;
517 
518 
519 		so = rop->rop_socket;
520 		s = solock(so);
521 
522 		/*
523 		 * Check to see if we don't want our own messages and
524 		 * if we can receive anything.
525 		 */
526 		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
527 		    !(so->so_state & SS_ISCONNECTED) ||
528 		    (so->so_state & SS_CANTRCVMORE)) {
529 next:
530 			sounlock(so, s);
531 			continue;
532 		}
533 
534 		/* filter messages that the process does not want */
535 		rtm = mtod(m, struct rt_msghdr *);
536 		/* but RTM_DESYNC can't be filtered */
537 		if (rtm->rtm_type != RTM_DESYNC) {
538 			if (rop->rop_msgfilter != 0 &&
539 			    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
540 				goto next;
541 			if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
542 				goto next;
543 		}
544 		switch (rtm->rtm_type) {
545 		case RTM_IFANNOUNCE:
546 		case RTM_DESYNC:
547 			/* no tableid */
548 			break;
549 		case RTM_RESOLVE:
550 		case RTM_NEWADDR:
551 		case RTM_DELADDR:
552 		case RTM_IFINFO:
553 		case RTM_80211INFO:
554 		case RTM_BFD:
555 			/* check against rdomain id */
556 			if (rop->rop_rtableid != RTABLE_ANY &&
557 			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
558 				goto next;
559 			break;
560 		default:
561 			if (rop->rop_priority != 0 &&
562 			    rop->rop_priority < rtm->rtm_priority)
563 				goto next;
564 			/* check against rtable id */
565 			if (rop->rop_rtableid != RTABLE_ANY &&
566 			    rop->rop_rtableid != rtm->rtm_tableid)
567 				goto next;
568 			break;
569 		}
570 
571 		/*
572 		 * Check to see if the flush flag is set. If so, don't queue
573 		 * any more messages until the flag is cleared.
574 		 */
575 		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
576 			goto next;
577 		sounlock(so, s);
578 
579 		if (last) {
580 			s = solock(last);
581 			rtm_sendup(last, m, 1);
582 			sounlock(last, s);
583 			refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
584 		}
585 		/* keep a reference for last */
586 		refcnt_take(&rop->rop_refcnt);
587 		last = rop->rop_socket;
588 	}
589 	SRPL_LEAVE(&sr);
590 
591 	if (last) {
592 		s = solock(last);
593 		rtm_sendup(last, m, 0);
594 		sounlock(last, s);
595 		refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
596 	} else
597 		m_freem(m);
598 }
599 
600 int
601 rtm_sendup(struct socket *so, struct mbuf *m0, int more)
602 {
603 	struct rtpcb *rop = sotortpcb(so);
604 	struct mbuf *m;
605 
606 	soassertlocked(so);
607 
608 	if (more) {
609 		m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
610 		if (m == NULL)
611 			return (ENOMEM);
612 	} else
613 		m = m0;
614 
615 	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
616 	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
617 		/* Flag socket as desync'ed and flush required */
618 		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
619 		rtm_senddesync(so);
620 		m_freem(m);
621 		return (ENOBUFS);
622 	}
623 
624 	sorwakeup(so);
625 	return (0);
626 }
627 
628 struct rt_msghdr *
629 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
630 {
631 	struct rt_msghdr	*rtm;
632 	struct rt_addrinfo	 info;
633 	struct sockaddr_rtlabel	 sa_rl;
634 	struct sockaddr_in6	 sa_mask;
635 #ifdef BFD
636 	struct sockaddr_bfd	 sa_bfd;
637 #endif
638 	struct ifnet		*ifp = NULL;
639 	int			 len;
640 
641 	bzero(&info, sizeof(info));
642 	info.rti_info[RTAX_DST] = rt_key(rt);
643 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
644 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
645 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
646 #ifdef BFD
647 	if (rt->rt_flags & RTF_BFD)
648 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
649 #endif
650 #ifdef MPLS
651 	if (rt->rt_flags & RTF_MPLS) {
652 		struct sockaddr_mpls	 sa_mpls;
653 
654 		bzero(&sa_mpls, sizeof(sa_mpls));
655 		sa_mpls.smpls_family = AF_MPLS;
656 		sa_mpls.smpls_len = sizeof(sa_mpls);
657 		sa_mpls.smpls_label = ((struct rt_mpls *)
658 		    rt->rt_llinfo)->mpls_label;
659 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
660 		info.rti_mpls = ((struct rt_mpls *)
661 		    rt->rt_llinfo)->mpls_operation;
662 	}
663 #endif
664 	ifp = if_get(rt->rt_ifidx);
665 	if (ifp != NULL) {
666 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
667 		info.rti_info[RTAX_IFA] =
668 		    rtable_getsource(tableid, info.rti_info[RTAX_DST]->sa_family);
669 		if (info.rti_info[RTAX_IFA] == NULL)
670 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
671 		if (ifp->if_flags & IFF_POINTOPOINT)
672 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
673 	}
674 	if_put(ifp);
675 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
676 
677 	/* build new route message */
678 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
679 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
680 
681 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
682 	rtm->rtm_type = type;
683 	rtm->rtm_index = rt->rt_ifidx;
684 	rtm->rtm_tableid = tableid;
685 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
686 	rtm->rtm_flags = rt->rt_flags;
687 	rtm->rtm_pid = curproc->p_p->ps_pid;
688 	rtm->rtm_seq = seq;
689 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
690 	rtm->rtm_addrs = info.rti_addrs;
691 #ifdef MPLS
692 	rtm->rtm_mpls = info.rti_mpls;
693 #endif
694 	return rtm;
695 }
696 
697 int
698 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
699     struct mbuf *control)
700 {
701 	struct rt_msghdr	*rtm = NULL;
702 	struct rtentry		*rt = NULL;
703 	struct rt_addrinfo	 info;
704 	struct ifnet		*ifp;
705 	int			 len, seq, error = 0;
706 	u_int			 tableid;
707 	u_int8_t		 prio;
708 	u_char			 vers, type;
709 
710 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
711 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
712 		return (ENOBUFS);
713 	if ((m->m_flags & M_PKTHDR) == 0)
714 		panic("route_output");
715 	len = m->m_pkthdr.len;
716 	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 ||
717 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
718 		error = EINVAL;
719 		goto fail;
720 	}
721 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
722 	switch (vers) {
723 	case RTM_VERSION:
724 		if (len < sizeof(struct rt_msghdr)) {
725 			error = EINVAL;
726 			goto fail;
727 		}
728 		if (len > RTM_MAXSIZE) {
729 			error = EMSGSIZE;
730 			goto fail;
731 		}
732 		rtm = malloc(len, M_RTABLE, M_WAITOK);
733 		m_copydata(m, 0, len, (caddr_t)rtm);
734 		break;
735 	default:
736 		error = EPROTONOSUPPORT;
737 		goto fail;
738 	}
739 
740 	/* Verify that the caller is sending an appropriate message early */
741 	switch (rtm->rtm_type) {
742 	case RTM_ADD:
743 	case RTM_DELETE:
744 	case RTM_GET:
745 	case RTM_CHANGE:
746 	case RTM_PROPOSAL:
747 	case RTM_SOURCE:
748 		break;
749 	default:
750 		error = EOPNOTSUPP;
751 		goto fail;
752 	}
753 	/*
754 	 * Verify that the header length is valid.
755 	 * All messages from userland start with a struct rt_msghdr.
756 	 */
757 	if (rtm->rtm_hdrlen == 0)	/* old client */
758 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
759 	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
760 	    len < rtm->rtm_hdrlen) {
761 		error = EINVAL;
762 		goto fail;
763 	}
764 
765 	rtm->rtm_pid = curproc->p_p->ps_pid;
766 
767 	/*
768 	 * Verify that the caller has the appropriate privilege; RTM_GET
769 	 * is the only operation the non-superuser is allowed.
770 	 */
771 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
772 		error = EACCES;
773 		goto fail;
774 	}
775 	tableid = rtm->rtm_tableid;
776 	if (!rtable_exists(tableid)) {
777 		if (rtm->rtm_type == RTM_ADD) {
778 			if ((error = rtable_add(tableid)) != 0)
779 				goto fail;
780 		} else {
781 			error = EINVAL;
782 			goto fail;
783 		}
784 	}
785 
786 	/* Do not let userland play with kernel-only flags. */
787 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
788 		error = EINVAL;
789 		goto fail;
790 	}
791 
792 	/* make sure that kernel-only bits are not set */
793 	rtm->rtm_priority &= RTP_MASK;
794 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
795 	rtm->rtm_fmask &= RTF_FMASK;
796 
797 	if (rtm->rtm_priority != 0) {
798 		if (rtm->rtm_priority > RTP_MAX ||
799 		    rtm->rtm_priority == RTP_LOCAL) {
800 			error = EINVAL;
801 			goto fail;
802 		}
803 		prio = rtm->rtm_priority;
804 	} else if (rtm->rtm_type != RTM_ADD)
805 		prio = RTP_ANY;
806 	else if (rtm->rtm_flags & RTF_STATIC)
807 		prio = 0;
808 	else
809 		prio = RTP_DEFAULT;
810 
811 	bzero(&info, sizeof(info));
812 	info.rti_addrs = rtm->rtm_addrs;
813 	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
814 	    len + (caddr_t)rtm, &info)) != 0)
815 		goto fail;
816 
817 	info.rti_flags = rtm->rtm_flags;
818 
819 	if (rtm->rtm_type != RTM_SOURCE &&
820 	    rtm->rtm_type != RTM_PROPOSAL &&
821 	    (info.rti_info[RTAX_DST] == NULL ||
822 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
823 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
824 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
825 	    info.rti_info[RTAX_GENMASK] != NULL)) {
826 		error = EINVAL;
827 		goto fail;
828 	}
829 #ifdef MPLS
830 	info.rti_mpls = rtm->rtm_mpls;
831 #endif
832 
833 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
834 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
835 	    (info.rti_flags & RTF_CLONING) == 0) {
836 		info.rti_flags |= RTF_LLINFO;
837 	}
838 
839 	/*
840 	 * Validate RTM_PROPOSAL and pass it along or error out.
841 	 */
842 	if (rtm->rtm_type == RTM_PROPOSAL) {
843 		if (rtm_validate_proposal(&info) == -1) {
844 			error = EINVAL;
845 			goto fail;
846 		}
847 		/*
848 		 * If this is a solicitation proposal forward request to
849 		 * all interfaces. Most handlers will ignore it but at least
850 		 * umb(4) will send a response to this event.
851 		 */
852 		if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
853 			NET_LOCK();
854 			TAILQ_FOREACH(ifp, &ifnet, if_list) {
855 				ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
856 			}
857 			NET_UNLOCK();
858 		}
859 	} else if (rtm->rtm_type == RTM_SOURCE) {
860 		if (info.rti_info[RTAX_IFA] == NULL) {
861 			error = EINVAL;
862 			goto fail;
863 		}
864 		if ((error =
865 		    rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0)
866 			goto fail;
867 	} else {
868 		error = rtm_output(rtm, &rt, &info, prio, tableid);
869 		if (!error) {
870 			type = rtm->rtm_type;
871 			seq = rtm->rtm_seq;
872 			free(rtm, M_RTABLE, len);
873 			rtm = rtm_report(rt, type, seq, tableid);
874 			len = rtm->rtm_msglen;
875 		}
876 	}
877 
878 	rtfree(rt);
879 	if (error) {
880 		rtm->rtm_errno = error;
881 	} else {
882 		rtm->rtm_flags |= RTF_DONE;
883 	}
884 
885 	/*
886 	 * Check to see if we don't want our own messages.
887 	 */
888 	if (!(so->so_options & SO_USELOOPBACK)) {
889 		if (rtptable.rtp_count <= 1) {
890 			/* no other listener and no loopback of messages */
891 fail:
892 			free(rtm, M_RTABLE, len);
893 			m_freem(m);
894 			return (error);
895 		}
896 	}
897 	if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
898 		m_freem(m);
899 		m = NULL;
900 	} else if (m->m_pkthdr.len > len)
901 		m_adj(m, len - m->m_pkthdr.len);
902 	free(rtm, M_RTABLE, len);
903 	if (m)
904 		route_input(m, so, info.rti_info[RTAX_DST] ?
905 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
906 
907 	return (error);
908 }
909 
910 int
911 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
912     struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
913 {
914 	struct rtentry		*rt = *prt;
915 	struct ifnet		*ifp = NULL;
916 	int			 plen, newgate = 0, error = 0;
917 
918 	switch (rtm->rtm_type) {
919 	case RTM_ADD:
920 		if (info->rti_info[RTAX_GATEWAY] == NULL) {
921 			error = EINVAL;
922 			break;
923 		}
924 
925 		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
926 		if ((error = route_arp_conflict(rt, info))) {
927 			rtfree(rt);
928 			rt = NULL;
929 			break;
930 		}
931 
932 		/*
933 		 * We cannot go through a delete/create/insert cycle for
934 		 * cached route because this can lead to races in the
935 		 * receive path.  Instead we update the L2 cache.
936 		 */
937 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
938 			goto change;
939 
940 		rtfree(rt);
941 		rt = NULL;
942 
943 		NET_LOCK();
944 		if ((error = rtm_getifa(info, tableid)) != 0) {
945 			NET_UNLOCK();
946 			break;
947 		}
948 		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
949 		NET_UNLOCK();
950 		if (error == 0)
951 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
952 			    &rt->rt_rmx);
953 		break;
954 	case RTM_DELETE:
955 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
956 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
957 		    prio);
958 		if (rt == NULL) {
959 			error = ESRCH;
960 			break;
961 		}
962 
963 		/*
964 		 * If we got multipath routes, we require users to specify
965 		 * a matching gateway.
966 		 */
967 		if (ISSET(rt->rt_flags, RTF_MPATH) &&
968 		    info->rti_info[RTAX_GATEWAY] == NULL) {
969 			error = ESRCH;
970 			break;
971 		}
972 
973 		/* Detaching an interface requires the KERNEL_LOCK(). */
974 		ifp = if_get(rt->rt_ifidx);
975 		KASSERT(ifp != NULL);
976 
977 		/*
978 		 * Invalidate the cache of automagically created and
979 		 * referenced L2 entries to make sure that ``rt_gwroute''
980 		 * pointer stays valid for other CPUs.
981 		 */
982 		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
983 			NET_LOCK();
984 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
985 			/* Reset the MTU of the gateway route. */
986 			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
987 			    route_cleargateway, rt);
988 			NET_UNLOCK();
989 			if_put(ifp);
990 			break;
991 		}
992 
993 		/*
994 		 * Make sure that local routes are only modified by the
995 		 * kernel.
996 		 */
997 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
998 			if_put(ifp);
999 			error = EINVAL;
1000 			break;
1001 		}
1002 
1003 		rtfree(rt);
1004 		rt = NULL;
1005 
1006 		NET_LOCK();
1007 		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
1008 		NET_UNLOCK();
1009 		if_put(ifp);
1010 		break;
1011 	case RTM_CHANGE:
1012 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1013 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1014 		    prio);
1015 		/*
1016 		 * If we got multipath routes, we require users to specify
1017 		 * a matching gateway.
1018 		 */
1019 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
1020 		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
1021 			rtfree(rt);
1022 			rt = NULL;
1023 		}
1024 		/*
1025 		 * If RTAX_GATEWAY is the argument we're trying to
1026 		 * change, try to find a compatible route.
1027 		 */
1028 		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
1029 			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1030 			    info->rti_info[RTAX_NETMASK], NULL, prio);
1031 			/* Ensure we don't pick a multipath one. */
1032 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
1033 				rtfree(rt);
1034 				rt = NULL;
1035 			}
1036 		}
1037 
1038 		if (rt == NULL) {
1039 			error = ESRCH;
1040 			break;
1041 		}
1042 
1043 		/*
1044 		 * Make sure that local routes are only modified by the
1045 		 * kernel.
1046 		 */
1047 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1048 			error = EINVAL;
1049 			break;
1050 		}
1051 
1052 		/*
1053 		 * RTM_CHANGE needs a perfect match.
1054 		 */
1055 		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
1056 		    info->rti_info[RTAX_NETMASK]);
1057 		if (rt_plen(rt) != plen) {
1058 			error = ESRCH;
1059 			break;
1060 		}
1061 
1062 		if (info->rti_info[RTAX_GATEWAY] != NULL)
1063 			if (rt->rt_gateway == NULL ||
1064 			    bcmp(rt->rt_gateway,
1065 			    info->rti_info[RTAX_GATEWAY],
1066 			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
1067 				newgate = 1;
1068 			}
1069 		/*
1070 		 * Check reachable gateway before changing the route.
1071 		 * New gateway could require new ifaddr, ifp;
1072 		 * flags may also be different; ifp may be specified
1073 		 * by ll sockaddr when protocol address is ambiguous.
1074 		 */
1075 		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1076 		    info->rti_info[RTAX_IFA] != NULL) {
1077 			struct ifaddr	*ifa = NULL;
1078 
1079 			NET_LOCK();
1080 			if ((error = rtm_getifa(info, tableid)) != 0) {
1081 				NET_UNLOCK();
1082 				break;
1083 			}
1084 			ifa = info->rti_ifa;
1085 			if (rt->rt_ifa != ifa) {
1086 				ifp = if_get(rt->rt_ifidx);
1087 				KASSERT(ifp != NULL);
1088 				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1089 				ifafree(rt->rt_ifa);
1090 				if_put(ifp);
1091 
1092 				ifa->ifa_refcnt++;
1093 				rt->rt_ifa = ifa;
1094 				rt->rt_ifidx = ifa->ifa_ifp->if_index;
1095 				/* recheck link state after ifp change */
1096 				rt_if_linkstate_change(rt, ifa->ifa_ifp,
1097 				    tableid);
1098 			}
1099 			NET_UNLOCK();
1100 		}
1101 change:
1102 		if (info->rti_info[RTAX_GATEWAY] != NULL) {
1103 			/* When updating the gateway, make sure it is valid. */
1104 			if (!newgate && rt->rt_gateway->sa_family !=
1105 			    info->rti_info[RTAX_GATEWAY]->sa_family) {
1106 				error = EINVAL;
1107 				break;
1108 			}
1109 
1110 			NET_LOCK();
1111 			error = rt_setgate(rt,
1112 			    info->rti_info[RTAX_GATEWAY], tableid);
1113 			NET_UNLOCK();
1114 			if (error)
1115 				break;
1116 		}
1117 #ifdef MPLS
1118 		if (rtm->rtm_flags & RTF_MPLS) {
1119 			NET_LOCK();
1120 			error = rt_mpls_set(rt,
1121 			    info->rti_info[RTAX_SRC], info->rti_mpls);
1122 			NET_UNLOCK();
1123 			if (error)
1124 				break;
1125 		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
1126 			NET_LOCK();
1127 			/* if gateway changed remove MPLS information */
1128 			rt_mpls_clear(rt);
1129 			NET_UNLOCK();
1130 		}
1131 #endif
1132 
1133 #ifdef BFD
1134 		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1135 			if ((error = bfdset(rt)))
1136 				break;
1137 		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1138 		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
1139 			bfdclear(rt);
1140 		}
1141 #endif
1142 
1143 		NET_LOCK();
1144 		/* Hack to allow some flags to be toggled */
1145 		if (rtm->rtm_fmask) {
1146 			/* MPLS flag it is set by rt_mpls_set() */
1147 			rtm->rtm_fmask &= ~RTF_MPLS;
1148 			rtm->rtm_flags &= ~RTF_MPLS;
1149 			rt->rt_flags =
1150 			    (rt->rt_flags & ~rtm->rtm_fmask) |
1151 			    (rtm->rtm_flags & rtm->rtm_fmask);
1152 		}
1153 		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
1154 
1155 		ifp = if_get(rt->rt_ifidx);
1156 		KASSERT(ifp != NULL);
1157 		ifp->if_rtrequest(ifp, RTM_ADD, rt);
1158 		if_put(ifp);
1159 
1160 		if (info->rti_info[RTAX_LABEL] != NULL) {
1161 			char *rtlabel = ((struct sockaddr_rtlabel *)
1162 			    info->rti_info[RTAX_LABEL])->sr_label;
1163 			rtlabel_unref(rt->rt_labelid);
1164 			rt->rt_labelid = rtlabel_name2id(rtlabel);
1165 		}
1166 		if_group_routechange(info->rti_info[RTAX_DST],
1167 		    info->rti_info[RTAX_NETMASK]);
1168 		rt->rt_locks &= ~(rtm->rtm_inits);
1169 		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1170 		NET_UNLOCK();
1171 		break;
1172 	case RTM_GET:
1173 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1174 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1175 		    prio);
1176 		if (rt == NULL)
1177 			error = ESRCH;
1178 		break;
1179 	}
1180 
1181 	*prt = rt;
1182 	return (error);
1183 }
1184 
1185 struct ifaddr *
1186 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1187     unsigned int rtableid)
1188 {
1189 	struct ifaddr	*ifa;
1190 
1191 	if ((flags & RTF_GATEWAY) == 0) {
1192 		/*
1193 		 * If we are adding a route to an interface,
1194 		 * and the interface is a pt to pt link
1195 		 * we should search for the destination
1196 		 * as our clue to the interface.  Otherwise
1197 		 * we can use the local address.
1198 		 */
1199 		ifa = NULL;
1200 		if (flags & RTF_HOST)
1201 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1202 		if (ifa == NULL)
1203 			ifa = ifa_ifwithaddr(gateway, rtableid);
1204 	} else {
1205 		/*
1206 		 * If we are adding a route to a remote net
1207 		 * or host, the gateway may still be on the
1208 		 * other end of a pt to pt link.
1209 		 */
1210 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1211 	}
1212 	if (ifa == NULL) {
1213 		if (gateway->sa_family == AF_LINK) {
1214 			struct sockaddr_dl *sdl = satosdl(gateway);
1215 			struct ifnet *ifp = if_get(sdl->sdl_index);
1216 
1217 			if (ifp != NULL)
1218 				ifa = ifaof_ifpforaddr(dst, ifp);
1219 			if_put(ifp);
1220 		} else {
1221 			struct rtentry *rt;
1222 
1223 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1224 			if (rt != NULL)
1225 				ifa = rt->rt_ifa;
1226 			rtfree(rt);
1227 		}
1228 	}
1229 	if (ifa == NULL)
1230 		return (NULL);
1231 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1232 		struct ifaddr	*oifa = ifa;
1233 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1234 		if (ifa == NULL)
1235 			ifa = oifa;
1236 	}
1237 	return (ifa);
1238 }
1239 
1240 int
1241 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1242 {
1243 	struct ifnet	*ifp = NULL;
1244 
1245 	/*
1246 	 * The "returned" `ifa' is guaranteed to be alive only if
1247 	 * the NET_LOCK() is held.
1248 	 */
1249 	NET_ASSERT_LOCKED();
1250 
1251 	/*
1252 	 * ifp may be specified by sockaddr_dl when protocol address
1253 	 * is ambiguous
1254 	 */
1255 	if (info->rti_info[RTAX_IFP] != NULL) {
1256 		struct sockaddr_dl *sdl;
1257 
1258 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1259 		ifp = if_get(sdl->sdl_index);
1260 	}
1261 
1262 #ifdef IPSEC
1263 	/*
1264 	 * If the destination is a PF_KEY address, we'll look
1265 	 * for the existence of a encap interface number or address
1266 	 * in the options list of the gateway. By default, we'll return
1267 	 * enc0.
1268 	 */
1269 	if (info->rti_info[RTAX_DST] &&
1270 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1271 		info->rti_ifa = enc_getifa(rtid, 0);
1272 #endif
1273 
1274 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1275 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1276 
1277 	if (info->rti_ifa == NULL) {
1278 		struct sockaddr	*sa;
1279 
1280 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1281 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1282 				sa = info->rti_info[RTAX_DST];
1283 
1284 		if (sa != NULL && ifp != NULL)
1285 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1286 		else if (info->rti_info[RTAX_DST] != NULL &&
1287 		    info->rti_info[RTAX_GATEWAY] != NULL)
1288 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1289 			    info->rti_info[RTAX_DST],
1290 			    info->rti_info[RTAX_GATEWAY],
1291 			    rtid);
1292 		else if (sa != NULL)
1293 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1294 			    sa, sa, rtid);
1295 	}
1296 
1297 	if_put(ifp);
1298 
1299 	if (info->rti_ifa == NULL)
1300 		return (ENETUNREACH);
1301 
1302 	return (0);
1303 }
1304 
1305 int
1306 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1307 {
1308 	struct rtentry *nhrt = arg;
1309 
1310 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1311 	    !ISSET(rt->rt_locks, RTV_MTU))
1312 		rt->rt_mtu = 0;
1313 
1314 	return (0);
1315 }
1316 
1317 /*
1318  * Check if the user request to insert an ARP entry does not conflict
1319  * with existing ones.
1320  *
1321  * Only two entries are allowed for a given IP address: a private one
1322  * (priv) and a public one (pub).
1323  */
1324 int
1325 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1326 {
1327 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1328 
1329 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1330 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1331 		return (0);
1332 
1333 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1334 		return (0);
1335 
1336 	/* If the entry is cached, it can be updated. */
1337 	if (ISSET(rt->rt_flags, RTF_CACHED))
1338 		return (0);
1339 
1340 	/*
1341 	 * Same destination, not cached and both "priv" or "pub" conflict.
1342 	 * If a second entry exists, it always conflict.
1343 	 */
1344 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1345 	    ISSET(rt->rt_flags, RTF_MPATH))
1346 		return (EEXIST);
1347 
1348 	/* No conflict but an entry exist so we need to force mpath. */
1349 	info->rti_flags |= RTF_MPATH;
1350 	return (0);
1351 }
1352 
1353 void
1354 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1355     struct rt_kmetrics *out)
1356 {
1357 	int64_t expire;
1358 
1359 	if (which & RTV_MTU)
1360 		out->rmx_mtu = in->rmx_mtu;
1361 	if (which & RTV_EXPIRE) {
1362 		expire = in->rmx_expire;
1363 		if (expire != 0) {
1364 			expire -= gettime();
1365 			expire += getuptime();
1366 		}
1367 
1368 		out->rmx_expire = expire;
1369 	}
1370 }
1371 
1372 void
1373 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1374 {
1375 	int64_t expire;
1376 
1377 	expire = in->rmx_expire;
1378 	if (expire != 0) {
1379 		expire -= getuptime();
1380 		expire += gettime();
1381 	}
1382 
1383 	bzero(out, sizeof(*out));
1384 	out->rmx_locks = in->rmx_locks;
1385 	out->rmx_mtu = in->rmx_mtu;
1386 	out->rmx_expire = expire;
1387 	out->rmx_pksent = in->rmx_pksent;
1388 }
1389 
/*
 * ROUNDUP: round length `a' up to the next multiple of sizeof(long);
 * a length of zero or less still takes up one long.
 * ADVANCE: move pointer `x' past sockaddr `n', padding included.
 */
#define ROUNDUP(a) \
	((a) <= 0 ? sizeof(long) : ((((a) - 1) | (sizeof(long) - 1)) + 1))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
1393 
1394 int
1395 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1396 {
1397 	struct sockaddr	*sa;
1398 	int		 i;
1399 
1400 	/*
1401 	 * Parse address bits, split address storage in chunks, and
1402 	 * set info pointers.  Use sa_len for traversing the memory
1403 	 * and check that we stay within in the limit.
1404 	 */
1405 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1406 	for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1407 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1408 			continue;
1409 		if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1410 			return (EINVAL);
1411 		sa = (struct sockaddr *)cp;
1412 		if (cp + sa->sa_len > cplim)
1413 			return (EINVAL);
1414 		rtinfo->rti_info[i] = sa;
1415 		ADVANCE(cp, sa);
1416 	}
1417 	/*
1418 	 * Check that the address family is suitable for the route address
1419 	 * type.  Check that each address has a size that fits its family
1420 	 * and its length is within the size.  Strings within addresses must
1421 	 * be NUL terminated.
1422 	 */
1423 	for (i = 0; i < RTAX_MAX; i++) {
1424 		size_t len, maxlen, size;
1425 
1426 		sa = rtinfo->rti_info[i];
1427 		if (sa == NULL)
1428 			continue;
1429 		maxlen = size = 0;
1430 		switch (i) {
1431 		case RTAX_DST:
1432 		case RTAX_GATEWAY:
1433 		case RTAX_SRC:
1434 			switch (sa->sa_family) {
1435 			case AF_INET:
1436 				size = sizeof(struct sockaddr_in);
1437 				break;
1438 			case AF_LINK:
1439 				size = sizeof(struct sockaddr_dl);
1440 				break;
1441 #ifdef INET6
1442 			case AF_INET6:
1443 				size = sizeof(struct sockaddr_in6);
1444 				break;
1445 #endif
1446 #ifdef MPLS
1447 			case AF_MPLS:
1448 				size = sizeof(struct sockaddr_mpls);
1449 				break;
1450 #endif
1451 			}
1452 			break;
1453 		case RTAX_IFP:
1454 			if (sa->sa_family != AF_LINK)
1455 				return (EAFNOSUPPORT);
1456 			/*
1457 			 * XXX Should be sizeof(struct sockaddr_dl), but
1458 			 * route(8) has a bug and provides less memory.
1459 			 * arp(8) has another bug and uses sizeof pointer.
1460 			 */
1461 			size = 4;
1462 			break;
1463 		case RTAX_IFA:
1464 			switch (sa->sa_family) {
1465 			case AF_INET:
1466 				size = sizeof(struct sockaddr_in);
1467 				break;
1468 #ifdef INET6
1469 			case AF_INET6:
1470 				size = sizeof(struct sockaddr_in6);
1471 				break;
1472 #endif
1473 			default:
1474 				return (EAFNOSUPPORT);
1475 			}
1476 			break;
1477 		case RTAX_LABEL:
1478 			sa->sa_family = AF_UNSPEC;
1479 			maxlen = RTLABEL_LEN;
1480 			size = sizeof(struct sockaddr_rtlabel);
1481 			break;
1482 #ifdef BFD
1483 		case RTAX_BFD:
1484 			sa->sa_family = AF_UNSPEC;
1485 			size = sizeof(struct sockaddr_bfd);
1486 			break;
1487 #endif
1488 		case RTAX_DNS:
1489 			/* more validation in rtm_validate_proposal */
1490 			if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1491 				return (EINVAL);
1492 			if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1493 			    sr_dns))
1494 				return (EINVAL);
1495 			switch (sa->sa_family) {
1496 			case AF_INET:
1497 #ifdef INET6
1498 			case AF_INET6:
1499 #endif
1500 				break;
1501 			default:
1502 				return (EAFNOSUPPORT);
1503 			}
1504 			break;
1505 		case RTAX_STATIC:
1506 			sa->sa_family = AF_UNSPEC;
1507 			maxlen = RTSTATIC_LEN;
1508 			size = sizeof(struct sockaddr_rtstatic);
1509 			break;
1510 		case RTAX_SEARCH:
1511 			sa->sa_family = AF_UNSPEC;
1512 			maxlen = RTSEARCH_LEN;
1513 			size = sizeof(struct sockaddr_rtsearch);
1514 			break;
1515 		}
1516 		if (size) {
1517 			/* memory for the full struct must be provided */
1518 			if (sa->sa_len < size)
1519 				return (EINVAL);
1520 		}
1521 		if (maxlen) {
1522 			/* this should not happen */
1523 			if (2 + maxlen > size)
1524 				return (EINVAL);
1525 			/* strings must be NUL terminated within the struct */
1526 			len = strnlen(sa->sa_data, maxlen);
1527 			if (len >= maxlen || 2 + len >= sa->sa_len)
1528 				return (EINVAL);
1529 			break;
1530 		}
1531 	}
1532 	return (0);
1533 }
1534 
1535 struct mbuf *
1536 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1537 {
1538 	struct rt_msghdr	*rtm;
1539 	struct mbuf		*m;
1540 	int			 i;
1541 	struct sockaddr		*sa;
1542 	int			 len, dlen, hlen;
1543 
1544 	switch (type) {
1545 	case RTM_DELADDR:
1546 	case RTM_NEWADDR:
1547 		len = sizeof(struct ifa_msghdr);
1548 		break;
1549 	case RTM_IFINFO:
1550 		len = sizeof(struct if_msghdr);
1551 		break;
1552 	case RTM_IFANNOUNCE:
1553 		len = sizeof(struct if_announcemsghdr);
1554 		break;
1555 #ifdef BFD
1556 	case RTM_BFD:
1557 		len = sizeof(struct bfd_msghdr);
1558 		break;
1559 #endif
1560 	case RTM_80211INFO:
1561 		len = sizeof(struct if_ieee80211_msghdr);
1562 		break;
1563 	default:
1564 		len = sizeof(struct rt_msghdr);
1565 		break;
1566 	}
1567 	if (len > MCLBYTES)
1568 		panic("rtm_msg1");
1569 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1570 	if (m && len > MHLEN) {
1571 		MCLGET(m, M_DONTWAIT);
1572 		if ((m->m_flags & M_EXT) == 0) {
1573 			m_free(m);
1574 			m = NULL;
1575 		}
1576 	}
1577 	if (m == NULL)
1578 		return (m);
1579 	m->m_pkthdr.len = m->m_len = hlen = len;
1580 	m->m_pkthdr.ph_ifidx = 0;
1581 	rtm = mtod(m, struct rt_msghdr *);
1582 	bzero(rtm, len);
1583 	for (i = 0; i < RTAX_MAX; i++) {
1584 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1585 			continue;
1586 		rtinfo->rti_addrs |= (1 << i);
1587 		dlen = ROUNDUP(sa->sa_len);
1588 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1589 			m_freem(m);
1590 			return (NULL);
1591 		}
1592 		len += dlen;
1593 	}
1594 	rtm->rtm_msglen = len;
1595 	rtm->rtm_hdrlen = hlen;
1596 	rtm->rtm_version = RTM_VERSION;
1597 	rtm->rtm_type = type;
1598 	return (m);
1599 }
1600 
1601 int
1602 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1603     struct walkarg *w)
1604 {
1605 	int		i;
1606 	int		len, dlen, hlen, second_time = 0;
1607 	caddr_t		cp0;
1608 
1609 	rtinfo->rti_addrs = 0;
1610 again:
1611 	switch (type) {
1612 	case RTM_DELADDR:
1613 	case RTM_NEWADDR:
1614 		len = sizeof(struct ifa_msghdr);
1615 		break;
1616 	case RTM_IFINFO:
1617 		len = sizeof(struct if_msghdr);
1618 		break;
1619 	default:
1620 		len = sizeof(struct rt_msghdr);
1621 		break;
1622 	}
1623 	hlen = len;
1624 	if ((cp0 = cp) != NULL)
1625 		cp += len;
1626 	for (i = 0; i < RTAX_MAX; i++) {
1627 		struct sockaddr *sa;
1628 
1629 		if ((sa = rtinfo->rti_info[i]) == NULL)
1630 			continue;
1631 		rtinfo->rti_addrs |= (1 << i);
1632 		dlen = ROUNDUP(sa->sa_len);
1633 		if (cp) {
1634 			bcopy(sa, cp, (size_t)dlen);
1635 			cp += dlen;
1636 		}
1637 		len += dlen;
1638 	}
1639 	/* align message length to the next natural boundary */
1640 	len = ALIGN(len);
1641 	if (cp == 0 && w != NULL && !second_time) {
1642 		w->w_needed += len;
1643 		if (w->w_needed <= 0 && w->w_where) {
1644 			if (w->w_tmemsize < len) {
1645 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1646 				w->w_tmem = malloc(len, M_RTABLE,
1647 				    M_NOWAIT | M_ZERO);
1648 				if (w->w_tmem)
1649 					w->w_tmemsize = len;
1650 			}
1651 			if (w->w_tmem) {
1652 				cp = w->w_tmem;
1653 				second_time = 1;
1654 				goto again;
1655 			} else
1656 				w->w_where = 0;
1657 		}
1658 	}
1659 	if (cp && w)		/* clear the message header */
1660 		bzero(cp0, hlen);
1661 
1662 	if (cp) {
1663 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1664 
1665 		rtm->rtm_version = RTM_VERSION;
1666 		rtm->rtm_type = type;
1667 		rtm->rtm_msglen = len;
1668 		rtm->rtm_hdrlen = hlen;
1669 	}
1670 	return (len);
1671 }
1672 
1673 void
1674 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1675 {
1676 	struct rt_addrinfo	 info;
1677 	struct ifnet		*ifp;
1678 	struct sockaddr_rtlabel	 sa_rl;
1679 	struct sockaddr_in6	 sa_mask;
1680 
1681 	memset(&info, 0, sizeof(info));
1682 	info.rti_info[RTAX_DST] = rt_key(rt);
1683 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1684 	if (!ISSET(rt->rt_flags, RTF_HOST))
1685 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1686 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1687 	ifp = if_get(rt->rt_ifidx);
1688 	if (ifp != NULL) {
1689 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1690 		info.rti_info[RTAX_IFA] =
1691 		    rtable_getsource(rtableid, info.rti_info[RTAX_DST]->sa_family);
1692 		if (info.rti_info[RTAX_IFA] == NULL)
1693 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1694 	}
1695 
1696 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1697 	    rtableid);
1698 	if_put(ifp);
1699 }
1700 
1701 /*
1702  * This routine is called to generate a message from the routing
1703  * socket indicating that a redirect has occurred, a routing lookup
1704  * has failed, or that a protocol has detected timeouts to a particular
1705  * destination.
1706  */
1707 void
1708 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1709     u_int ifidx, int error, u_int tableid)
1710 {
1711 	struct rt_msghdr	*rtm;
1712 	struct mbuf		*m;
1713 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1714 
1715 	if (rtptable.rtp_count == 0)
1716 		return;
1717 	m = rtm_msg1(type, rtinfo);
1718 	if (m == NULL)
1719 		return;
1720 	rtm = mtod(m, struct rt_msghdr *);
1721 	rtm->rtm_flags = RTF_DONE | flags;
1722 	rtm->rtm_priority = prio;
1723 	rtm->rtm_errno = error;
1724 	rtm->rtm_tableid = tableid;
1725 	rtm->rtm_addrs = rtinfo->rti_addrs;
1726 	rtm->rtm_index = ifidx;
1727 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1728 }
1729 
1730 /*
1731  * This routine is called to generate a message from the routing
1732  * socket indicating that the status of a network interface has changed.
1733  */
1734 void
1735 rtm_ifchg(struct ifnet *ifp)
1736 {
1737 	struct if_msghdr	*ifm;
1738 	struct mbuf		*m;
1739 
1740 	if (rtptable.rtp_count == 0)
1741 		return;
1742 	m = rtm_msg1(RTM_IFINFO, NULL);
1743 	if (m == NULL)
1744 		return;
1745 	ifm = mtod(m, struct if_msghdr *);
1746 	ifm->ifm_index = ifp->if_index;
1747 	ifm->ifm_tableid = ifp->if_rdomain;
1748 	ifm->ifm_flags = ifp->if_flags;
1749 	ifm->ifm_xflags = ifp->if_xflags;
1750 	if_getdata(ifp, &ifm->ifm_data);
1751 	ifm->ifm_addrs = 0;
1752 	route_input(m, NULL, AF_UNSPEC);
1753 }
1754 
1755 /*
1756  * This is called to generate messages from the routing socket
1757  * indicating a network interface has had addresses associated with it.
1758  * if we ever reverse the logic and replace messages TO the routing
1759  * socket indicate a request to configure interfaces, then it will
1760  * be unnecessary as the routing socket will automatically generate
1761  * copies of it.
1762  */
1763 void
1764 rtm_addr(int cmd, struct ifaddr *ifa)
1765 {
1766 	struct ifnet		*ifp = ifa->ifa_ifp;
1767 	struct mbuf		*m;
1768 	struct rt_addrinfo	 info;
1769 	struct ifa_msghdr	*ifam;
1770 
1771 	if (rtptable.rtp_count == 0)
1772 		return;
1773 
1774 	memset(&info, 0, sizeof(info));
1775 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1776 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1777 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1778 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1779 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1780 		return;
1781 	ifam = mtod(m, struct ifa_msghdr *);
1782 	ifam->ifam_index = ifp->if_index;
1783 	ifam->ifam_metric = ifa->ifa_metric;
1784 	ifam->ifam_flags = ifa->ifa_flags;
1785 	ifam->ifam_addrs = info.rti_addrs;
1786 	ifam->ifam_tableid = ifp->if_rdomain;
1787 
1788 	route_input(m, NULL,
1789 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1790 }
1791 
1792 /*
1793  * This is called to generate routing socket messages indicating
1794  * network interface arrival and departure.
1795  */
1796 void
1797 rtm_ifannounce(struct ifnet *ifp, int what)
1798 {
1799 	struct if_announcemsghdr	*ifan;
1800 	struct mbuf			*m;
1801 
1802 	if (rtptable.rtp_count == 0)
1803 		return;
1804 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1805 	if (m == NULL)
1806 		return;
1807 	ifan = mtod(m, struct if_announcemsghdr *);
1808 	ifan->ifan_index = ifp->if_index;
1809 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1810 	ifan->ifan_what = what;
1811 	route_input(m, NULL, AF_UNSPEC);
1812 }
1813 
#ifdef BFD
/*
 * Generate a routing socket message (RTM_BFD) reporting the state of
 * BFD session `bfd'.  The message carries the destination and the
 * interface address of the session's route.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rtentry		*rt;
	struct rt_addrinfo	 info;
	struct sockaddr_bfd	 sa_bfd;
	struct bfd_msghdr	*bfdm;
	struct mbuf		*m;

	if (rtptable.rtp_count == 0)
		return;

	rt = bfd->bc_rt;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;

	bfdm = mtod(m, struct bfd_msghdr *);
	bfdm->bm_addrs = info.rti_addrs;

	/* the session state travels in the header itself */
	bfd2sa(bfd->bc_rt, &sa_bfd);
	memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1845 
1846 /*
1847  * This is used to generate routing socket messages indicating
1848  * the state of an ieee80211 interface.
1849  */
1850 void
1851 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1852 {
1853 	struct if_ieee80211_msghdr	*ifim;
1854 	struct mbuf			*m;
1855 
1856 	if (rtptable.rtp_count == 0)
1857 		return;
1858 	m = rtm_msg1(RTM_80211INFO, NULL);
1859 	if (m == NULL)
1860 		return;
1861 	ifim = mtod(m, struct if_ieee80211_msghdr *);
1862 	ifim->ifim_index = ifp->if_index;
1863 	ifim->ifim_tableid = ifp->if_rdomain;
1864 
1865 	memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1866 	route_input(m, NULL, AF_UNSPEC);
1867 }
1868 
1869 /*
1870  * This is used to generate routing socket messages indicating
1871  * the address selection proposal from an interface.
1872  */
1873 void
1874 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1875     uint8_t prio)
1876 {
1877 	struct rt_msghdr	*rtm;
1878 	struct mbuf		*m;
1879 
1880 	m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1881 	if (m == NULL)
1882 		return;
1883 	rtm = mtod(m, struct rt_msghdr *);
1884 	rtm->rtm_flags = RTF_DONE | flags;
1885 	rtm->rtm_priority = prio;
1886 	rtm->rtm_tableid = ifp->if_rdomain;
1887 	rtm->rtm_index = ifp->if_index;
1888 	rtm->rtm_addrs = rtinfo->rti_addrs;
1889 
1890 	route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1891 }
1892 
1893 /*
1894  * This is used in dumping the kernel table via sysctl().
1895  */
1896 int
1897 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1898 {
1899 	struct walkarg		*w = v;
1900 	int			 error = 0, size;
1901 	struct rt_addrinfo	 info;
1902 	struct ifnet		*ifp;
1903 #ifdef BFD
1904 	struct sockaddr_bfd	 sa_bfd;
1905 #endif
1906 	struct sockaddr_rtlabel	 sa_rl;
1907 	struct sockaddr_in6	 sa_mask;
1908 
1909 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1910 		return 0;
1911 	if (w->w_op == NET_RT_DUMP && w->w_arg) {
1912 		u_int8_t prio = w->w_arg & RTP_MASK;
1913 		if (w->w_arg < 0) {
1914 			prio = (-w->w_arg) & RTP_MASK;
1915 			/* Show all routes that are not this priority */
1916 			if (prio == (rt->rt_priority & RTP_MASK))
1917 				return 0;
1918 		} else {
1919 			if (prio != (rt->rt_priority & RTP_MASK) &&
1920 			    prio != RTP_ANY)
1921 				return 0;
1922 		}
1923 	}
1924 	bzero(&info, sizeof(info));
1925 	info.rti_info[RTAX_DST] = rt_key(rt);
1926 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1927 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1928 	ifp = if_get(rt->rt_ifidx);
1929 	if (ifp != NULL) {
1930 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1931 		info.rti_info[RTAX_IFA] =
1932 		    rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
1933 		if (info.rti_info[RTAX_IFA] == NULL)
1934 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1935 		if (ifp->if_flags & IFF_POINTOPOINT)
1936 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1937 	}
1938 	if_put(ifp);
1939 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1940 #ifdef BFD
1941 	if (rt->rt_flags & RTF_BFD)
1942 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
1943 #endif
1944 #ifdef MPLS
1945 	if (rt->rt_flags & RTF_MPLS) {
1946 		struct sockaddr_mpls	 sa_mpls;
1947 
1948 		bzero(&sa_mpls, sizeof(sa_mpls));
1949 		sa_mpls.smpls_family = AF_MPLS;
1950 		sa_mpls.smpls_len = sizeof(sa_mpls);
1951 		sa_mpls.smpls_label = ((struct rt_mpls *)
1952 		    rt->rt_llinfo)->mpls_label;
1953 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1954 		info.rti_mpls = ((struct rt_mpls *)
1955 		    rt->rt_llinfo)->mpls_operation;
1956 	}
1957 #endif
1958 
1959 	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1960 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1961 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1962 
1963 		rtm->rtm_pid = curproc->p_p->ps_pid;
1964 		rtm->rtm_flags = rt->rt_flags;
1965 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
1966 		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1967 		/* Do not account the routing table's reference. */
1968 		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
1969 		rtm->rtm_index = rt->rt_ifidx;
1970 		rtm->rtm_addrs = info.rti_addrs;
1971 		rtm->rtm_tableid = id;
1972 #ifdef MPLS
1973 		rtm->rtm_mpls = info.rti_mpls;
1974 #endif
1975 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1976 			w->w_where = NULL;
1977 		else
1978 			w->w_where += size;
1979 	}
1980 	return (error);
1981 }
1982 
1983 int
1984 sysctl_iflist(int af, struct walkarg *w)
1985 {
1986 	struct ifnet		*ifp;
1987 	struct ifaddr		*ifa;
1988 	struct rt_addrinfo	 info;
1989 	int			 len, error = 0;
1990 
1991 	bzero(&info, sizeof(info));
1992 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1993 		if (w->w_arg && w->w_arg != ifp->if_index)
1994 			continue;
1995 		/* Copy the link-layer address first */
1996 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1997 		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
1998 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1999 			struct if_msghdr *ifm;
2000 
2001 			ifm = (struct if_msghdr *)w->w_tmem;
2002 			ifm->ifm_index = ifp->if_index;
2003 			ifm->ifm_tableid = ifp->if_rdomain;
2004 			ifm->ifm_flags = ifp->if_flags;
2005 			if_getdata(ifp, &ifm->ifm_data);
2006 			ifm->ifm_addrs = info.rti_addrs;
2007 			error = copyout(ifm, w->w_where, len);
2008 			if (error)
2009 				return (error);
2010 			w->w_where += len;
2011 		}
2012 		info.rti_info[RTAX_IFP] = NULL;
2013 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2014 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
2015 			if (af && af != ifa->ifa_addr->sa_family)
2016 				continue;
2017 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2018 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2019 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2020 			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
2021 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
2022 				struct ifa_msghdr *ifam;
2023 
2024 				ifam = (struct ifa_msghdr *)w->w_tmem;
2025 				ifam->ifam_index = ifa->ifa_ifp->if_index;
2026 				ifam->ifam_flags = ifa->ifa_flags;
2027 				ifam->ifam_metric = ifa->ifa_metric;
2028 				ifam->ifam_addrs = info.rti_addrs;
2029 				error = copyout(w->w_tmem, w->w_where, len);
2030 				if (error)
2031 					return (error);
2032 				w->w_where += len;
2033 			}
2034 		}
2035 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2036 		    info.rti_info[RTAX_BRD] = NULL;
2037 	}
2038 	return (0);
2039 }
2040 
2041 int
2042 sysctl_ifnames(struct walkarg *w)
2043 {
2044 	struct if_nameindex_msg ifn;
2045 	struct ifnet *ifp;
2046 	int error = 0;
2047 
2048 	/* XXX ignore tableid for now */
2049 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2050 		if (w->w_arg && w->w_arg != ifp->if_index)
2051 			continue;
2052 		w->w_needed += sizeof(ifn);
2053 		if (w->w_where && w->w_needed <= 0) {
2054 
2055 			memset(&ifn, 0, sizeof(ifn));
2056 			ifn.if_index = ifp->if_index;
2057 			strlcpy(ifn.if_name, ifp->if_xname,
2058 			    sizeof(ifn.if_name));
2059 			error = copyout(&ifn, w->w_where, sizeof(ifn));
2060 			if (error)
2061 				return (error);
2062 			w->w_where += sizeof(ifn);
2063 		}
2064 	}
2065 
2066 	return (0);
2067 }
2068 
2069 int
2070 sysctl_source(int af, u_int tableid, struct walkarg *w)
2071 {
2072 	struct sockaddr	*sa;
2073 	int		 size, error = 0;
2074 
2075 	sa = rtable_getsource(tableid, af);
2076 	if (sa) {
2077 		switch (sa->sa_family) {
2078 		case AF_INET:
2079 			size = sizeof(struct sockaddr_in);
2080 			break;
2081 #ifdef INET6
2082 		case AF_INET6:
2083 			size = sizeof(struct sockaddr_in6);
2084 			break;
2085 #endif
2086 		default:
2087 			return (0);
2088 		}
2089 		w->w_needed += size;
2090 		if (w->w_where && w->w_needed <= 0) {
2091 			if ((error = copyout(sa, w->w_where, size)))
2092 				return (error);
2093 			w->w_where += size;
2094 		}
2095 	}
2096 	return (0);
2097 }
2098 
2099 int
2100 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2101     size_t newlen)
2102 {
2103 	int			 i, error = EINVAL;
2104 	u_char			 af;
2105 	struct walkarg		 w;
2106 	struct rt_tableinfo	 tableinfo;
2107 	u_int			 tableid = 0;
2108 
2109 	if (new)
2110 		return (EPERM);
2111 	if (namelen < 3 || namelen > 4)
2112 		return (EINVAL);
2113 	af = name[0];
2114 	bzero(&w, sizeof(w));
2115 	w.w_where = where;
2116 	w.w_given = *given;
2117 	w.w_needed = 0 - w.w_given;
2118 	w.w_op = name[1];
2119 	w.w_arg = name[2];
2120 
2121 	if (namelen == 4) {
2122 		tableid = name[3];
2123 		if (!rtable_exists(tableid))
2124 			return (ENOENT);
2125 	} else
2126 		tableid = curproc->p_p->ps_rtableid;
2127 
2128 	switch (w.w_op) {
2129 	case NET_RT_DUMP:
2130 	case NET_RT_FLAGS:
2131 		NET_LOCK();
2132 		for (i = 1; i <= AF_MAX; i++) {
2133 			if (af != 0 && af != i)
2134 				continue;
2135 
2136 			error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2137 			    &w);
2138 			if (error == EAFNOSUPPORT)
2139 				error = 0;
2140 			if (error)
2141 				break;
2142 		}
2143 		NET_UNLOCK();
2144 		break;
2145 
2146 	case NET_RT_IFLIST:
2147 		NET_LOCK();
2148 		error = sysctl_iflist(af, &w);
2149 		NET_UNLOCK();
2150 		break;
2151 
2152 	case NET_RT_STATS:
2153 		return (sysctl_rtable_rtstat(where, given, new));
2154 	case NET_RT_TABLE:
2155 		tableid = w.w_arg;
2156 		if (!rtable_exists(tableid))
2157 			return (ENOENT);
2158 		memset(&tableinfo, 0, sizeof tableinfo);
2159 		tableinfo.rti_tableid = tableid;
2160 		tableinfo.rti_domainid = rtable_l2(tableid);
2161 		error = sysctl_rdstruct(where, given, new,
2162 		    &tableinfo, sizeof(tableinfo));
2163 		return (error);
2164 	case NET_RT_IFNAMES:
2165 		NET_LOCK();
2166 		error = sysctl_ifnames(&w);
2167 		NET_UNLOCK();
2168 		break;
2169 	case NET_RT_SOURCE:
2170 		tableid = w.w_arg;
2171 		if (!rtable_exists(tableid))
2172 			return (ENOENT);
2173 		NET_LOCK();
2174 		for (i = 1; i <= AF_MAX; i++) {
2175 			if (af != 0 && af != i)
2176 				continue;
2177 
2178 			error = sysctl_source(i, tableid, &w);
2179 			if (error == EAFNOSUPPORT)
2180 				error = 0;
2181 			if (error)
2182 				break;
2183 		}
2184 		NET_UNLOCK();
2185 		break;
2186 	}
2187 	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2188 	w.w_needed += w.w_given;
2189 	if (where) {
2190 		*given = w.w_where - (caddr_t)where;
2191 		if (*given < w.w_needed)
2192 			return (ENOMEM);
2193 	} else
2194 		*given = (11 * w.w_needed) / 10;
2195 
2196 	return (error);
2197 }
2198 
2199 int
2200 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2201 {
2202 	extern struct cpumem *rtcounters;
2203 	uint64_t counters[rts_ncounters];
2204 	struct rtstat rtstat;
2205 	uint32_t *words = (uint32_t *)&rtstat;
2206 	int i;
2207 
2208 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2209 	memset(&rtstat, 0, sizeof rtstat);
2210 	counters_read(rtcounters, counters, nitems(counters));
2211 
2212 	for (i = 0; i < nitems(counters); i++)
2213 		words[i] = (uint32_t)counters[i];
2214 
2215 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2216 }
2217 
2218 int
2219 rtm_validate_proposal(struct rt_addrinfo *info)
2220 {
2221 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2222 	    RTA_SEARCH)) {
2223 		return -1;
2224 	}
2225 
2226 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2227 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2228 		if (sa == NULL)
2229 			return -1;
2230 		switch (sa->sa_family) {
2231 		case AF_INET:
2232 			if (sa->sa_len != sizeof(struct sockaddr_in))
2233 				return -1;
2234 			break;
2235 		case AF_INET6:
2236 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2237 				return -1;
2238 			break;
2239 		default:
2240 			return -1;
2241 		}
2242 	}
2243 
2244 	if (ISSET(info->rti_addrs, RTA_IFA)) {
2245 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
2246 		if (sa == NULL)
2247 			return -1;
2248 		switch (sa->sa_family) {
2249 		case AF_INET:
2250 			if (sa->sa_len != sizeof(struct sockaddr_in))
2251 				return -1;
2252 			break;
2253 		case AF_INET6:
2254 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2255 				return -1;
2256 			break;
2257 		default:
2258 			return -1;
2259 		}
2260 	}
2261 
2262 	if (ISSET(info->rti_addrs, RTA_DNS)) {
2263 		struct sockaddr_rtdns *rtdns =
2264 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2265 		if (rtdns == NULL)
2266 			return -1;
2267 		if (rtdns->sr_len > sizeof(*rtdns))
2268 			return -1;
2269 		if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2270 			return -1;
2271 		switch (rtdns->sr_family) {
2272 		case AF_INET:
2273 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2274 			    sr_dns)) % sizeof(struct in_addr) != 0)
2275 				return -1;
2276 			break;
2277 #ifdef INET6
2278 		case AF_INET6:
2279 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2280 			    sr_dns)) % sizeof(struct in6_addr) != 0)
2281 				return -1;
2282 			break;
2283 #endif
2284 		default:
2285 			return -1;
2286 		}
2287 	}
2288 
2289 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
2290 		struct sockaddr_rtstatic *rtstatic =
2291 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2292 		if (rtstatic == NULL)
2293 			return -1;
2294 		if (rtstatic->sr_len > sizeof(*rtstatic))
2295 			return -1;
2296 		if (rtstatic->sr_len <=
2297 		    offsetof(struct sockaddr_rtstatic, sr_static))
2298 			return -1;
2299 	}
2300 
2301 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2302 		struct sockaddr_rtsearch *rtsearch =
2303 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2304 		if (rtsearch == NULL)
2305 			return -1;
2306 		if (rtsearch->sr_len > sizeof(*rtsearch))
2307 			return -1;
2308 		if (rtsearch->sr_len <=
2309 		    offsetof(struct sockaddr_rtsearch, sr_search))
2310 			return -1;
2311 	}
2312 
2313 	return 0;
2314 }
2315 
2316 int
2317 rt_setsource(unsigned int rtableid, struct sockaddr *src)
2318 {
2319 	struct ifaddr	*ifa;
2320 	/*
2321 	 * If source address is 0.0.0.0 or ::
2322 	 * use automatic source selection
2323 	 */
2324 	switch(src->sa_family) {
2325 	case AF_INET:
2326 		if(satosin(src)->sin_addr.s_addr == INADDR_ANY) {
2327 			rtable_setsource(rtableid, AF_INET, NULL);
2328 			return (0);
2329 		}
2330 		break;
2331 #ifdef INET6
2332 	case AF_INET6:
2333 		if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
2334 			rtable_setsource(rtableid, AF_INET6, NULL);
2335 			return (0);
2336 		}
2337 		break;
2338 #endif
2339 	default:
2340 		return (EAFNOSUPPORT);
2341 	}
2342 
2343 	/*
2344 	 * Check if source address is assigned to an interface in the
2345 	 * same rdomain
2346 	 */
2347 	if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL)
2348 		return (EINVAL);
2349 
2350 	return (rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr));
2351 }
2352 
2353 /*
2354  * Definitions of protocols supported in the ROUTE domain.
2355  */
2356 
2357 struct domain routedomain;
2358 
2359 struct protosw routesw[] = {
2360 {
2361   .pr_type	= SOCK_RAW,
2362   .pr_domain	= &routedomain,
2363   .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2364   .pr_output	= route_output,
2365   .pr_ctloutput	= route_ctloutput,
2366   .pr_usrreq	= route_usrreq,
2367   .pr_attach	= route_attach,
2368   .pr_detach	= route_detach,
2369   .pr_init	= route_prinit,
2370   .pr_sysctl	= sysctl_rtable
2371 }
2372 };
2373 
2374 struct domain routedomain = {
2375   .dom_family = PF_ROUTE,
2376   .dom_name = "route",
2377   .dom_init = route_init,
2378   .dom_protosw = routesw,
2379   .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2380 };
2381