xref: /openbsd-src/sys/net/rtsock.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: rtsock.c,v 1.297 2019/11/24 07:56:03 claudio Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75 
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80 
81 #include <netinet/in.h>
82 
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93 
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97 
/* Send and receive buffer sizes handed to soreserve() in route_attach(). */
98 #define	ROUTESNDQ	8192
99 #define	ROUTERCVQ	8192
100 
/*
 * Address used as the source of every message appended to a routing
 * socket's receive buffer in this file, and copied out for PRU_PEERADDR.
 * NOTE(review): the positional initializer presumably fills sa_len (2)
 * and sa_family (PF_ROUTE) -- confirm against struct sockaddr.
 */
101 const struct sockaddr route_src = { 2, PF_ROUTE, };
102 
103 struct walkarg {
104 	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
105 	caddr_t	w_where, w_tmem;
106 };
107 
/* Routing socket protocol entry points and control-block helpers. */
108 void	route_prinit(void);
109 void	rcb_ref(void *, void *);
110 void	rcb_unref(void *, void *);
111 int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
112 	    struct mbuf *);
113 int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
114 int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
115 	    struct mbuf *, struct proc *);
116 void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
117 int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
118 int	route_cleargateway(struct rtentry *, void *, unsigned int);
119 void	rtm_senddesync_timer(void *);
120 void	rtm_senddesync(struct socket *);
121 int	rtm_sendup(struct socket *, struct mbuf *, int);
122 
/* Routing message handling: request execution, reply/message building. */
123 int	rtm_getifa(struct rt_addrinfo *, unsigned int);
124 int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
125 	    uint8_t, unsigned int);
126 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
127 struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
128 int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
129 		     struct walkarg *);
130 int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
131 int		 rtm_validate_proposal(struct rt_addrinfo *);
132 void		 rtm_setmetrics(u_long, const struct rt_metrics *,
133 		     struct rt_kmetrics *);
134 void		 rtm_getmetrics(const struct rt_kmetrics *,
135 		     struct rt_metrics *);
136 
/* sysctl handlers (defined outside the part of the file shown here). */
137 int		 sysctl_iflist(int, struct walkarg *);
138 int		 sysctl_ifnames(struct walkarg *);
139 int		 sysctl_rtable_rtstat(void *, size_t *, void *);
140 
/*
 * Routing socket control block.  One is allocated from rtpcb_pool per
 * socket in route_attach(), stored in so_pcb and linked on
 * rtptable.rtp_list.
 *
 *	rop_socket	socket this block belongs to (set in route_attach())
 *	rop_list	linkage on rtptable.rtp_list
 *	rop_refcnt	references; taken by rcb_ref() and route_input(),
 *			waited on in route_detach() before the block is freed
 *	rop_timeout	timeout running rtm_senddesync_timer()
 *	rop_msgfilter	ROUTE_MSGFILTER bitmask; 0 means no filtering
 *	rop_flags	ROUTECB_FLAG_* bits
 *	rop_rtableid	ROUTE_TABLEFILTER value; starts as the creating
 *			process's ps_rtableid
 *	rop_proto	protocol passed to route_attach(); compared with the
 *			message address family in route_input()
 *	rop_priority	ROUTE_PRIOFILTER value; 0 means no filtering
 */
141 struct rtpcb {
142 	struct socket		*rop_socket;
143 
144 	SRPL_ENTRY(rtpcb)	rop_list;
145 	struct refcnt		rop_refcnt;
146 	struct timeout		rop_timeout;
147 	unsigned int		rop_msgfilter;
148 	unsigned int		rop_flags;
149 	u_int			rop_rtableid;
150 	unsigned short		rop_proto;
151 	u_char			rop_priority;
152 };
/* Fetch the routing control block hung off a socket (may be NULL). */
153 #define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
154 
/*
 * Global registry of routing sockets.
 *
 *	rtp_list	list of all attached rtpcbs, walked in route_input()
 *	rtp_rc		reference callbacks for the list (rcb_ref/rcb_unref)
 *	rtp_lk		rwlock held for writing around list insert/remove
 *	rtp_count	number of attached rtpcbs; updated under rtp_lk in
 *			route_attach()/route_detach(), read in route_output()
 */
155 struct rtptable {
156 	SRPL_HEAD(, rtpcb)	rtp_list;
157 	struct srpl_rc		rtp_rc;
158 	struct rwlock		rtp_lk;
159 	unsigned int		rtp_count;
160 };
161 
/* Allocator for struct rtpcb and the single registry instance. */
162 struct pool rtpcb_pool;
163 struct rtptable rtptable;
164 
165 /*
166  * These flags and timeout are used for indicating to userland (via a
167  * RTM_DESYNC msg) when the route socket has overflowed and messages
168  * have been lost.
169  */
/*
 * Both flags live in rtpcb.rop_flags.  rtm_sendup() sets them together
 * when a message cannot be queued; rtm_senddesync() clears DESYNC once
 * the RTM_DESYNC message has been appended, and the PRU_RCVD handler in
 * route_usrreq() clears FLUSH once sbspace() of the receive buffer is
 * back at sb_hiwat.
 */
170 #define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
171 #define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
172 					   queueing more packets */
173 
/* Delay before rtm_senddesync() retries a failed RTM_DESYNC delivery. */
174 #define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */
175 
176 void
177 route_prinit(void)
178 {
179 	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
180 	rw_init(&rtptable.rtp_lk, "rtsock");
181 	SRPL_INIT(&rtptable.rtp_list);
182 	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
183 	    IPL_NONE, PR_WAITOK, "rtpcb", NULL);
184 }
185 
186 void
187 rcb_ref(void *null, void *v)
188 {
189 	struct rtpcb *rop = v;
190 
191 	refcnt_take(&rop->rop_refcnt);
192 }
193 
194 void
195 rcb_unref(void *null, void *v)
196 {
197 	struct rtpcb *rop = v;
198 
199 	refcnt_rele_wake(&rop->rop_refcnt);
200 }
201 
202 int
203 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
204     struct mbuf *control, struct proc *p)
205 {
206 	struct rtpcb	*rop;
207 	int		 error = 0;
208 
209 	if (req == PRU_CONTROL)
210 		return (EOPNOTSUPP);
211 
212 	soassertlocked(so);
213 
214 	if (control && control->m_len) {
215 		error = EOPNOTSUPP;
216 		goto release;
217 	}
218 
219 	rop = sotortpcb(so);
220 	if (rop == NULL) {
221 		error = EINVAL;
222 		goto release;
223 	}
224 
225 	switch (req) {
226 	/* no connect, bind, accept. Socket is connected from the start */
227 	case PRU_CONNECT:
228 	case PRU_BIND:
229 	case PRU_CONNECT2:
230 	case PRU_LISTEN:
231 	case PRU_ACCEPT:
232 		error = EOPNOTSUPP;
233 		break;
234 
235 	case PRU_DISCONNECT:
236 	case PRU_ABORT:
237 		soisdisconnected(so);
238 		break;
239 	case PRU_SHUTDOWN:
240 		socantsendmore(so);
241 		break;
242 	case PRU_SENSE:
243 		/* stat: don't bother with a blocksize. */
244 		break;
245 
246 	/* minimal support, just implement a fake peer address */
247 	case PRU_SOCKADDR:
248 		error = EINVAL;
249 		break;
250 	case PRU_PEERADDR:
251 		bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
252 		nam->m_len = route_src.sa_len;
253 		break;
254 
255 	case PRU_RCVD:
256 		/*
257 		 * If we are in a FLUSH state, check if the buffer is
258 		 * empty so that we can clear the flag.
259 		 */
260 		if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
261 		    ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
262 		    rop->rop_socket->so_rcv.sb_hiwat)))
263 			rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
264 		break;
265 
266 	case PRU_RCVOOB:
267 	case PRU_SENDOOB:
268 		error = EOPNOTSUPP;
269 		break;
270 	case PRU_SEND:
271 		if (nam) {
272 			error = EISCONN;
273 			break;
274 		}
275 		error = (*so->so_proto->pr_output)(m, so, NULL, NULL);
276 		m = NULL;
277 		break;
278 	default:
279 		panic("route_usrreq");
280 	}
281 
282  release:
283 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
284 		m_freem(control);
285 		m_freem(m);
286 	}
287 	return (error);
288 }
289 
290 int
291 route_attach(struct socket *so, int proto)
292 {
293 	struct rtpcb	*rop;
294 	int		 error;
295 
296 	/*
297 	 * use the rawcb but allocate a rtpcb, this
298 	 * code does not care about the additional fields
299 	 * and works directly on the raw socket.
300 	 */
301 	rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO);
302 	so->so_pcb = rop;
303 	/* Init the timeout structure */
304 	timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
305 	refcnt_init(&rop->rop_refcnt);
306 
307 	if (curproc == NULL)
308 		error = EACCES;
309 	else
310 		error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
311 	if (error) {
312 		pool_put(&rtpcb_pool, rop);
313 		return (error);
314 	}
315 
316 	rop->rop_socket = so;
317 	rop->rop_proto = proto;
318 
319 	rop->rop_rtableid = curproc->p_p->ps_rtableid;
320 
321 	soisconnected(so);
322 	so->so_options |= SO_USELOOPBACK;
323 
324 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
325 	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
326 	    rop_list);
327 	rtptable.rtp_count++;
328 	rw_exit(&rtptable.rtp_lk);
329 
330 	return (0);
331 }
332 
333 int
334 route_detach(struct socket *so)
335 {
336 	struct rtpcb	*rop;
337 
338 	soassertlocked(so);
339 
340 	rop = sotortpcb(so);
341 	if (rop == NULL)
342 		return (EINVAL);
343 
344 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
345 
346 	timeout_del(&rop->rop_timeout);
347 	rtptable.rtp_count--;
348 
349 	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
350 	    rop_list);
351 	rw_exit(&rtptable.rtp_lk);
352 
353 	/* wait for all references to drop */
354 	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
355 
356 	so->so_pcb = NULL;
357 	KASSERT((so->so_state & SS_NOFDREF) == 0);
358 	pool_put(&rtpcb_pool, rop);
359 
360 	return (0);
361 }
362 
363 int
364 route_ctloutput(int op, struct socket *so, int level, int optname,
365     struct mbuf *m)
366 {
367 	struct rtpcb *rop = sotortpcb(so);
368 	int error = 0;
369 	unsigned int tid, prio;
370 
371 	if (level != AF_ROUTE)
372 		return (EINVAL);
373 
374 	switch (op) {
375 	case PRCO_SETOPT:
376 		switch (optname) {
377 		case ROUTE_MSGFILTER:
378 			if (m == NULL || m->m_len != sizeof(unsigned int))
379 				error = EINVAL;
380 			else
381 				rop->rop_msgfilter = *mtod(m, unsigned int *);
382 			break;
383 		case ROUTE_TABLEFILTER:
384 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
385 				error = EINVAL;
386 				break;
387 			}
388 			tid = *mtod(m, unsigned int *);
389 			if (tid != RTABLE_ANY && !rtable_exists(tid))
390 				error = ENOENT;
391 			else
392 				rop->rop_rtableid = tid;
393 			break;
394 		case ROUTE_PRIOFILTER:
395 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
396 				error = EINVAL;
397 				break;
398 			}
399 			prio = *mtod(m, unsigned int *);
400 			if (prio > RTP_MAX)
401 				error = EINVAL;
402 			else
403 				rop->rop_priority = prio;
404 			break;
405 		default:
406 			error = ENOPROTOOPT;
407 			break;
408 		}
409 		break;
410 	case PRCO_GETOPT:
411 		switch (optname) {
412 		case ROUTE_MSGFILTER:
413 			m->m_len = sizeof(unsigned int);
414 			*mtod(m, unsigned int *) = rop->rop_msgfilter;
415 			break;
416 		case ROUTE_TABLEFILTER:
417 			m->m_len = sizeof(unsigned int);
418 			*mtod(m, unsigned int *) = rop->rop_rtableid;
419 			break;
420 		case ROUTE_PRIOFILTER:
421 			m->m_len = sizeof(unsigned int);
422 			*mtod(m, unsigned int *) = rop->rop_priority;
423 			break;
424 		default:
425 			error = ENOPROTOOPT;
426 			break;
427 		}
428 	}
429 	return (error);
430 }
431 
/*
 * Timeout handler armed by rtm_senddesync(): retries the RTM_DESYNC
 * delivery with the socket locked.  xso is the struct socket that was
 * registered with timeout_set() in route_attach().
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*sock = xso;
	int		 lk;

	lk = solock(sock);
	rtm_senddesync(sock);
	sounlock(sock, lk);
}
442 
443 void
444 rtm_senddesync(struct socket *so)
445 {
446 	struct rtpcb	*rop = sotortpcb(so);
447 	struct mbuf	*desync_mbuf;
448 
449 	soassertlocked(so);
450 
451 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
452 	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
453 		return;
454 
455 	/*
456 	 * If we fail to alloc memory or if sbappendaddr()
457 	 * fails, re-add timeout and try again.
458 	 */
459 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
460 	if (desync_mbuf != NULL) {
461 		if (sbappendaddr(so, &so->so_rcv, &route_src,
462 		    desync_mbuf, NULL) != 0) {
463 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
464 			sorwakeup(rop->rop_socket);
465 			return;
466 		}
467 		m_freem(desync_mbuf);
468 	}
469 	/* Re-add timeout to try sending msg again */
470 	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
471 }
472 
/*
 * Distribute a routing message to all routing sockets that want it.
 *
 *	m0		message; consumed in all cases (queued on the last
 *			matching socket, or freed if there is none)
 *	so0		socket the message originated from; it is skipped
 *			unless it has SO_USELOOPBACK set
 *	sa_family	address family of the message, AF_UNSPEC if none
 *
 * Delivery to a matching socket is deferred by one iteration: every
 * matching socket but the last receives a copy (rtm_sendup() with
 * more == 1), the last one receives the original mbuf (more == 0).
 */
473 void
474 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
475 {
476 	struct socket *so;
477 	struct rtpcb *rop;
478 	struct rt_msghdr *rtm;
479 	struct mbuf *m = m0;
480 	struct socket *last = NULL;
481 	struct srp_ref sr;
482 	int s;
483 
484 	/* ensure that we can access the rtm_type via mtod() */
485 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
486 		m_freem(m);
487 		return;
488 	}
489 
490 	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
491 		/*
492 		 * If route socket is bound to an address family only send
493 		 * messages that match the address family. Address family
494 		 * agnostic messages are always sent.
495 		 */
496 		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
497 		    rop->rop_proto != sa_family)
498 			continue;
499 
500 
501 		so = rop->rop_socket;
502 		s = solock(so);
503 
504 		/*
505 		 * Check to see if we don't want our own messages and
506 		 * if we can receive anything.
507 		 */
508 		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
509 		    !(so->so_state & SS_ISCONNECTED) ||
510 		    (so->so_state & SS_CANTRCVMORE)) {
		/*
		 * "next" sits inside this if body so that every later
		 * filter can jump here to unlock the socket and move on.
		 */
511 next:
512 			sounlock(so, s);
513 			continue;
514 		}
515 
516 		/* filter messages that the process does not want */
517 		rtm = mtod(m, struct rt_msghdr *);
518 		/* but RTM_DESYNC can't be filtered */
519 		if (rtm->rtm_type != RTM_DESYNC && rop->rop_msgfilter != 0 &&
520 		    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
521 			goto next;
522 		switch (rtm->rtm_type) {
523 		case RTM_IFANNOUNCE:
524 		case RTM_DESYNC:
525 			/* no tableid */
526 			break;
527 		case RTM_RESOLVE:
528 		case RTM_NEWADDR:
529 		case RTM_DELADDR:
530 		case RTM_IFINFO:
531 		case RTM_80211INFO:
532 		case RTM_BFD:
533 			/* check against rdomain id */
534 			if (rop->rop_rtableid != RTABLE_ANY &&
535 			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
536 				goto next;
537 			break;
538 		default:
			/* priority filter: skip messages above the limit */
539 			if (rop->rop_priority != 0 &&
540 			    rop->rop_priority < rtm->rtm_priority)
541 				goto next;
542 			/* check against rtable id */
543 			if (rop->rop_rtableid != RTABLE_ANY &&
544 			    rop->rop_rtableid != rtm->rtm_tableid)
545 				goto next;
546 			break;
547 		}
548 
549 		/*
550 		 * Check to see if the flush flag is set. If so, don't queue
551 		 * any more messages until the flag is cleared.
552 		 */
553 		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
554 			goto next;
555 		sounlock(so, s);
556 
		/*
		 * This socket matches.  Hand a copy to the previously
		 * remembered socket and drop the reference kept for it.
		 */
557 		if (last) {
558 			s = solock(last);
559 			rtm_sendup(last, m, 1);
560 			sounlock(last, s);
561 			refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
562 		}
563 		/* keep a reference for last */
564 		refcnt_take(&rop->rop_refcnt);
565 		last = rop->rop_socket;
566 	}
567 	SRPL_LEAVE(&sr);
568 
	/* The final recipient gets the original mbuf; otherwise free it. */
569 	if (last) {
570 		s = solock(last);
571 		rtm_sendup(last, m, 0);
572 		sounlock(last, s);
573 		refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
574 	} else
575 		m_freem(m);
576 }
577 
578 int
579 rtm_sendup(struct socket *so, struct mbuf *m0, int more)
580 {
581 	struct rtpcb *rop = sotortpcb(so);
582 	struct mbuf *m;
583 
584 	soassertlocked(so);
585 
586 	if (more) {
587 		m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
588 		if (m == NULL)
589 			return (ENOMEM);
590 	} else
591 		m = m0;
592 
593 	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
594 	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
595 		/* Flag socket as desync'ed and flush required */
596 		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
597 		rtm_senddesync(so);
598 		m_freem(m);
599 		return (ENOBUFS);
600 	}
601 
602 	sorwakeup(so);
603 	return (0);
604 }
605 
606 struct rt_msghdr *
607 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
608 {
609 	struct rt_msghdr	*rtm;
610 	struct rt_addrinfo	 info;
611 	struct sockaddr_rtlabel	 sa_rl;
612 	struct sockaddr_in6	 sa_mask;
613 #ifdef BFD
614 	struct sockaddr_bfd	 sa_bfd;
615 #endif
616 	struct ifnet		*ifp = NULL;
617 	int			 len;
618 
619 	bzero(&info, sizeof(info));
620 	info.rti_info[RTAX_DST] = rt_key(rt);
621 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
622 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
623 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
624 #ifdef BFD
625 	if (rt->rt_flags & RTF_BFD)
626 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
627 #endif
628 #ifdef MPLS
629 	if (rt->rt_flags & RTF_MPLS) {
630 		struct sockaddr_mpls	 sa_mpls;
631 
632 		bzero(&sa_mpls, sizeof(sa_mpls));
633 		sa_mpls.smpls_family = AF_MPLS;
634 		sa_mpls.smpls_len = sizeof(sa_mpls);
635 		sa_mpls.smpls_label = ((struct rt_mpls *)
636 		    rt->rt_llinfo)->mpls_label;
637 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
638 		info.rti_mpls = ((struct rt_mpls *)
639 		    rt->rt_llinfo)->mpls_operation;
640 	}
641 #endif
642 	ifp = if_get(rt->rt_ifidx);
643 	if (ifp != NULL) {
644 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
645 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
646 		if (ifp->if_flags & IFF_POINTOPOINT)
647 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
648 	}
649 	if_put(ifp);
650 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
651 
652 	/* build new route message */
653 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
654 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
655 
656 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
657 	rtm->rtm_type = type;
658 	rtm->rtm_index = rt->rt_ifidx;
659 	rtm->rtm_tableid = tableid;
660 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
661 	rtm->rtm_flags = rt->rt_flags;
662 	rtm->rtm_pid = curproc->p_p->ps_pid;
663 	rtm->rtm_seq = seq;
664 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
665 	rtm->rtm_addrs = info.rti_addrs;
666 #ifdef MPLS
667 	rtm->rtm_mpls = info.rti_mpls;
668 #endif
669 	return rtm;
670 }
671 
672 int
673 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
674     struct mbuf *control)
675 {
676 	struct rt_msghdr	*rtm = NULL;
677 	struct rtentry		*rt = NULL;
678 	struct rt_addrinfo	 info;
679 	int			 len, seq, error = 0;
680 	u_int			 tableid;
681 	u_int8_t		 prio;
682 	u_char			 vers, type;
683 
684 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
685 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
686 		return (ENOBUFS);
687 	if ((m->m_flags & M_PKTHDR) == 0)
688 		panic("route_output");
689 	len = m->m_pkthdr.len;
690 	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 ||
691 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
692 		error = EINVAL;
693 		goto fail;
694 	}
695 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
696 	switch (vers) {
697 	case RTM_VERSION:
698 		if (len < sizeof(struct rt_msghdr)) {
699 			error = EINVAL;
700 			goto fail;
701 		}
702 		if (len > RTM_MAXSIZE) {
703 			error = EMSGSIZE;
704 			goto fail;
705 		}
706 		rtm = malloc(len, M_RTABLE, M_WAITOK);
707 		m_copydata(m, 0, len, (caddr_t)rtm);
708 		break;
709 	default:
710 		error = EPROTONOSUPPORT;
711 		goto fail;
712 	}
713 
714 	/* Verify that the caller is sending an appropriate message early */
715 	switch (rtm->rtm_type) {
716 	case RTM_ADD:
717 	case RTM_DELETE:
718 	case RTM_GET:
719 	case RTM_CHANGE:
720 	case RTM_PROPOSAL:
721 		break;
722 	default:
723 		error = EOPNOTSUPP;
724 		goto fail;
725 	}
726 	/*
727 	 * Verify that the header length is valid.
728 	 * All messages from userland start with a struct rt_msghdr.
729 	 */
730 	if (rtm->rtm_hdrlen == 0)	/* old client */
731 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
732 	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
733 	    len < rtm->rtm_hdrlen) {
734 		error = EINVAL;
735 		goto fail;
736 	}
737 
738 	rtm->rtm_pid = curproc->p_p->ps_pid;
739 
740 	/*
741 	 * Verify that the caller has the appropriate privilege; RTM_GET
742 	 * is the only operation the non-superuser is allowed.
743 	 */
744 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
745 		error = EACCES;
746 		goto fail;
747 	}
748 	tableid = rtm->rtm_tableid;
749 	if (!rtable_exists(tableid)) {
750 		if (rtm->rtm_type == RTM_ADD) {
751 			if ((error = rtable_add(tableid)) != 0)
752 				goto fail;
753 		} else {
754 			error = EINVAL;
755 			goto fail;
756 		}
757 	}
758 
759 
760 	/* Do not let userland play with kernel-only flags. */
761 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
762 		error = EINVAL;
763 		goto fail;
764 	}
765 
766 	/* make sure that kernel-only bits are not set */
767 	rtm->rtm_priority &= RTP_MASK;
768 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
769 	rtm->rtm_fmask &= RTF_FMASK;
770 
771 	if (rtm->rtm_priority != 0) {
772 		if (rtm->rtm_priority > RTP_MAX ||
773 		    rtm->rtm_priority == RTP_LOCAL) {
774 			error = EINVAL;
775 			goto fail;
776 		}
777 		prio = rtm->rtm_priority;
778 	} else if (rtm->rtm_type != RTM_ADD)
779 		prio = RTP_ANY;
780 	else if (rtm->rtm_flags & RTF_STATIC)
781 		prio = 0;
782 	else
783 		prio = RTP_DEFAULT;
784 
785 	bzero(&info, sizeof(info));
786 	info.rti_addrs = rtm->rtm_addrs;
787 	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
788 	    len + (caddr_t)rtm, &info)) != 0)
789 		goto fail;
790 	info.rti_flags = rtm->rtm_flags;
791 	if (rtm->rtm_type != RTM_PROPOSAL &&
792 	   (info.rti_info[RTAX_DST] == NULL ||
793 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
794 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
795 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
796 	    info.rti_info[RTAX_GENMASK] != NULL)) {
797 		error = EINVAL;
798 		goto fail;
799 	}
800 #ifdef MPLS
801 	info.rti_mpls = rtm->rtm_mpls;
802 #endif
803 
804 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
805 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
806 	    (info.rti_flags & RTF_CLONING) == 0) {
807 		info.rti_flags |= RTF_LLINFO;
808 	}
809 
810 	/*
811 	 * Validate RTM_PROPOSAL and pass it along or error out.
812 	 */
813 	if (rtm->rtm_type == RTM_PROPOSAL) {
814 		if (rtm_validate_proposal(&info) == -1) {
815 			error = EINVAL;
816 			goto fail;
817 		}
818 		/*
819 		 * If this is a solicitation proposal forward request to
820 		 * all interfaces. Most handlers will ignore it but at least
821 		 * umb(4) will send a response to this event.
822 		 */
823 		if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
824 			struct ifnet *ifp;
825 			NET_LOCK();
826 			TAILQ_FOREACH(ifp, &ifnet, if_list) {
827 				ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
828 			}
829 			NET_UNLOCK();
830 		}
831 	} else {
832 		error = rtm_output(rtm, &rt, &info, prio, tableid);
833 		if (!error) {
834 			type = rtm->rtm_type;
835 			seq = rtm->rtm_seq;
836 			free(rtm, M_RTABLE, len);
837 			rtm = rtm_report(rt, type, seq, tableid);
838 			len = rtm->rtm_msglen;
839 		}
840 	}
841 
842 	rtfree(rt);
843 	if (error) {
844 		rtm->rtm_errno = error;
845 	} else {
846 		rtm->rtm_flags |= RTF_DONE;
847 	}
848 
849 	/*
850 	 * Check to see if we don't want our own messages.
851 	 */
852 	if (!(so->so_options & SO_USELOOPBACK)) {
853 		if (rtptable.rtp_count <= 1) {
854 			/* no other listener and no loopback of messages */
855 fail:
856 			free(rtm, M_RTABLE, len);
857 			m_freem(m);
858 			return (error);
859 		}
860 	}
861 	if (rtm) {
862 		if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
863 			m_freem(m);
864 			m = NULL;
865 		} else if (m->m_pkthdr.len > len)
866 			m_adj(m, len - m->m_pkthdr.len);
867 		free(rtm, M_RTABLE, len);
868 	}
869 	if (m)
870 		route_input(m, so, info.rti_info[RTAX_DST] ?
871 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
872 
873 	return (error);
874 }
875 
/*
 * Carry out the routing table operation requested by a routing message
 * that route_output() has already validated.
 *
 *	rtm	 message header from userland (type, flags, fmask, metrics)
 *	prt	 in: initial route entry pointer (NULL from route_output());
 *		 out: the route entry the operation found or created, or
 *		 NULL.  route_output() calls rtfree() on it afterwards.
 *	info	 addresses and flags parsed from the message
 *	prio	 route priority to add or to look up
 *	tableid	 routing table to operate on
 *
 * Returns 0 or an errno value.  Message types other than RTM_ADD,
 * RTM_DELETE, RTM_CHANGE and RTM_GET pass through the switch untouched.
 */
876 int
877 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
878     struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
879 {
880 	struct rtentry		*rt = *prt;
881 	struct ifnet		*ifp = NULL;
882 	int			 plen, newgate = 0, error = 0;
883 
884 	switch (rtm->rtm_type) {
885 	case RTM_ADD:
		/* A new route needs a gateway. */
886 		if (info->rti_info[RTAX_GATEWAY] == NULL) {
887 			error = EINVAL;
888 			break;
889 		}
890 
		/* Look at what currently matches the destination. */
891 		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
892 		if ((error = route_arp_conflict(rt, info))) {
893 			rtfree(rt);
894 			rt = NULL;
895 			break;
896 		}
897 
898 		/*
899 		 * We cannot go through a delete/create/insert cycle for
900 		 * cached route because this can lead to races in the
901 		 * receive path.  Instead we update the L2 cache.
902 		 */
903 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
904 			goto change;
905 
		/* Not a cached entry: drop the match and insert a new route. */
906 		rtfree(rt);
907 		rt = NULL;
908 
909 		NET_LOCK();
910 		if ((error = rtm_getifa(info, tableid)) != 0) {
911 			NET_UNLOCK();
912 			break;
913 		}
914 		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
915 		NET_UNLOCK();
916 		if (error == 0)
917 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
918 			    &rt->rt_rmx);
919 		break;
920 	case RTM_DELETE:
921 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
922 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
923 		    prio);
924 		if (rt == NULL) {
925 			error = ESRCH;
926 			break;
927 		}
928 
929 		/*
930 		 * If we got multipath routes, we require users to specify
931 		 * a matching gateway.
932 		 */
933 		if (ISSET(rt->rt_flags, RTF_MPATH) &&
934 		    info->rti_info[RTAX_GATEWAY] == NULL) {
935 			error = ESRCH;
936 			break;
937 		}
938 
939 		/* Detaching an interface requires the KERNEL_LOCK(). */
940 		ifp = if_get(rt->rt_ifidx);
941 		KASSERT(ifp != NULL);
942 
943 		/*
944 		 * Invalidate the cache of automagically created and
945 		 * referenced L2 entries to make sure that ``rt_gwroute''
946 		 * pointer stays valid for other CPUs.
947 		 */
948 		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
949 			NET_LOCK();
950 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
951 			/* Reset the MTU of the gateway route. */
952 			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
953 			    route_cleargateway, rt);
954 			NET_UNLOCK();
955 			if_put(ifp);
			/* The cached entry itself is kept and returned. */
956 			break;
957 		}
958 
959 		/*
960 		 * Make sure that local routes are only modified by the
961 		 * kernel.
962 		 */
963 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
964 			if_put(ifp);
965 			error = EINVAL;
966 			break;
967 		}
968 
		/*
		 * Drop the lookup reference; rtrequest_delete() hands the
		 * removed entry back through &rt.
		 */
969 		rtfree(rt);
970 		rt = NULL;
971 
972 		NET_LOCK();
973 		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
974 		NET_UNLOCK();
975 		if_put(ifp);
976 		break;
977 	case RTM_CHANGE:
978 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
979 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
980 		    prio);
981 		/*
982 		 * If we got multipath routes, we require users to specify
983 		 * a matching gateway.
984 		 */
985 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
986 		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
987 			rtfree(rt);
988 			rt = NULL;
989 		}
990 		/*
991 		 * If RTAX_GATEWAY is the argument we're trying to
992 		 * change, try to find a compatible route.
993 		 */
994 		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
995 			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
996 			    info->rti_info[RTAX_NETMASK], NULL, prio);
997 			/* Ensure we don't pick a multipath one. */
998 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
999 				rtfree(rt);
1000 				rt = NULL;
1001 			}
1002 		}
1003 
1004 		if (rt == NULL) {
1005 			error = ESRCH;
1006 			break;
1007 		}
1008 
1009 		/*
1010 		 * Make sure that local routes are only modified by the
1011 		 * kernel.
1012 		 */
1013 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1014 			error = EINVAL;
1015 			break;
1016 		}
1017 
1018 		/*
1019 		 * RTM_CHANGE needs a perfect match.
1020 		 */
1021 		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
1022 		    info->rti_info[RTAX_NETMASK]);
1023 		if (rt_plen(rt) != plen) {
1024 			error = ESRCH;
1025 			break;
1026 		}
1027 
		/* Does the request carry a gateway that differs from rt's? */
1028 		if (info->rti_info[RTAX_GATEWAY] != NULL)
1029 			if (rt->rt_gateway == NULL ||
1030 			    bcmp(rt->rt_gateway,
1031 			    info->rti_info[RTAX_GATEWAY],
1032 			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
1033 				newgate = 1;
1034 			}
1035 		/*
1036 		 * Check reachable gateway before changing the route.
1037 		 * New gateway could require new ifaddr, ifp;
1038 		 * flags may also be different; ifp may be specified
1039 		 * by ll sockaddr when protocol address is ambiguous.
1040 		 */
1041 		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1042 		    info->rti_info[RTAX_IFA] != NULL) {
1043 			struct ifaddr	*ifa = NULL;
1044 
1045 			NET_LOCK();
1046 			if ((error = rtm_getifa(info, tableid)) != 0) {
1047 				NET_UNLOCK();
1048 				break;
1049 			}
1050 			ifa = info->rti_ifa;
			/*
			 * Move the route to the new ifaddr: notify the old
			 * interface, swap the ifa reference and interface
			 * index.
			 */
1051 			if (rt->rt_ifa != ifa) {
1052 				ifp = if_get(rt->rt_ifidx);
1053 				KASSERT(ifp != NULL);
1054 				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1055 				ifafree(rt->rt_ifa);
1056 				if_put(ifp);
1057 
1058 				ifa->ifa_refcnt++;
1059 				rt->rt_ifa = ifa;
1060 				rt->rt_ifidx = ifa->ifa_ifp->if_index;
1061 				/* recheck link state after ifp change */
1062 				rt_if_linkstate_change(rt, ifa->ifa_ifp,
1063 				    tableid);
1064 			}
1065 			NET_UNLOCK();
1066 		}
/*
 * Also entered from RTM_ADD when the destination matches a cached (L2)
 * entry; newgate is 0 on that path.
 */
1067 change:
1068 		if (info->rti_info[RTAX_GATEWAY] != NULL) {
1069 			/* When updating the gateway, make sure it is valid. */
1070 			if (!newgate && rt->rt_gateway->sa_family !=
1071 			    info->rti_info[RTAX_GATEWAY]->sa_family) {
1072 				error = EINVAL;
1073 				break;
1074 			}
1075 
1076 			NET_LOCK();
1077 			error = rt_setgate(rt,
1078 			    info->rti_info[RTAX_GATEWAY], tableid);
1079 			NET_UNLOCK();
1080 			if (error)
1081 				break;
1082 		}
1083 #ifdef MPLS
1084 		if (rtm->rtm_flags & RTF_MPLS) {
1085 			NET_LOCK();
1086 			error = rt_mpls_set(rt,
1087 			    info->rti_info[RTAX_SRC], info->rti_mpls);
1088 			NET_UNLOCK();
1089 			if (error)
1090 				break;
1091 		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
1092 			NET_LOCK();
1093 			/* if gateway changed remove MPLS information */
1094 			rt_mpls_clear(rt);
1095 			NET_UNLOCK();
1096 		}
1097 #endif
1098 
1099 #ifdef BFD
		/* Set BFD if requested; clear it if masked in but not set. */
1100 		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1101 			if ((error = bfdset(rt)))
1102 				break;
1103 		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1104 		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
1105 			bfdclear(rt);
1106 		}
1107 #endif
1108 
1109 		NET_LOCK();
1110 		/* Hack to allow some flags to be toggled */
1111 		if (rtm->rtm_fmask) {
1112 			/* MPLS flag it is set by rt_mpls_set() */
1113 			rtm->rtm_fmask &= ~RTF_MPLS;
1114 			rtm->rtm_flags &= ~RTF_MPLS;
			/* Replace only the flag bits selected by rtm_fmask. */
1115 			rt->rt_flags =
1116 			    (rt->rt_flags & ~rtm->rtm_fmask) |
1117 			    (rtm->rtm_flags & rtm->rtm_fmask);
1118 		}
1119 		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
1120 
		/* Tell the (possibly new) interface about the route. */
1121 		ifp = if_get(rt->rt_ifidx);
1122 		KASSERT(ifp != NULL);
1123 		ifp->if_rtrequest(ifp, RTM_ADD, rt);
1124 		if_put(ifp);
1125 
		/* Swap the route label if the request carries one. */
1126 		if (info->rti_info[RTAX_LABEL] != NULL) {
1127 			char *rtlabel = ((struct sockaddr_rtlabel *)
1128 			    info->rti_info[RTAX_LABEL])->sr_label;
1129 			rtlabel_unref(rt->rt_labelid);
1130 			rt->rt_labelid = rtlabel_name2id(rtlabel);
1131 		}
1132 		if_group_routechange(info->rti_info[RTAX_DST],
1133 		    info->rti_info[RTAX_NETMASK]);
		/*
		 * For every metric named in rtm_inits, take over the lock
		 * bit from the request.
		 */
1134 		rt->rt_locks &= ~(rtm->rtm_inits);
1135 		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1136 		NET_UNLOCK();
1137 		break;
1138 	case RTM_GET:
1139 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1140 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1141 		    prio);
1142 		if (rt == NULL)
1143 			error = ESRCH;
1144 		break;
1145 	}
1146 
	/* Hand the entry (possibly NULL) back even when an error is returned. */
1147 	*prt = rt;
1148 	return (error);
1149 }
1150 
1151 struct ifaddr *
1152 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1153     unsigned int rtableid)
1154 {
1155 	struct ifaddr	*ifa;
1156 
1157 	if ((flags & RTF_GATEWAY) == 0) {
1158 		/*
1159 		 * If we are adding a route to an interface,
1160 		 * and the interface is a pt to pt link
1161 		 * we should search for the destination
1162 		 * as our clue to the interface.  Otherwise
1163 		 * we can use the local address.
1164 		 */
1165 		ifa = NULL;
1166 		if (flags & RTF_HOST)
1167 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1168 		if (ifa == NULL)
1169 			ifa = ifa_ifwithaddr(gateway, rtableid);
1170 	} else {
1171 		/*
1172 		 * If we are adding a route to a remote net
1173 		 * or host, the gateway may still be on the
1174 		 * other end of a pt to pt link.
1175 		 */
1176 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1177 	}
1178 	if (ifa == NULL) {
1179 		if (gateway->sa_family == AF_LINK) {
1180 			struct sockaddr_dl *sdl = satosdl(gateway);
1181 			struct ifnet *ifp = if_get(sdl->sdl_index);
1182 
1183 			if (ifp != NULL)
1184 				ifa = ifaof_ifpforaddr(dst, ifp);
1185 			if_put(ifp);
1186 		} else {
1187 			struct rtentry *rt;
1188 
1189 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1190 			if (rt != NULL)
1191 				ifa = rt->rt_ifa;
1192 			rtfree(rt);
1193 		}
1194 	}
1195 	if (ifa == NULL)
1196 		return (NULL);
1197 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1198 		struct ifaddr	*oifa = ifa;
1199 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1200 		if (ifa == NULL)
1201 			ifa = oifa;
1202 	}
1203 	return (ifa);
1204 }
1205 
1206 int
1207 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1208 {
1209 	struct ifnet	*ifp = NULL;
1210 
1211 	/*
1212 	 * The "returned" `ifa' is guaranteed to be alive only if
1213 	 * the NET_LOCK() is held.
1214 	 */
1215 	NET_ASSERT_LOCKED();
1216 
1217 	/*
1218 	 * ifp may be specified by sockaddr_dl when protocol address
1219 	 * is ambiguous
1220 	 */
1221 	if (info->rti_info[RTAX_IFP] != NULL) {
1222 		struct sockaddr_dl *sdl;
1223 
1224 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1225 		ifp = if_get(sdl->sdl_index);
1226 	}
1227 
1228 #ifdef IPSEC
1229 	/*
1230 	 * If the destination is a PF_KEY address, we'll look
1231 	 * for the existence of a encap interface number or address
1232 	 * in the options list of the gateway. By default, we'll return
1233 	 * enc0.
1234 	 */
1235 	if (info->rti_info[RTAX_DST] &&
1236 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1237 		info->rti_ifa = enc_getifa(rtid, 0);
1238 #endif
1239 
1240 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1241 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1242 
1243 	if (info->rti_ifa == NULL) {
1244 		struct sockaddr	*sa;
1245 
1246 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1247 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1248 				sa = info->rti_info[RTAX_DST];
1249 
1250 		if (sa != NULL && ifp != NULL)
1251 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1252 		else if (info->rti_info[RTAX_DST] != NULL &&
1253 		    info->rti_info[RTAX_GATEWAY] != NULL)
1254 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1255 			    info->rti_info[RTAX_DST],
1256 			    info->rti_info[RTAX_GATEWAY],
1257 			    rtid);
1258 		else if (sa != NULL)
1259 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1260 			    sa, sa, rtid);
1261 	}
1262 
1263 	if_put(ifp);
1264 
1265 	if (info->rti_ifa == NULL)
1266 		return (ENETUNREACH);
1267 
1268 	return (0);
1269 }
1270 
1271 int
1272 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1273 {
1274 	struct rtentry *nhrt = arg;
1275 
1276 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1277 	    !ISSET(rt->rt_locks, RTV_MTU))
1278 		rt->rt_mtu = 0;
1279 
1280 	return (0);
1281 }
1282 
1283 /*
1284  * Check if the user request to insert an ARP entry does not conflict
1285  * with existing ones.
1286  *
1287  * Only two entries are allowed for a given IP address: a private one
1288  * (priv) and a public one (pub).
1289  */
1290 int
1291 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1292 {
1293 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1294 
1295 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1296 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1297 		return (0);
1298 
1299 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1300 		return (0);
1301 
1302 	/* If the entry is cached, it can be updated. */
1303 	if (ISSET(rt->rt_flags, RTF_CACHED))
1304 		return (0);
1305 
1306 	/*
1307 	 * Same destination, not cached and both "priv" or "pub" conflict.
1308 	 * If a second entry exists, it always conflict.
1309 	 */
1310 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1311 	    ISSET(rt->rt_flags, RTF_MPATH))
1312 		return (EEXIST);
1313 
1314 	/* No conflict but an entry exist so we need to force mpath. */
1315 	info->rti_flags |= RTF_MPATH;
1316 	return (0);
1317 }
1318 
1319 void
1320 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1321     struct rt_kmetrics *out)
1322 {
1323 	int64_t expire;
1324 
1325 	if (which & RTV_MTU)
1326 		out->rmx_mtu = in->rmx_mtu;
1327 	if (which & RTV_EXPIRE) {
1328 		expire = in->rmx_expire;
1329 		if (expire != 0) {
1330 			expire -= time_second;
1331 			expire += time_uptime;
1332 		}
1333 
1334 		out->rmx_expire = expire;
1335 	}
1336 }
1337 
1338 void
1339 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1340 {
1341 	int64_t expire;
1342 
1343 	expire = in->rmx_expire;
1344 	if (expire != 0) {
1345 		expire -= time_uptime;
1346 		expire += time_second;
1347 	}
1348 
1349 	bzero(out, sizeof(*out));
1350 	out->rmx_locks = in->rmx_locks;
1351 	out->rmx_mtu = in->rmx_mtu;
1352 	out->rmx_expire = expire;
1353 	out->rmx_pksent = in->rmx_pksent;
1354 }
1355 
/*
 * ROUNDUP(a): round a positive length up to a multiple of sizeof(long)
 * (this relies on sizeof(long) being a power of two); a length that is
 * not positive yields sizeof(long), so a sockaddr with sa_len 0 still
 * occupies one slot.
 *
 * ADVANCE(x, n): move pointer `x' past sockaddr `n', i.e. by the rounded
 * up value of n->sa_len.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
1359 
1360 int
1361 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1362 {
1363 	struct sockaddr	*sa;
1364 	int		 i;
1365 
1366 	/*
1367 	 * Parse address bits, split address storage in chunks, and
1368 	 * set info pointers.  Use sa_len for traversing the memory
1369 	 * and check that we stay within in the limit.
1370 	 */
1371 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1372 	for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1373 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1374 			continue;
1375 		if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1376 			return (EINVAL);
1377 		sa = (struct sockaddr *)cp;
1378 		if (cp + sa->sa_len > cplim)
1379 			return (EINVAL);
1380 		rtinfo->rti_info[i] = sa;
1381 		ADVANCE(cp, sa);
1382 	}
1383 	/*
1384 	 * Check that the address family is suitable for the route address
1385 	 * type.  Check that each address has a size that fits its family
1386 	 * and its length is within the size.  Strings within addresses must
1387 	 * be NUL terminated.
1388 	 */
1389 	for (i = 0; i < RTAX_MAX; i++) {
1390 		size_t len, maxlen, size;
1391 
1392 		sa = rtinfo->rti_info[i];
1393 		if (sa == NULL)
1394 			continue;
1395 		maxlen = size = 0;
1396 		switch (i) {
1397 		case RTAX_DST:
1398 		case RTAX_GATEWAY:
1399 		case RTAX_SRC:
1400 			switch (sa->sa_family) {
1401 			case AF_INET:
1402 				size = sizeof(struct sockaddr_in);
1403 				break;
1404 			case AF_LINK:
1405 				size = sizeof(struct sockaddr_dl);
1406 				break;
1407 #ifdef INET6
1408 			case AF_INET6:
1409 				size = sizeof(struct sockaddr_in6);
1410 				break;
1411 #endif
1412 #ifdef MPLS
1413 			case AF_MPLS:
1414 				size = sizeof(struct sockaddr_mpls);
1415 				break;
1416 #endif
1417 			}
1418 			break;
1419 		case RTAX_IFP:
1420 			if (sa->sa_family != AF_LINK)
1421 				return (EAFNOSUPPORT);
1422 			/*
1423 			 * XXX Should be sizeof(struct sockaddr_dl), but
1424 			 * route(8) has a bug and provides less memory.
1425 			 * arp(8) has another bug and uses sizeof pointer.
1426 			 */
1427 			size = 4;
1428 			break;
1429 		case RTAX_IFA:
1430 			switch (sa->sa_family) {
1431 			case AF_INET:
1432 				size = sizeof(struct sockaddr_in);
1433 				break;
1434 #ifdef INET6
1435 			case AF_INET6:
1436 				size = sizeof(struct sockaddr_in6);
1437 				break;
1438 #endif
1439 			default:
1440 				return (EAFNOSUPPORT);
1441 			}
1442 			break;
1443 		case RTAX_LABEL:
1444 			sa->sa_family = AF_UNSPEC;
1445 			maxlen = RTLABEL_LEN;
1446 			size = sizeof(struct sockaddr_rtlabel);
1447 			break;
1448 #ifdef BFD
1449 		case RTAX_BFD:
1450 			sa->sa_family = AF_UNSPEC;
1451 			size = sizeof(struct sockaddr_bfd);
1452 			break;
1453 #endif
1454 		case RTAX_DNS:
1455 			/* more validation in rtm_validate_proposal */
1456 			if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1457 				return (EINVAL);
1458 			if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1459 			    sr_dns))
1460 				return (EINVAL);
1461 			switch (sa->sa_family) {
1462 			case AF_INET:
1463 #ifdef INET6
1464 			case AF_INET6:
1465 #endif
1466 				break;
1467 			default:
1468 				return (EAFNOSUPPORT);
1469 			}
1470 			break;
1471 		case RTAX_STATIC:
1472 			sa->sa_family = AF_UNSPEC;
1473 			maxlen = RTSTATIC_LEN;
1474 			size = sizeof(struct sockaddr_rtstatic);
1475 			break;
1476 		case RTAX_SEARCH:
1477 			sa->sa_family = AF_UNSPEC;
1478 			maxlen = RTSEARCH_LEN;
1479 			size = sizeof(struct sockaddr_rtsearch);
1480 			break;
1481 		}
1482 		if (size) {
1483 			/* memory for the full struct must be provided */
1484 			if (sa->sa_len < size)
1485 				return (EINVAL);
1486 		}
1487 		if (maxlen) {
1488 			/* this should not happen */
1489 			if (2 + maxlen > size)
1490 				return (EINVAL);
1491 			/* strings must be NUL terminated within the struct */
1492 			len = strnlen(sa->sa_data, maxlen);
1493 			if (len >= maxlen || 2 + len >= sa->sa_len)
1494 				return (EINVAL);
1495 			break;
1496 		}
1497 	}
1498 	return (0);
1499 }
1500 
1501 struct mbuf *
1502 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1503 {
1504 	struct rt_msghdr	*rtm;
1505 	struct mbuf		*m;
1506 	int			 i;
1507 	struct sockaddr		*sa;
1508 	int			 len, dlen, hlen;
1509 
1510 	switch (type) {
1511 	case RTM_DELADDR:
1512 	case RTM_NEWADDR:
1513 		len = sizeof(struct ifa_msghdr);
1514 		break;
1515 	case RTM_IFINFO:
1516 		len = sizeof(struct if_msghdr);
1517 		break;
1518 	case RTM_IFANNOUNCE:
1519 		len = sizeof(struct if_announcemsghdr);
1520 		break;
1521 #ifdef BFD
1522 	case RTM_BFD:
1523 		len = sizeof(struct bfd_msghdr);
1524 		break;
1525 #endif
1526 	case RTM_80211INFO:
1527 		len = sizeof(struct if_ieee80211_msghdr);
1528 		break;
1529 	default:
1530 		len = sizeof(struct rt_msghdr);
1531 		break;
1532 	}
1533 	if (len > MCLBYTES)
1534 		panic("rtm_msg1");
1535 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1536 	if (m && len > MHLEN) {
1537 		MCLGET(m, M_DONTWAIT);
1538 		if ((m->m_flags & M_EXT) == 0) {
1539 			m_free(m);
1540 			m = NULL;
1541 		}
1542 	}
1543 	if (m == NULL)
1544 		return (m);
1545 	m->m_pkthdr.len = m->m_len = hlen = len;
1546 	m->m_pkthdr.ph_ifidx = 0;
1547 	rtm = mtod(m, struct rt_msghdr *);
1548 	bzero(rtm, len);
1549 	for (i = 0; i < RTAX_MAX; i++) {
1550 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1551 			continue;
1552 		rtinfo->rti_addrs |= (1 << i);
1553 		dlen = ROUNDUP(sa->sa_len);
1554 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1555 			m_freem(m);
1556 			return (NULL);
1557 		}
1558 		len += dlen;
1559 	}
1560 	rtm->rtm_msglen = len;
1561 	rtm->rtm_hdrlen = hlen;
1562 	rtm->rtm_version = RTM_VERSION;
1563 	rtm->rtm_type = type;
1564 	return (m);
1565 }
1566 
1567 int
1568 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1569     struct walkarg *w)
1570 {
1571 	int		i;
1572 	int		len, dlen, hlen, second_time = 0;
1573 	caddr_t		cp0;
1574 
1575 	rtinfo->rti_addrs = 0;
1576 again:
1577 	switch (type) {
1578 	case RTM_DELADDR:
1579 	case RTM_NEWADDR:
1580 		len = sizeof(struct ifa_msghdr);
1581 		break;
1582 	case RTM_IFINFO:
1583 		len = sizeof(struct if_msghdr);
1584 		break;
1585 	default:
1586 		len = sizeof(struct rt_msghdr);
1587 		break;
1588 	}
1589 	hlen = len;
1590 	if ((cp0 = cp) != NULL)
1591 		cp += len;
1592 	for (i = 0; i < RTAX_MAX; i++) {
1593 		struct sockaddr *sa;
1594 
1595 		if ((sa = rtinfo->rti_info[i]) == NULL)
1596 			continue;
1597 		rtinfo->rti_addrs |= (1 << i);
1598 		dlen = ROUNDUP(sa->sa_len);
1599 		if (cp) {
1600 			bcopy(sa, cp, (size_t)dlen);
1601 			cp += dlen;
1602 		}
1603 		len += dlen;
1604 	}
1605 	/* align message length to the next natural boundary */
1606 	len = ALIGN(len);
1607 	if (cp == 0 && w != NULL && !second_time) {
1608 		w->w_needed += len;
1609 		if (w->w_needed <= 0 && w->w_where) {
1610 			if (w->w_tmemsize < len) {
1611 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1612 				w->w_tmem = malloc(len, M_RTABLE,
1613 				    M_NOWAIT | M_ZERO);
1614 				if (w->w_tmem)
1615 					w->w_tmemsize = len;
1616 			}
1617 			if (w->w_tmem) {
1618 				cp = w->w_tmem;
1619 				second_time = 1;
1620 				goto again;
1621 			} else
1622 				w->w_where = 0;
1623 		}
1624 	}
1625 	if (cp && w)		/* clear the message header */
1626 		bzero(cp0, hlen);
1627 
1628 	if (cp) {
1629 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1630 
1631 		rtm->rtm_version = RTM_VERSION;
1632 		rtm->rtm_type = type;
1633 		rtm->rtm_msglen = len;
1634 		rtm->rtm_hdrlen = hlen;
1635 	}
1636 	return (len);
1637 }
1638 
1639 void
1640 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1641 {
1642 	struct rt_addrinfo	 info;
1643 	struct ifnet		*ifp;
1644 	struct sockaddr_rtlabel	 sa_rl;
1645 	struct sockaddr_in6	 sa_mask;
1646 
1647 	memset(&info, 0, sizeof(info));
1648 	info.rti_info[RTAX_DST] = rt_key(rt);
1649 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1650 	if (!ISSET(rt->rt_flags, RTF_HOST))
1651 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1652 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1653 	ifp = if_get(rt->rt_ifidx);
1654 	if (ifp != NULL) {
1655 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1656 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1657 	}
1658 
1659 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1660 	    rtableid);
1661 	if_put(ifp);
1662 }
1663 
1664 /*
1665  * This routine is called to generate a message from the routing
1666  * socket indicating that a redirect has occurred, a routing lookup
1667  * has failed, or that a protocol has detected timeouts to a particular
1668  * destination.
1669  */
1670 void
1671 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1672     u_int ifidx, int error, u_int tableid)
1673 {
1674 	struct rt_msghdr	*rtm;
1675 	struct mbuf		*m;
1676 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1677 
1678 	if (rtptable.rtp_count == 0)
1679 		return;
1680 	m = rtm_msg1(type, rtinfo);
1681 	if (m == NULL)
1682 		return;
1683 	rtm = mtod(m, struct rt_msghdr *);
1684 	rtm->rtm_flags = RTF_DONE | flags;
1685 	rtm->rtm_priority = prio;
1686 	rtm->rtm_errno = error;
1687 	rtm->rtm_tableid = tableid;
1688 	rtm->rtm_addrs = rtinfo->rti_addrs;
1689 	rtm->rtm_index = ifidx;
1690 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1691 }
1692 
1693 /*
1694  * This routine is called to generate a message from the routing
1695  * socket indicating that the status of a network interface has changed.
1696  */
1697 void
1698 rtm_ifchg(struct ifnet *ifp)
1699 {
1700 	struct if_msghdr	*ifm;
1701 	struct mbuf		*m;
1702 
1703 	if (rtptable.rtp_count == 0)
1704 		return;
1705 	m = rtm_msg1(RTM_IFINFO, NULL);
1706 	if (m == NULL)
1707 		return;
1708 	ifm = mtod(m, struct if_msghdr *);
1709 	ifm->ifm_index = ifp->if_index;
1710 	ifm->ifm_tableid = ifp->if_rdomain;
1711 	ifm->ifm_flags = ifp->if_flags;
1712 	ifm->ifm_xflags = ifp->if_xflags;
1713 	if_getdata(ifp, &ifm->ifm_data);
1714 	ifm->ifm_addrs = 0;
1715 	route_input(m, NULL, AF_UNSPEC);
1716 }
1717 
1718 /*
1719  * This is called to generate messages from the routing socket
1720  * indicating a network interface has had addresses associated with it.
1721  * if we ever reverse the logic and replace messages TO the routing
1722  * socket indicate a request to configure interfaces, then it will
1723  * be unnecessary as the routing socket will automatically generate
1724  * copies of it.
1725  */
1726 void
1727 rtm_addr(int cmd, struct ifaddr *ifa)
1728 {
1729 	struct ifnet		*ifp = ifa->ifa_ifp;
1730 	struct mbuf		*m;
1731 	struct rt_addrinfo	 info;
1732 	struct ifa_msghdr	*ifam;
1733 
1734 	if (rtptable.rtp_count == 0)
1735 		return;
1736 
1737 	memset(&info, 0, sizeof(info));
1738 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1739 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1740 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1741 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1742 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1743 		return;
1744 	ifam = mtod(m, struct ifa_msghdr *);
1745 	ifam->ifam_index = ifp->if_index;
1746 	ifam->ifam_metric = ifa->ifa_metric;
1747 	ifam->ifam_flags = ifa->ifa_flags;
1748 	ifam->ifam_addrs = info.rti_addrs;
1749 	ifam->ifam_tableid = ifp->if_rdomain;
1750 
1751 	route_input(m, NULL,
1752 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1753 }
1754 
1755 /*
1756  * This is called to generate routing socket messages indicating
1757  * network interface arrival and departure.
1758  */
1759 void
1760 rtm_ifannounce(struct ifnet *ifp, int what)
1761 {
1762 	struct if_announcemsghdr	*ifan;
1763 	struct mbuf			*m;
1764 
1765 	if (rtptable.rtp_count == 0)
1766 		return;
1767 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1768 	if (m == NULL)
1769 		return;
1770 	ifan = mtod(m, struct if_announcemsghdr *);
1771 	ifan->ifan_index = ifp->if_index;
1772 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1773 	ifan->ifan_what = what;
1774 	route_input(m, NULL, AF_UNSPEC);
1775 }
1776 
#ifdef BFD
/*
 * Generate an RTM_BFD routing socket message describing the state of
 * the BFD session `bfd'.  The message carries the destination and the
 * interface address of the session's route, plus the sockaddr_bfd
 * produced by bfd2sa() in the header.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rtentry		*rt;
	struct rt_addrinfo	 info;
	struct sockaddr_bfd	 state;
	struct bfd_msghdr	*hdr;
	struct mbuf		*m;

	if (rtptable.rtp_count == 0)
		return;

	rt = bfd->bc_rt;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;

	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = info.rti_addrs;

	/* Snapshot the session state, then copy it into the header. */
	bfd2sa(rt, &state);
	memcpy(&hdr->bm_sa, &state, sizeof(state));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1808 
1809 /*
1810  * This is used to generate routing socket messages indicating
1811  * the state of an ieee80211 interface.
1812  */
1813 void
1814 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1815 {
1816 	struct if_ieee80211_msghdr	*ifim;
1817 	struct mbuf			*m;
1818 
1819 	if (rtptable.rtp_count == 0)
1820 		return;
1821 	m = rtm_msg1(RTM_80211INFO, NULL);
1822 	if (m == NULL)
1823 		return;
1824 	ifim = mtod(m, struct if_ieee80211_msghdr *);
1825 	ifim->ifim_index = ifp->if_index;
1826 	ifim->ifim_tableid = ifp->if_rdomain;
1827 
1828 	memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1829 	route_input(m, NULL, AF_UNSPEC);
1830 }
1831 
1832 /*
1833  * This is used to generate routing socket messages indicating
1834  * the address selection proposal from an interface.
1835  */
1836 void
1837 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1838     uint8_t prio)
1839 {
1840 	struct rt_msghdr	*rtm;
1841 	struct mbuf		*m;
1842 
1843 	m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1844 	if (m == NULL)
1845 		return;
1846 	rtm = mtod(m, struct rt_msghdr *);
1847 	rtm->rtm_flags = RTF_DONE | flags;
1848 	rtm->rtm_priority = prio;
1849 	rtm->rtm_tableid = ifp->if_rdomain;
1850 	rtm->rtm_index = ifp->if_index;
1851 	rtm->rtm_addrs = rtinfo->rti_addrs;
1852 
1853 	route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1854 }
1855 
1856 /*
1857  * This is used in dumping the kernel table via sysctl().
1858  */
1859 int
1860 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1861 {
1862 	struct walkarg		*w = v;
1863 	int			 error = 0, size;
1864 	struct rt_addrinfo	 info;
1865 	struct ifnet		*ifp;
1866 #ifdef BFD
1867 	struct sockaddr_bfd	 sa_bfd;
1868 #endif
1869 	struct sockaddr_rtlabel	 sa_rl;
1870 	struct sockaddr_in6	 sa_mask;
1871 
1872 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1873 		return 0;
1874 	if (w->w_op == NET_RT_DUMP && w->w_arg) {
1875 		u_int8_t prio = w->w_arg & RTP_MASK;
1876 		if (w->w_arg < 0) {
1877 			prio = (-w->w_arg) & RTP_MASK;
1878 			/* Show all routes that are not this priority */
1879 			if (prio == (rt->rt_priority & RTP_MASK))
1880 				return 0;
1881 		} else {
1882 			if (prio != (rt->rt_priority & RTP_MASK) &&
1883 			    prio != RTP_ANY)
1884 				return 0;
1885 		}
1886 	}
1887 	bzero(&info, sizeof(info));
1888 	info.rti_info[RTAX_DST] = rt_key(rt);
1889 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1890 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1891 	ifp = if_get(rt->rt_ifidx);
1892 	if (ifp != NULL) {
1893 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1894 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1895 		if (ifp->if_flags & IFF_POINTOPOINT)
1896 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1897 	}
1898 	if_put(ifp);
1899 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1900 #ifdef BFD
1901 	if (rt->rt_flags & RTF_BFD)
1902 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
1903 #endif
1904 #ifdef MPLS
1905 	if (rt->rt_flags & RTF_MPLS) {
1906 		struct sockaddr_mpls	 sa_mpls;
1907 
1908 		bzero(&sa_mpls, sizeof(sa_mpls));
1909 		sa_mpls.smpls_family = AF_MPLS;
1910 		sa_mpls.smpls_len = sizeof(sa_mpls);
1911 		sa_mpls.smpls_label = ((struct rt_mpls *)
1912 		    rt->rt_llinfo)->mpls_label;
1913 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1914 		info.rti_mpls = ((struct rt_mpls *)
1915 		    rt->rt_llinfo)->mpls_operation;
1916 	}
1917 #endif
1918 
1919 	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1920 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1921 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1922 
1923 		rtm->rtm_pid = curproc->p_p->ps_pid;
1924 		rtm->rtm_flags = rt->rt_flags;
1925 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
1926 		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1927 		/* Do not account the routing table's reference. */
1928 		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
1929 		rtm->rtm_index = rt->rt_ifidx;
1930 		rtm->rtm_addrs = info.rti_addrs;
1931 		rtm->rtm_tableid = id;
1932 #ifdef MPLS
1933 		rtm->rtm_mpls = info.rti_mpls;
1934 #endif
1935 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1936 			w->w_where = NULL;
1937 		else
1938 			w->w_where += size;
1939 	}
1940 	return (error);
1941 }
1942 
1943 int
1944 sysctl_iflist(int af, struct walkarg *w)
1945 {
1946 	struct ifnet		*ifp;
1947 	struct ifaddr		*ifa;
1948 	struct rt_addrinfo	 info;
1949 	int			 len, error = 0;
1950 
1951 	bzero(&info, sizeof(info));
1952 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1953 		if (w->w_arg && w->w_arg != ifp->if_index)
1954 			continue;
1955 		/* Copy the link-layer address first */
1956 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1957 		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
1958 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1959 			struct if_msghdr *ifm;
1960 
1961 			ifm = (struct if_msghdr *)w->w_tmem;
1962 			ifm->ifm_index = ifp->if_index;
1963 			ifm->ifm_tableid = ifp->if_rdomain;
1964 			ifm->ifm_flags = ifp->if_flags;
1965 			if_getdata(ifp, &ifm->ifm_data);
1966 			ifm->ifm_addrs = info.rti_addrs;
1967 			error = copyout(ifm, w->w_where, len);
1968 			if (error)
1969 				return (error);
1970 			w->w_where += len;
1971 		}
1972 		info.rti_info[RTAX_IFP] = NULL;
1973 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1974 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
1975 			if (af && af != ifa->ifa_addr->sa_family)
1976 				continue;
1977 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1978 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1979 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1980 			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
1981 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1982 				struct ifa_msghdr *ifam;
1983 
1984 				ifam = (struct ifa_msghdr *)w->w_tmem;
1985 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1986 				ifam->ifam_flags = ifa->ifa_flags;
1987 				ifam->ifam_metric = ifa->ifa_metric;
1988 				ifam->ifam_addrs = info.rti_addrs;
1989 				error = copyout(w->w_tmem, w->w_where, len);
1990 				if (error)
1991 					return (error);
1992 				w->w_where += len;
1993 			}
1994 		}
1995 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1996 		    info.rti_info[RTAX_BRD] = NULL;
1997 	}
1998 	return (0);
1999 }
2000 
2001 int
2002 sysctl_ifnames(struct walkarg *w)
2003 {
2004 	struct if_nameindex_msg ifn;
2005 	struct ifnet *ifp;
2006 	int error = 0;
2007 
2008 	/* XXX ignore tableid for now */
2009 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2010 		if (w->w_arg && w->w_arg != ifp->if_index)
2011 			continue;
2012 		w->w_needed += sizeof(ifn);
2013 		if (w->w_where && w->w_needed <= 0) {
2014 
2015 			memset(&ifn, 0, sizeof(ifn));
2016 			ifn.if_index = ifp->if_index;
2017 			strlcpy(ifn.if_name, ifp->if_xname,
2018 			    sizeof(ifn.if_name));
2019 			error = copyout(&ifn, w->w_where, sizeof(ifn));
2020 			if (error)
2021 				return (error);
2022 			w->w_where += sizeof(ifn);
2023 		}
2024 	}
2025 
2026 	return (0);
2027 }
2028 
2029 int
2030 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2031     size_t newlen)
2032 {
2033 	int			 i, error = EINVAL;
2034 	u_char			 af;
2035 	struct walkarg		 w;
2036 	struct rt_tableinfo	 tableinfo;
2037 	u_int			 tableid = 0;
2038 
2039 	if (new)
2040 		return (EPERM);
2041 	if (namelen < 3 || namelen > 4)
2042 		return (EINVAL);
2043 	af = name[0];
2044 	bzero(&w, sizeof(w));
2045 	w.w_where = where;
2046 	w.w_given = *given;
2047 	w.w_needed = 0 - w.w_given;
2048 	w.w_op = name[1];
2049 	w.w_arg = name[2];
2050 
2051 	if (namelen == 4) {
2052 		tableid = name[3];
2053 		if (!rtable_exists(tableid))
2054 			return (ENOENT);
2055 	} else
2056 		tableid = curproc->p_p->ps_rtableid;
2057 
2058 	switch (w.w_op) {
2059 	case NET_RT_DUMP:
2060 	case NET_RT_FLAGS:
2061 		NET_LOCK();
2062 		for (i = 1; i <= AF_MAX; i++) {
2063 			if (af != 0 && af != i)
2064 				continue;
2065 
2066 			error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2067 			    &w);
2068 			if (error == EAFNOSUPPORT)
2069 				error = 0;
2070 			if (error)
2071 				break;
2072 		}
2073 		NET_UNLOCK();
2074 		break;
2075 
2076 	case NET_RT_IFLIST:
2077 		NET_LOCK();
2078 		error = sysctl_iflist(af, &w);
2079 		NET_UNLOCK();
2080 		break;
2081 
2082 	case NET_RT_STATS:
2083 		return (sysctl_rtable_rtstat(where, given, new));
2084 	case NET_RT_TABLE:
2085 		tableid = w.w_arg;
2086 		if (!rtable_exists(tableid))
2087 			return (ENOENT);
2088 		memset(&tableinfo, 0, sizeof tableinfo);
2089 		tableinfo.rti_tableid = tableid;
2090 		tableinfo.rti_domainid = rtable_l2(tableid);
2091 		error = sysctl_rdstruct(where, given, new,
2092 		    &tableinfo, sizeof(tableinfo));
2093 		return (error);
2094 	case NET_RT_IFNAMES:
2095 		NET_LOCK();
2096 		error = sysctl_ifnames(&w);
2097 		NET_UNLOCK();
2098 		break;
2099 	}
2100 	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2101 	w.w_needed += w.w_given;
2102 	if (where) {
2103 		*given = w.w_where - (caddr_t)where;
2104 		if (*given < w.w_needed)
2105 			return (ENOMEM);
2106 	} else
2107 		*given = (11 * w.w_needed) / 10;
2108 
2109 	return (error);
2110 }
2111 
2112 int
2113 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2114 {
2115 	extern struct cpumem *rtcounters;
2116 	uint64_t counters[rts_ncounters];
2117 	struct rtstat rtstat;
2118 	uint32_t *words = (uint32_t *)&rtstat;
2119 	int i;
2120 
2121 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2122 	memset(&rtstat, 0, sizeof rtstat);
2123 	counters_read(rtcounters, counters, nitems(counters));
2124 
2125 	for (i = 0; i < nitems(counters); i++)
2126 		words[i] = (uint32_t)counters[i];
2127 
2128 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2129 }
2130 
2131 int
2132 rtm_validate_proposal(struct rt_addrinfo *info)
2133 {
2134 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2135 	    RTA_SEARCH)) {
2136 		return -1;
2137 	}
2138 
2139 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2140 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2141 		if (sa == NULL)
2142 			return -1;
2143 		switch (sa->sa_family) {
2144 		case AF_INET:
2145 			if (sa->sa_len != sizeof(struct sockaddr_in))
2146 				return -1;
2147 			break;
2148 		case AF_INET6:
2149 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2150 				return -1;
2151 			break;
2152 		default:
2153 			return -1;
2154 		}
2155 	}
2156 
2157 	if (ISSET(info->rti_addrs, RTA_IFA)) {
2158 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
2159 		if (sa == NULL)
2160 			return -1;
2161 		switch (sa->sa_family) {
2162 		case AF_INET:
2163 			if (sa->sa_len != sizeof(struct sockaddr_in))
2164 				return -1;
2165 			break;
2166 		case AF_INET6:
2167 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2168 				return -1;
2169 			break;
2170 		default:
2171 			return -1;
2172 		}
2173 	}
2174 
2175 	if (ISSET(info->rti_addrs, RTA_DNS)) {
2176 		struct sockaddr_rtdns *rtdns =
2177 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2178 		if (rtdns == NULL)
2179 			return -1;
2180 		if (rtdns->sr_len > sizeof(*rtdns))
2181 			return -1;
2182 		if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2183 			return -1;
2184 		switch (rtdns->sr_family) {
2185 		case AF_INET:
2186 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2187 			    sr_dns)) % sizeof(struct in_addr) != 0)
2188 				return -1;
2189 			break;
2190 #ifdef INET6
2191 		case AF_INET6:
2192 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2193 			    sr_dns)) % sizeof(struct in6_addr) != 0)
2194 				return -1;
2195 			break;
2196 #endif
2197 		default:
2198 			return -1;
2199 		}
2200 	}
2201 
2202 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
2203 		struct sockaddr_rtstatic *rtstatic =
2204 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2205 		if (rtstatic == NULL)
2206 			return -1;
2207 		if (rtstatic->sr_len > sizeof(*rtstatic))
2208 			return -1;
2209 		if (rtstatic->sr_len <=
2210 		    offsetof(struct sockaddr_rtstatic, sr_static))
2211 			return -1;
2212 	}
2213 
2214 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2215 		struct sockaddr_rtsearch *rtsearch =
2216 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2217 		if (rtsearch == NULL)
2218 			return -1;
2219 		if (rtsearch->sr_len > sizeof(*rtsearch))
2220 			return -1;
2221 		if (rtsearch->sr_len <=
2222 		    offsetof(struct sockaddr_rtsearch, sr_search))
2223 			return -1;
2224 	}
2225 
2226 	return 0;
2227 }
2228 
2229 /*
2230  * Definitions of protocols supported in the ROUTE domain.
2231  */
2232 
2233 extern	struct domain routedomain;		/* or at least forward */
2234 
2235 struct protosw routesw[] = {
2236 {
2237   .pr_type	= SOCK_RAW,
2238   .pr_domain	= &routedomain,
2239   .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2240   .pr_output	= route_output,
2241   .pr_ctloutput	= route_ctloutput,
2242   .pr_usrreq	= route_usrreq,
2243   .pr_attach	= route_attach,
2244   .pr_detach	= route_detach,
2245   .pr_init	= route_prinit,
2246   .pr_sysctl	= sysctl_rtable
2247 }
2248 };
2249 
2250 struct domain routedomain = {
2251   .dom_family = PF_ROUTE,
2252   .dom_name = "route",
2253   .dom_init = route_init,
2254   .dom_protosw = routesw,
2255   .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2256 };
2257