xref: /openbsd-src/sys/net/rtsock.c (revision c90a81c56dcebd6a1b73fe4aff9b03385b8e63b3)
1 /*	$OpenBSD: rtsock.c,v 1.281 2018/12/20 10:27:37 claudio Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/protosw.h>
73 #include <sys/srp.h>
74 
75 #include <net/if.h>
76 #include <net/if_dl.h>
77 #include <net/if_var.h>
78 #include <net/route.h>
79 
80 #include <netinet/in.h>
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif
85 #ifdef IPSEC
86 #include <netinet/ip_ipsp.h>
87 #include <net/if_enc.h>
88 #endif
89 #ifdef BFD
90 #include <net/bfd.h>
91 #endif
92 
93 #include <sys/stdarg.h>
94 #include <sys/kernel.h>
95 #include <sys/timeout.h>
96 
97 #define	ROUTESNDQ	8192
98 #define	ROUTERCVQ	8192
99 
100 const struct sockaddr route_src = { 2, PF_ROUTE, };
101 
/*
 * Argument block handed to rtm_msg2() and the sysctl walkers
 * (sysctl_iflist(), sysctl_ifnames()) declared below.
 * NOTE(review): the users of these fields are outside this part of the
 * file; judging by the names, w_where/w_given/w_needed track a caller
 * supplied buffer and w_tmem/w_tmemsize a temporary one -- confirm.
 */
struct walkarg {
	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
	caddr_t	w_where, w_tmem;
};
106 
107 void	route_prinit(void);
108 void	rcb_ref(void *, void *);
109 void	rcb_unref(void *, void *);
110 int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
111 	    struct mbuf *);
112 int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
113 int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
114 	    struct mbuf *, struct proc *);
115 void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
116 int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
117 int	route_cleargateway(struct rtentry *, void *, unsigned int);
118 void	rtm_senddesync_timer(void *);
119 void	rtm_senddesync(struct socket *);
120 int	rtm_sendup(struct socket *, struct mbuf *, int);
121 
122 int	rtm_getifa(struct rt_addrinfo *, unsigned int);
123 int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
124 	    uint8_t, unsigned int);
125 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
126 struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
127 int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
128 		     struct walkarg *);
129 void		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
130 int		 rtm_validate_proposal(struct rt_addrinfo *);
131 void		 rtm_setmetrics(u_long, const struct rt_metrics *,
132 		     struct rt_kmetrics *);
133 void		 rtm_getmetrics(const struct rt_kmetrics *,
134 		     struct rt_metrics *);
135 
136 int		 sysctl_iflist(int, struct walkarg *);
137 int		 sysctl_ifnames(struct walkarg *);
138 int		 sysctl_rtable_rtstat(void *, size_t *, void *);
139 
/*
 * Per-socket control block of a routing socket.  Allocated in
 * route_attach(), stored in so->so_pcb and released in route_detach().
 */
struct rtpcb {
	struct socket		*rop_socket;	/* back pointer to the socket */

	SRPL_ENTRY(rtpcb)	rop_list;	/* linkage in rtptable.rtp_list */
	struct refcnt		rop_refcnt;	/* held by list walkers; detach
						   waits for it to drain */
	struct timeout		rop_timeout;	/* retries RTM_DESYNC delivery */
	unsigned int		rop_msgfilter;	/* bitmask of (1 << rtm_type)
						   wanted; 0 means no filter */
	unsigned int		rop_flags;	/* ROUTECB_FLAG_* */
	u_int			rop_rtableid;	/* table filter or RTABLE_ANY */
	unsigned short		rop_proto;	/* address family filter;
						   AF_UNSPEC matches all */
	u_char			rop_priority;	/* drop messages with a larger
						   rtm_priority; 0 means off */
};
152 #define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
153 
/*
 * Global table of all attached routing sockets.  route_input() walks
 * rtp_list with SRPL_FOREACH; route_attach() and route_detach() modify
 * it while holding rtp_lk for writing.
 */
struct rtptable {
	SRPL_HEAD(, rtpcb)	rtp_list;	/* list of rtpcb */
	struct srpl_rc		rtp_rc;		/* ref/unref callbacks for list */
	struct rwlock		rtp_lk;		/* serializes list updates */
	unsigned int		rtp_count;	/* number of rtpcb on the list */
};
160 
161 struct rtptable rtptable;
162 
163 /*
164  * These flags and timeout are used for indicating to userland (via a
165  * RTM_DESYNC msg) when the route socket has overflowed and messages
166  * have been lost.
167  */
168 #define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
169 #define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
170 					   queueing more packets */
171 
172 #define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */
173 
174 void
175 route_prinit(void)
176 {
177 	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
178 	rw_init(&rtptable.rtp_lk, "rtsock");
179 	SRPL_INIT(&rtptable.rtp_list);
180 }
181 
182 void
183 rcb_ref(void *null, void *v)
184 {
185 	struct rtpcb *rop = v;
186 
187 	refcnt_take(&rop->rop_refcnt);
188 }
189 
190 void
191 rcb_unref(void *null, void *v)
192 {
193 	struct rtpcb *rop = v;
194 
195 	refcnt_rele_wake(&rop->rop_refcnt);
196 }
197 
198 int
199 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
200     struct mbuf *control, struct proc *p)
201 {
202 	struct rtpcb	*rop;
203 	int		 error = 0;
204 
205 	if (req == PRU_CONTROL)
206 		return (EOPNOTSUPP);
207 
208 	soassertlocked(so);
209 
210 	if (control && control->m_len) {
211 		error = EOPNOTSUPP;
212 		goto release;
213 	}
214 
215 	rop = sotortpcb(so);
216 	if (rop == NULL) {
217 		error = EINVAL;
218 		goto release;
219 	}
220 
221 	switch (req) {
222 	/* no connect, bind, accept. Socket is connected from the start */
223 	case PRU_CONNECT:
224 	case PRU_BIND:
225 	case PRU_CONNECT2:
226 	case PRU_LISTEN:
227 	case PRU_ACCEPT:
228 		error = EOPNOTSUPP;
229 		break;
230 
231 	case PRU_DISCONNECT:
232 	case PRU_ABORT:
233 		soisdisconnected(so);
234 		break;
235 	case PRU_SHUTDOWN:
236 		socantsendmore(so);
237 		break;
238 	case PRU_SENSE:
239 		/* stat: don't bother with a blocksize. */
240 		return (0);
241 
242 	/* minimal support, just implement a fake peer address */
243 	case PRU_SOCKADDR:
244 		error = EINVAL;
245 		break;
246 	case PRU_PEERADDR:
247 		bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
248 		nam->m_len = route_src.sa_len;
249 		break;
250 
251 	case PRU_RCVOOB:
252 		return (EOPNOTSUPP);
253 	case PRU_RCVD:
254 		/*
255 		 * If we are in a FLUSH state, check if the buffer is
256 		 * empty so that we can clear the flag.
257 		 */
258 		if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
259 		    ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
260 		    rop->rop_socket->so_rcv.sb_hiwat)))
261 			rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
262 		return (0);
263 
264 	case PRU_SENDOOB:
265 		error = EOPNOTSUPP;
266 		break;
267 	case PRU_SEND:
268 		if (nam) {
269 			error = EISCONN;
270 			break;
271 		}
272 		error = (*so->so_proto->pr_output)(m, so, NULL, NULL);
273 		m = NULL;
274 		break;
275 	default:
276 		panic("route_usrreq");
277 	}
278 
279  release:
280 	m_freem(control);
281 	m_freem(m);
282 	return (error);
283 }
284 
285 int
286 route_attach(struct socket *so, int proto)
287 {
288 	struct rtpcb	*rop;
289 	int		 error;
290 
291 	/*
292 	 * use the rawcb but allocate a rtpcb, this
293 	 * code does not care about the additional fields
294 	 * and works directly on the raw socket.
295 	 */
296 	rop = malloc(sizeof(struct rtpcb), M_PCB, M_WAITOK|M_ZERO);
297 	so->so_pcb = rop;
298 	/* Init the timeout structure */
299 	timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
300 	refcnt_init(&rop->rop_refcnt);
301 
302 	if (curproc == NULL)
303 		error = EACCES;
304 	else
305 		error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
306 	if (error) {
307 		free(rop, M_PCB, sizeof(struct rtpcb));
308 		return (error);
309 	}
310 
311 	rop->rop_socket = so;
312 	rop->rop_proto = proto;
313 
314 	rop->rop_rtableid = curproc->p_p->ps_rtableid;
315 
316 	soisconnected(so);
317 	so->so_options |= SO_USELOOPBACK;
318 
319 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
320 	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rop_list);
321 	rtptable.rtp_count++;
322 	rw_exit(&rtptable.rtp_lk);
323 
324 	return (0);
325 }
326 
327 int
328 route_detach(struct socket *so)
329 {
330 	struct rtpcb	*rop;
331 
332 	soassertlocked(so);
333 
334 	rop = sotortpcb(so);
335 	if (rop == NULL)
336 		return (EINVAL);
337 
338 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
339 
340 	timeout_del(&rop->rop_timeout);
341 	rtptable.rtp_count--;
342 
343 	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
344 	    rop_list);
345 	rw_exit(&rtptable.rtp_lk);
346 
347 	/* wait for all references to drop */
348 	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
349 
350 	so->so_pcb = NULL;
351 	KASSERT((so->so_state & SS_NOFDREF) == 0);
352 	free(rop, M_PCB, sizeof(struct rtpcb));
353 
354 	return (0);
355 }
356 
357 int
358 route_ctloutput(int op, struct socket *so, int level, int optname,
359     struct mbuf *m)
360 {
361 	struct rtpcb *rop = sotortpcb(so);
362 	int error = 0;
363 	unsigned int tid, prio;
364 
365 	if (level != AF_ROUTE)
366 		return (EINVAL);
367 
368 	switch (op) {
369 	case PRCO_SETOPT:
370 		switch (optname) {
371 		case ROUTE_MSGFILTER:
372 			if (m == NULL || m->m_len != sizeof(unsigned int))
373 				error = EINVAL;
374 			else
375 				rop->rop_msgfilter = *mtod(m, unsigned int *);
376 			break;
377 		case ROUTE_TABLEFILTER:
378 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
379 				error = EINVAL;
380 				break;
381 			}
382 			tid = *mtod(m, unsigned int *);
383 			if (tid != RTABLE_ANY && !rtable_exists(tid))
384 				error = ENOENT;
385 			else
386 				rop->rop_rtableid = tid;
387 			break;
388 		case ROUTE_PRIOFILTER:
389 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
390 				error = EINVAL;
391 				break;
392 			}
393 			prio = *mtod(m, unsigned int *);
394 			if (prio > RTP_MAX)
395 				error = EINVAL;
396 			else
397 				rop->rop_priority = prio;
398 			break;
399 		default:
400 			error = ENOPROTOOPT;
401 			break;
402 		}
403 		break;
404 	case PRCO_GETOPT:
405 		switch (optname) {
406 		case ROUTE_MSGFILTER:
407 			m->m_len = sizeof(unsigned int);
408 			*mtod(m, unsigned int *) = rop->rop_msgfilter;
409 			break;
410 		case ROUTE_TABLEFILTER:
411 			m->m_len = sizeof(unsigned int);
412 			*mtod(m, unsigned int *) = rop->rop_rtableid;
413 			break;
414 		case ROUTE_PRIOFILTER:
415 			m->m_len = sizeof(unsigned int);
416 			*mtod(m, unsigned int *) = rop->rop_priority;
417 			break;
418 		default:
419 			error = ENOPROTOOPT;
420 			break;
421 		}
422 	}
423 	return (error);
424 }
425 
/*
 * Timeout handler installed by route_attach(): retry delivery of the
 * RTM_DESYNC message for the socket passed as `xso', with that socket
 * locked for the duration of the call.
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*so = xso;
	int		 s = solock(so);

	rtm_senddesync(so);
	sounlock(so, s);
}
436 
437 void
438 rtm_senddesync(struct socket *so)
439 {
440 	struct rtpcb	*rop = sotortpcb(so);
441 	struct mbuf	*desync_mbuf;
442 
443 	soassertlocked(so);
444 
445 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
446 	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
447 		return;
448 
449 	/*
450 	 * If we fail to alloc memory or if sbappendaddr()
451 	 * fails, re-add timeout and try again.
452 	 */
453 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
454 	if (desync_mbuf != NULL) {
455 		if (sbappendaddr(so, &so->so_rcv, &route_src,
456 		    desync_mbuf, NULL) != 0) {
457 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
458 			sorwakeup(rop->rop_socket);
459 			return;
460 		}
461 		m_freem(desync_mbuf);
462 	}
463 	/* Re-add timeout to try sending msg again */
464 	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
465 }
466 
467 void
468 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
469 {
470 	struct socket *so;
471 	struct rtpcb *rop;
472 	struct rt_msghdr *rtm;
473 	struct mbuf *m = m0;
474 	struct socket *last = NULL;
475 	struct srp_ref sr;
476 	int s;
477 
478 	/* ensure that we can access the rtm_type via mtod() */
479 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
480 		m_freem(m);
481 		return;
482 	}
483 
484 	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
485 		/*
486 		 * If route socket is bound to an address family only send
487 		 * messages that match the address family. Address family
488 		 * agnostic messages are always sent.
489 		 */
490 		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
491 		    rop->rop_proto != sa_family)
492 			continue;
493 
494 
495 		so = rop->rop_socket;
496 		s = solock(so);
497 
498 		/*
499 		 * Check to see if we don't want our own messages and
500 		 * if we can receive anything.
501 		 */
502 		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
503 		    !(so->so_state & SS_ISCONNECTED) ||
504 		    (so->so_state & SS_CANTRCVMORE)) {
505 next:
506 			sounlock(so, s);
507 			continue;
508 		}
509 
510 		/* filter messages that the process does not want */
511 		rtm = mtod(m, struct rt_msghdr *);
512 		/* but RTM_DESYNC can't be filtered */
513 		if (rtm->rtm_type != RTM_DESYNC && rop->rop_msgfilter != 0 &&
514 		    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
515 			goto next;
516 		switch (rtm->rtm_type) {
517 		case RTM_IFANNOUNCE:
518 		case RTM_DESYNC:
519 			/* no tableid */
520 			break;
521 		case RTM_RESOLVE:
522 		case RTM_NEWADDR:
523 		case RTM_DELADDR:
524 		case RTM_IFINFO:
525 		case RTM_80211INFO:
526 		case RTM_BFD:
527 			/* check against rdomain id */
528 			if (rop->rop_rtableid != RTABLE_ANY &&
529 			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
530 				goto next;
531 			break;
532 		default:
533 			if (rop->rop_priority != 0 &&
534 			    rop->rop_priority < rtm->rtm_priority)
535 				goto next;
536 			/* check against rtable id */
537 			if (rop->rop_rtableid != RTABLE_ANY &&
538 			    rop->rop_rtableid != rtm->rtm_tableid)
539 				goto next;
540 			break;
541 		}
542 
543 		/*
544 		 * Check to see if the flush flag is set. If so, don't queue
545 		 * any more messages until the flag is cleared.
546 		 */
547 		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
548 			goto next;
549 		sounlock(so, s);
550 
551 		if (last) {
552 			s = solock(last);
553 			rtm_sendup(last, m, 1);
554 			sounlock(last, s);
555 			refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
556 		}
557 		/* keep a reference for last */
558 		refcnt_take(&rop->rop_refcnt);
559 		last = rop->rop_socket;
560 	}
561 	SRPL_LEAVE(&sr);
562 
563 	if (last) {
564 		s = solock(last);
565 		rtm_sendup(last, m, 0);
566 		sounlock(last, s);
567 		refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
568 	} else
569 		m_freem(m);
570 }
571 
572 int
573 rtm_sendup(struct socket *so, struct mbuf *m0, int more)
574 {
575 	struct rtpcb *rop = sotortpcb(so);
576 	struct mbuf *m;
577 
578 	soassertlocked(so);
579 
580 	if (more) {
581 		m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
582 		if (m == NULL)
583 			return (ENOMEM);
584 	} else
585 		m = m0;
586 
587 	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
588 	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
589 		/* Flag socket as desync'ed and flush required */
590 		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
591 		rtm_senddesync(so);
592 		m_freem(m);
593 		return (ENOBUFS);
594 	}
595 
596 	sorwakeup(so);
597 	return (0);
598 }
599 
600 struct rt_msghdr *
601 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
602 {
603 	struct rt_msghdr	*rtm;
604 	struct rt_addrinfo	 info;
605 	struct sockaddr_rtlabel	 sa_rl;
606 	struct sockaddr_in6	 sa_mask;
607 #ifdef BFD
608 	struct sockaddr_bfd	 sa_bfd;
609 #endif
610 	struct ifnet		*ifp = NULL;
611 	int			 len;
612 
613 	bzero(&info, sizeof(info));
614 	info.rti_info[RTAX_DST] = rt_key(rt);
615 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
616 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
617 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
618 #ifdef BFD
619 	if (rt->rt_flags & RTF_BFD)
620 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
621 #endif
622 #ifdef MPLS
623 	if (rt->rt_flags & RTF_MPLS) {
624 		struct sockaddr_mpls	 sa_mpls;
625 
626 		bzero(&sa_mpls, sizeof(sa_mpls));
627 		sa_mpls.smpls_family = AF_MPLS;
628 		sa_mpls.smpls_len = sizeof(sa_mpls);
629 		sa_mpls.smpls_label = ((struct rt_mpls *)
630 		    rt->rt_llinfo)->mpls_label;
631 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
632 		info.rti_mpls = ((struct rt_mpls *)
633 		    rt->rt_llinfo)->mpls_operation;
634 	}
635 #endif
636 	ifp = if_get(rt->rt_ifidx);
637 	if (ifp != NULL) {
638 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
639 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
640 		if (ifp->if_flags & IFF_POINTOPOINT)
641 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
642 	}
643 	if_put(ifp);
644 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
645 
646 	/* build new route message */
647 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
648 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
649 
650 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
651 	rtm->rtm_type = type;
652 	rtm->rtm_index = rt->rt_ifidx;
653 	rtm->rtm_tableid = tableid;
654 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
655 	rtm->rtm_flags = rt->rt_flags;
656 	rtm->rtm_pid = curproc->p_p->ps_pid;
657 	rtm->rtm_seq = seq;
658 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
659 	rtm->rtm_addrs = info.rti_addrs;
660 #ifdef MPLS
661 	rtm->rtm_mpls = info.rti_mpls;
662 #endif
663 	return rtm;
664 }
665 
666 int
667 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
668     struct mbuf *control)
669 {
670 	struct rt_msghdr	*rtm = NULL;
671 	struct rtentry		*rt = NULL;
672 	struct rt_addrinfo	 info;
673 	int			 len, seq, error = 0;
674 	u_int			 tableid;
675 	u_int8_t		 prio;
676 	u_char			 vers, type;
677 
678 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
679 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
680 		return (ENOBUFS);
681 	if ((m->m_flags & M_PKTHDR) == 0)
682 		panic("route_output");
683 	len = m->m_pkthdr.len;
684 	if (len < offsetof(struct rt_msghdr, rtm_type) + 1 ||
685 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
686 		error = EINVAL;
687 		goto fail;
688 	}
689 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
690 	switch (vers) {
691 	case RTM_VERSION:
692 		if (len < sizeof(struct rt_msghdr)) {
693 			error = EINVAL;
694 			goto fail;
695 		}
696 		if (len > RTM_MAXSIZE) {
697 			error = EMSGSIZE;
698 			goto fail;
699 		}
700 		rtm = malloc(len, M_RTABLE, M_WAITOK);
701 		m_copydata(m, 0, len, (caddr_t)rtm);
702 		break;
703 	default:
704 		error = EPROTONOSUPPORT;
705 		goto fail;
706 	}
707 	rtm->rtm_pid = curproc->p_p->ps_pid;
708 	if (rtm->rtm_hdrlen == 0)	/* old client */
709 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
710 	if (len < rtm->rtm_hdrlen) {
711 		error = EINVAL;
712 		goto fail;
713 	}
714 
715 	/* Verify that the caller is sending an appropriate message early */
716 	switch (rtm->rtm_type) {
717 	case RTM_ADD:
718 	case RTM_DELETE:
719 	case RTM_GET:
720 	case RTM_CHANGE:
721 	case RTM_PROPOSAL:
722 		break;
723 	default:
724 		error = EOPNOTSUPP;
725 		goto fail;
726 	}
727 
728 	/*
729 	 * Verify that the caller has the appropriate privilege; RTM_GET
730 	 * is the only operation the non-superuser is allowed.
731 	 */
732 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
733 		error = EACCES;
734 		goto fail;
735 	}
736 	tableid = rtm->rtm_tableid;
737 	if (!rtable_exists(tableid)) {
738 		if (rtm->rtm_type == RTM_ADD) {
739 			if ((error = rtable_add(tableid)) != 0)
740 				goto fail;
741 		} else {
742 			error = EINVAL;
743 			goto fail;
744 		}
745 	}
746 
747 
748 	/* Do not let userland play with kernel-only flags. */
749 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
750 		error = EINVAL;
751 		goto fail;
752 	}
753 
754 	/* make sure that kernel-only bits are not set */
755 	rtm->rtm_priority &= RTP_MASK;
756 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
757 	rtm->rtm_fmask &= RTF_FMASK;
758 
759 	if (rtm->rtm_priority != 0) {
760 		if (rtm->rtm_priority > RTP_MAX ||
761 		    rtm->rtm_priority == RTP_LOCAL) {
762 			error = EINVAL;
763 			goto fail;
764 		}
765 		prio = rtm->rtm_priority;
766 	} else if (rtm->rtm_type != RTM_ADD)
767 		prio = RTP_ANY;
768 	else if (rtm->rtm_flags & RTF_STATIC)
769 		prio = 0;
770 	else
771 		prio = RTP_DEFAULT;
772 
773 	bzero(&info, sizeof(info));
774 	info.rti_addrs = rtm->rtm_addrs;
775 	rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info);
776 	info.rti_flags = rtm->rtm_flags;
777 	if (rtm->rtm_type != RTM_PROPOSAL &&
778 	   (info.rti_info[RTAX_DST] == NULL ||
779 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
780 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
781 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
782 	    info.rti_info[RTAX_GENMASK] != NULL)) {
783 		error = EINVAL;
784 		goto fail;
785 	}
786 #ifdef MPLS
787 	info.rti_mpls = rtm->rtm_mpls;
788 #endif
789 
790 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
791 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
792 	    (info.rti_flags & RTF_CLONING) == 0) {
793 		info.rti_flags |= RTF_LLINFO;
794 	}
795 
796 	/*
797 	 * Validate RTM_PROPOSAL and pass it along or error out.
798 	 */
799 	if (rtm->rtm_type == RTM_PROPOSAL) {
800 		if (rtm_validate_proposal(&info) == -1) {
801 			error = EINVAL;
802 			goto fail;
803 		}
804 	} else {
805 		error = rtm_output(rtm, &rt, &info, prio, tableid);
806 		if (!error) {
807 			type = rtm->rtm_type;
808 			seq = rtm->rtm_seq;
809 			free(rtm, M_RTABLE, len);
810 			rtm = rtm_report(rt, type, seq, tableid);
811 			len = rtm->rtm_msglen;
812 		}
813 	}
814 
815 	rtfree(rt);
816 	if (error) {
817 		rtm->rtm_errno = error;
818 	} else {
819 		rtm->rtm_flags |= RTF_DONE;
820 	}
821 
822 	/*
823 	 * Check to see if we don't want our own messages.
824 	 */
825 	if (!(so->so_options & SO_USELOOPBACK)) {
826 		if (rtptable.rtp_count <= 1) {
827 			/* no other listener and no loopback of messages */
828 fail:
829 			free(rtm, M_RTABLE, len);
830 			m_freem(m);
831 			return (error);
832 		}
833 	}
834 	if (rtm) {
835 		if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
836 			m_freem(m);
837 			m = NULL;
838 		} else if (m->m_pkthdr.len > len)
839 			m_adj(m, len - m->m_pkthdr.len);
840 		free(rtm, M_RTABLE, len);
841 	}
842 	if (m)
843 		route_input(m, so, info.rti_info[RTAX_DST] ?
844 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
845 
846 	return (error);
847 }
848 
849 int
850 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
851     struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
852 {
853 	struct rtentry		*rt = *prt;
854 	struct ifnet		*ifp = NULL;
855 	int			 plen, newgate = 0, error = 0;
856 
857 	switch (rtm->rtm_type) {
858 	case RTM_ADD:
859 		if (info->rti_info[RTAX_GATEWAY] == NULL) {
860 			error = EINVAL;
861 			break;
862 		}
863 
864 		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
865 		if ((error = route_arp_conflict(rt, info))) {
866 			rtfree(rt);
867 			rt = NULL;
868 			break;
869 		}
870 
871 		/*
872 		 * We cannot go through a delete/create/insert cycle for
873 		 * cached route because this can lead to races in the
874 		 * receive path.  Instead we update the L2 cache.
875 		 */
876 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
877 			goto change;
878 
879 		rtfree(rt);
880 		rt = NULL;
881 
882 		NET_LOCK();
883 		if ((error = rtm_getifa(info, tableid)) != 0) {
884 			NET_UNLOCK();
885 			break;
886 		}
887 		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
888 		NET_UNLOCK();
889 		if (error == 0)
890 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
891 			    &rt->rt_rmx);
892 		break;
893 	case RTM_DELETE:
894 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
895 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
896 		    prio);
897 		if (rt == NULL) {
898 			error = ESRCH;
899 			break;
900 		}
901 
902 		/*
903 		 * If we got multipath routes, we require users to specify
904 		 * a matching gateway.
905 		 */
906 		if (ISSET(rt->rt_flags, RTF_MPATH) &&
907 		    info->rti_info[RTAX_GATEWAY] == NULL) {
908 			error = ESRCH;
909 			break;
910 		}
911 
912 		/* Detaching an interface requires the KERNEL_LOCK(). */
913 		ifp = if_get(rt->rt_ifidx);
914 		KASSERT(ifp != NULL);
915 
916 		/*
917 		 * Invalidate the cache of automagically created and
918 		 * referenced L2 entries to make sure that ``rt_gwroute''
919 		 * pointer stays valid for other CPUs.
920 		 */
921 		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
922 			NET_LOCK();
923 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
924 			/* Reset the MTU of the gateway route. */
925 			rtable_walk(tableid, rt_key(rt)->sa_family,
926 			    route_cleargateway, rt);
927 			NET_UNLOCK();
928 			if_put(ifp);
929 			break;
930 		}
931 
932 		/*
933 		 * Make sure that local routes are only modified by the
934 		 * kernel.
935 		 */
936 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
937 			if_put(ifp);
938 			error = EINVAL;
939 			break;
940 		}
941 
942 		rtfree(rt);
943 		rt = NULL;
944 
945 		NET_LOCK();
946 		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
947 		NET_UNLOCK();
948 		if_put(ifp);
949 		break;
950 	case RTM_CHANGE:
951 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
952 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
953 		    prio);
954 		/*
955 		 * If we got multipath routes, we require users to specify
956 		 * a matching gateway.
957 		 */
958 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
959 		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
960 			rtfree(rt);
961 			rt = NULL;
962 		}
963 		/*
964 		 * If RTAX_GATEWAY is the argument we're trying to
965 		 * change, try to find a compatible route.
966 		 */
967 		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL) &&
968 		    (rtm->rtm_type == RTM_CHANGE)) {
969 			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
970 			    info->rti_info[RTAX_NETMASK], NULL, prio);
971 			/* Ensure we don't pick a multipath one. */
972 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
973 				rtfree(rt);
974 				rt = NULL;
975 			}
976 		}
977 
978 		if (rt == NULL) {
979 			error = ESRCH;
980 			break;
981 		}
982 
983 		/*
984 		 * Make sure that local routes are only modified by the
985 		 * kernel.
986 		 */
987 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
988 			error = EINVAL;
989 			break;
990 		}
991 
992 		/*
993 		 * RTM_CHANGE/LOCK need a perfect match.
994 		 */
995 		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
996 		    info->rti_info[RTAX_NETMASK]);
997 		if (rt_plen(rt) != plen) {
998 			error = ESRCH;
999 			break;
1000 		}
1001 
1002 		switch (rtm->rtm_type) {
1003 		case RTM_CHANGE:
1004 			if (info->rti_info[RTAX_GATEWAY] != NULL)
1005 				if (rt->rt_gateway == NULL ||
1006 				    bcmp(rt->rt_gateway,
1007 				    info->rti_info[RTAX_GATEWAY],
1008 				    info->rti_info[RTAX_GATEWAY]->sa_len)) {
1009 					newgate = 1;
1010 				}
1011 			/*
1012 			 * Check reachable gateway before changing the route.
1013 			 * New gateway could require new ifaddr, ifp;
1014 			 * flags may also be different; ifp may be specified
1015 			 * by ll sockaddr when protocol address is ambiguous.
1016 			 */
1017 			if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1018 			    info->rti_info[RTAX_IFA] != NULL) {
1019 				struct ifaddr	*ifa = NULL;
1020 
1021 				NET_LOCK();
1022 				if ((error = rtm_getifa(info, tableid)) != 0) {
1023 					NET_UNLOCK();
1024 					break;
1025 				}
1026 				ifa = info->rti_ifa;
1027 				if (rt->rt_ifa != ifa) {
1028 					ifp = if_get(rt->rt_ifidx);
1029 					KASSERT(ifp != NULL);
1030 					ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1031 					ifafree(rt->rt_ifa);
1032 					if_put(ifp);
1033 
1034 					ifa->ifa_refcnt++;
1035 					rt->rt_ifa = ifa;
1036 					rt->rt_ifidx = ifa->ifa_ifp->if_index;
1037 					/* recheck link state after ifp change*/
1038 					rt_if_linkstate_change(rt, ifa->ifa_ifp,
1039 					    tableid);
1040 				}
1041 				NET_UNLOCK();
1042 			}
1043 change:
1044 			if (info->rti_info[RTAX_GATEWAY] != NULL) {
1045 				/*
1046 				 * When updating the gateway, make sure it's
1047 				 * valid.
1048 				 */
1049 				if (!newgate && rt->rt_gateway->sa_family !=
1050 				    info->rti_info[RTAX_GATEWAY]->sa_family) {
1051 					error = EINVAL;
1052 					break;
1053 				}
1054 
1055 				NET_LOCK();
1056 				error = rt_setgate(rt,
1057 				    info->rti_info[RTAX_GATEWAY], tableid);
1058 				NET_UNLOCK();
1059 				if (error)
1060 					break;
1061 			}
1062 #ifdef MPLS
1063 			if ((rtm->rtm_flags & RTF_MPLS) &&
1064 			    info->rti_info[RTAX_SRC] != NULL) {
1065 				NET_LOCK();
1066 				error = rt_mpls_set(rt,
1067 				    info->rti_info[RTAX_SRC], info->rti_mpls);
1068 				NET_UNLOCK();
1069 				if (error)
1070 					break;
1071 			} else if (newgate || ((rtm->rtm_fmask & RTF_MPLS) &&
1072 			    !(rtm->rtm_flags & RTF_MPLS))) {
1073 				NET_LOCK();
1074 				/* if gateway changed remove MPLS information */
1075 				rt_mpls_clear(rt);
1076 				NET_UNLOCK();
1077 			}
1078 #endif
1079 
1080 #ifdef BFD
1081 			if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1082 				if ((error = bfdset(rt)))
1083 					break;
1084 			} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1085 			    ISSET(rtm->rtm_fmask, RTF_BFD)) {
1086 				bfdclear(rt);
1087 			}
1088 #endif
1089 
1090 			NET_LOCK();
1091 			/* Hack to allow some flags to be toggled */
1092 			if (rtm->rtm_fmask)
1093 				rt->rt_flags =
1094 				    (rt->rt_flags & ~rtm->rtm_fmask) |
1095 				    (rtm->rtm_flags & rtm->rtm_fmask);
1096 
1097 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
1098 			    &rt->rt_rmx);
1099 
1100 			ifp = if_get(rt->rt_ifidx);
1101 			KASSERT(ifp != NULL);
1102 			ifp->if_rtrequest(ifp, RTM_ADD, rt);
1103 			if_put(ifp);
1104 
1105 			if (info->rti_info[RTAX_LABEL] != NULL) {
1106 				char *rtlabel = ((struct sockaddr_rtlabel *)
1107 				    info->rti_info[RTAX_LABEL])->sr_label;
1108 				rtlabel_unref(rt->rt_labelid);
1109 				rt->rt_labelid = rtlabel_name2id(rtlabel);
1110 			}
1111 			if_group_routechange(info->rti_info[RTAX_DST],
1112 			    info->rti_info[RTAX_NETMASK]);
1113 			rt->rt_locks &= ~(rtm->rtm_inits);
1114 			rt->rt_locks |=
1115 			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1116 			NET_UNLOCK();
1117 			break;
1118 		}
1119 		break;
1120 	case RTM_GET:
1121 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1122 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1123 		    prio);
1124 		if (rt == NULL)
1125 			error = ESRCH;
1126 		break;
1127 	}
1128 
1129 	*prt = rt;
1130 	return (error);
1131 }
1132 
1133 struct ifaddr *
1134 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1135     unsigned int rtableid)
1136 {
1137 	struct ifaddr	*ifa;
1138 
1139 	if ((flags & RTF_GATEWAY) == 0) {
1140 		/*
1141 		 * If we are adding a route to an interface,
1142 		 * and the interface is a pt to pt link
1143 		 * we should search for the destination
1144 		 * as our clue to the interface.  Otherwise
1145 		 * we can use the local address.
1146 		 */
1147 		ifa = NULL;
1148 		if (flags & RTF_HOST)
1149 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1150 		if (ifa == NULL)
1151 			ifa = ifa_ifwithaddr(gateway, rtableid);
1152 	} else {
1153 		/*
1154 		 * If we are adding a route to a remote net
1155 		 * or host, the gateway may still be on the
1156 		 * other end of a pt to pt link.
1157 		 */
1158 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1159 	}
1160 	if (ifa == NULL) {
1161 		if (gateway->sa_family == AF_LINK) {
1162 			struct sockaddr_dl *sdl = satosdl(gateway);
1163 			struct ifnet *ifp = if_get(sdl->sdl_index);
1164 
1165 			if (ifp != NULL)
1166 				ifa = ifaof_ifpforaddr(dst, ifp);
1167 			if_put(ifp);
1168 		} else {
1169 			struct rtentry *rt;
1170 
1171 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1172 			if (rt != NULL)
1173 				ifa = rt->rt_ifa;
1174 			rtfree(rt);
1175 		}
1176 	}
1177 	if (ifa == NULL)
1178 		return (NULL);
1179 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1180 		struct ifaddr	*oifa = ifa;
1181 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1182 		if (ifa == NULL)
1183 			ifa = oifa;
1184 	}
1185 	return (ifa);
1186 }
1187 
1188 int
1189 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1190 {
1191 	struct ifnet	*ifp = NULL;
1192 
1193 	/*
1194 	 * The "returned" `ifa' is guaranteed to be alive only if
1195 	 * the NET_LOCK() is held.
1196 	 */
1197 	NET_ASSERT_LOCKED();
1198 
1199 	/*
1200 	 * ifp may be specified by sockaddr_dl when protocol address
1201 	 * is ambiguous
1202 	 */
1203 	if (info->rti_info[RTAX_IFP] != NULL) {
1204 		struct sockaddr_dl *sdl;
1205 
1206 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1207 		ifp = if_get(sdl->sdl_index);
1208 	}
1209 
1210 #ifdef IPSEC
1211 	/*
1212 	 * If the destination is a PF_KEY address, we'll look
1213 	 * for the existence of a encap interface number or address
1214 	 * in the options list of the gateway. By default, we'll return
1215 	 * enc0.
1216 	 */
1217 	if (info->rti_info[RTAX_DST] &&
1218 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1219 		info->rti_ifa = enc_getifa(rtid, 0);
1220 #endif
1221 
1222 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1223 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1224 
1225 	if (info->rti_ifa == NULL) {
1226 		struct sockaddr	*sa;
1227 
1228 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1229 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1230 				sa = info->rti_info[RTAX_DST];
1231 
1232 		if (sa != NULL && ifp != NULL)
1233 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1234 		else if (info->rti_info[RTAX_DST] != NULL &&
1235 		    info->rti_info[RTAX_GATEWAY] != NULL)
1236 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1237 			    info->rti_info[RTAX_DST],
1238 			    info->rti_info[RTAX_GATEWAY],
1239 			    rtid);
1240 		else if (sa != NULL)
1241 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1242 			    sa, sa, rtid);
1243 	}
1244 
1245 	if_put(ifp);
1246 
1247 	if (info->rti_ifa == NULL)
1248 		return (ENETUNREACH);
1249 
1250 	return (0);
1251 }
1252 
1253 int
1254 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1255 {
1256 	struct rtentry *nhrt = arg;
1257 
1258 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1259 	    !ISSET(rt->rt_locks, RTV_MTU))
1260 		rt->rt_mtu = 0;
1261 
1262 	return (0);
1263 }
1264 
1265 /*
1266  * Check if the user request to insert an ARP entry does not conflict
1267  * with existing ones.
1268  *
1269  * Only two entries are allowed for a given IP address: a private one
1270  * (priv) and a public one (pub).
1271  */
1272 int
1273 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1274 {
1275 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1276 
1277 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1278 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1279 		return (0);
1280 
1281 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1282 		return (0);
1283 
1284 	/* If the entry is cached, it can be updated. */
1285 	if (ISSET(rt->rt_flags, RTF_CACHED))
1286 		return (0);
1287 
1288 	/*
1289 	 * Same destination, not cached and both "priv" or "pub" conflict.
1290 	 * If a second entry exists, it always conflict.
1291 	 */
1292 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1293 	    ISSET(rt->rt_flags, RTF_MPATH))
1294 		return (EEXIST);
1295 
1296 	/* No conflict but an entry exist so we need to force mpath. */
1297 	info->rti_flags |= RTF_MPATH;
1298 	return (0);
1299 }
1300 
1301 void
1302 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1303     struct rt_kmetrics *out)
1304 {
1305 	int64_t expire;
1306 
1307 	if (which & RTV_MTU)
1308 		out->rmx_mtu = in->rmx_mtu;
1309 	if (which & RTV_EXPIRE) {
1310 		expire = in->rmx_expire;
1311 		if (expire != 0) {
1312 			expire -= time_second;
1313 			expire += time_uptime;
1314 		}
1315 
1316 		out->rmx_expire = expire;
1317 	}
1318 }
1319 
1320 void
1321 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1322 {
1323 	int64_t expire;
1324 
1325 	expire = in->rmx_expire;
1326 	if (expire != 0) {
1327 		expire -= time_uptime;
1328 		expire += time_second;
1329 	}
1330 
1331 	bzero(out, sizeof(*out));
1332 	out->rmx_locks = in->rmx_locks;
1333 	out->rmx_mtu = in->rmx_mtu;
1334 	out->rmx_expire = expire;
1335 	out->rmx_pksent = in->rmx_pksent;
1336 }
1337 
/*
 * ROUNDUP: round a length up to the next multiple of sizeof(long);
 * anything that is not positive yields sizeof(long).
 * ADVANCE: move pointer `x' past the sockaddr `n', padding included.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (((a) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) : \
	    sizeof(long))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
1341 
1342 void
1343 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1344 {
1345 	struct sockaddr	*sa;
1346 	int		 i;
1347 
1348 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1349 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
1350 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1351 			continue;
1352 		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;
1353 		ADVANCE(cp, sa);
1354 	}
1355 }
1356 
1357 struct mbuf *
1358 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1359 {
1360 	struct rt_msghdr	*rtm;
1361 	struct mbuf		*m;
1362 	int			 i;
1363 	struct sockaddr		*sa;
1364 	int			 len, dlen, hlen;
1365 
1366 	switch (type) {
1367 	case RTM_DELADDR:
1368 	case RTM_NEWADDR:
1369 		len = sizeof(struct ifa_msghdr);
1370 		break;
1371 	case RTM_IFINFO:
1372 		len = sizeof(struct if_msghdr);
1373 		break;
1374 	case RTM_IFANNOUNCE:
1375 		len = sizeof(struct if_announcemsghdr);
1376 		break;
1377 #ifdef BFD
1378 	case RTM_BFD:
1379 		len = sizeof(struct bfd_msghdr);
1380 		break;
1381 #endif
1382 	case RTM_80211INFO:
1383 		len = sizeof(struct if_ieee80211_msghdr);
1384 		break;
1385 	default:
1386 		len = sizeof(struct rt_msghdr);
1387 		break;
1388 	}
1389 	if (len > MCLBYTES)
1390 		panic("rtm_msg1");
1391 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1392 	if (m && len > MHLEN) {
1393 		MCLGET(m, M_DONTWAIT);
1394 		if ((m->m_flags & M_EXT) == 0) {
1395 			m_free(m);
1396 			m = NULL;
1397 		}
1398 	}
1399 	if (m == NULL)
1400 		return (m);
1401 	m->m_pkthdr.len = m->m_len = hlen = len;
1402 	m->m_pkthdr.ph_ifidx = 0;
1403 	rtm = mtod(m, struct rt_msghdr *);
1404 	bzero(rtm, len);
1405 	for (i = 0; i < RTAX_MAX; i++) {
1406 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1407 			continue;
1408 		rtinfo->rti_addrs |= (1 << i);
1409 		dlen = ROUNDUP(sa->sa_len);
1410 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1411 			m_freem(m);
1412 			return (NULL);
1413 		}
1414 		len += dlen;
1415 	}
1416 	rtm->rtm_msglen = len;
1417 	rtm->rtm_hdrlen = hlen;
1418 	rtm->rtm_version = RTM_VERSION;
1419 	rtm->rtm_type = type;
1420 	return (m);
1421 }
1422 
1423 int
1424 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1425     struct walkarg *w)
1426 {
1427 	int		i;
1428 	int		len, dlen, hlen, second_time = 0;
1429 	caddr_t		cp0;
1430 
1431 	rtinfo->rti_addrs = 0;
1432 again:
1433 	switch (type) {
1434 	case RTM_DELADDR:
1435 	case RTM_NEWADDR:
1436 		len = sizeof(struct ifa_msghdr);
1437 		break;
1438 	case RTM_IFINFO:
1439 		len = sizeof(struct if_msghdr);
1440 		break;
1441 	default:
1442 		len = sizeof(struct rt_msghdr);
1443 		break;
1444 	}
1445 	hlen = len;
1446 	if ((cp0 = cp) != NULL)
1447 		cp += len;
1448 	for (i = 0; i < RTAX_MAX; i++) {
1449 		struct sockaddr *sa;
1450 
1451 		if ((sa = rtinfo->rti_info[i]) == NULL)
1452 			continue;
1453 		rtinfo->rti_addrs |= (1 << i);
1454 		dlen = ROUNDUP(sa->sa_len);
1455 		if (cp) {
1456 			bcopy(sa, cp, (size_t)dlen);
1457 			cp += dlen;
1458 		}
1459 		len += dlen;
1460 	}
1461 	/* align message length to the next natural boundary */
1462 	len = ALIGN(len);
1463 	if (cp == 0 && w != NULL && !second_time) {
1464 		w->w_needed += len;
1465 		if (w->w_needed <= 0 && w->w_where) {
1466 			if (w->w_tmemsize < len) {
1467 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1468 				w->w_tmem = malloc(len, M_RTABLE, M_NOWAIT);
1469 				if (w->w_tmem)
1470 					w->w_tmemsize = len;
1471 			}
1472 			if (w->w_tmem) {
1473 				cp = w->w_tmem;
1474 				second_time = 1;
1475 				goto again;
1476 			} else
1477 				w->w_where = 0;
1478 		}
1479 	}
1480 	if (cp && w)		/* clear the message header */
1481 		bzero(cp0, hlen);
1482 
1483 	if (cp) {
1484 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1485 
1486 		rtm->rtm_version = RTM_VERSION;
1487 		rtm->rtm_type = type;
1488 		rtm->rtm_msglen = len;
1489 		rtm->rtm_hdrlen = hlen;
1490 	}
1491 	return (len);
1492 }
1493 
1494 void
1495 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1496 {
1497 	struct rt_addrinfo	 info;
1498 	struct ifnet		*ifp;
1499 	struct sockaddr_rtlabel	 sa_rl;
1500 	struct sockaddr_in6	 sa_mask;
1501 
1502 	memset(&info, 0, sizeof(info));
1503 	info.rti_info[RTAX_DST] = rt_key(rt);
1504 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1505 	if (!ISSET(rt->rt_flags, RTF_HOST))
1506 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1507 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1508 	ifp = if_get(rt->rt_ifidx);
1509 	if (ifp != NULL) {
1510 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1511 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1512 	}
1513 
1514 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1515 	    rtableid);
1516 	if_put(ifp);
1517 }
1518 
1519 /*
1520  * This routine is called to generate a message from the routing
1521  * socket indicating that a redirect has occurred, a routing lookup
1522  * has failed, or that a protocol has detected timeouts to a particular
1523  * destination.
1524  */
1525 void
1526 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1527     u_int ifidx, int error, u_int tableid)
1528 {
1529 	struct rt_msghdr	*rtm;
1530 	struct mbuf		*m;
1531 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1532 
1533 	if (rtptable.rtp_count == 0)
1534 		return;
1535 	m = rtm_msg1(type, rtinfo);
1536 	if (m == NULL)
1537 		return;
1538 	rtm = mtod(m, struct rt_msghdr *);
1539 	rtm->rtm_flags = RTF_DONE | flags;
1540 	rtm->rtm_priority = prio;
1541 	rtm->rtm_errno = error;
1542 	rtm->rtm_tableid = tableid;
1543 	rtm->rtm_addrs = rtinfo->rti_addrs;
1544 	rtm->rtm_index = ifidx;
1545 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1546 }
1547 
1548 /*
1549  * This routine is called to generate a message from the routing
1550  * socket indicating that the status of a network interface has changed.
1551  */
1552 void
1553 rtm_ifchg(struct ifnet *ifp)
1554 {
1555 	struct if_msghdr	*ifm;
1556 	struct mbuf		*m;
1557 
1558 	if (rtptable.rtp_count == 0)
1559 		return;
1560 	m = rtm_msg1(RTM_IFINFO, NULL);
1561 	if (m == NULL)
1562 		return;
1563 	ifm = mtod(m, struct if_msghdr *);
1564 	ifm->ifm_index = ifp->if_index;
1565 	ifm->ifm_tableid = ifp->if_rdomain;
1566 	ifm->ifm_flags = ifp->if_flags;
1567 	ifm->ifm_xflags = ifp->if_xflags;
1568 	if_getdata(ifp, &ifm->ifm_data);
1569 	ifm->ifm_addrs = 0;
1570 	route_input(m, NULL, AF_UNSPEC);
1571 }
1572 
1573 /*
1574  * This is called to generate messages from the routing socket
1575  * indicating a network interface has had addresses associated with it.
1576  * if we ever reverse the logic and replace messages TO the routing
1577  * socket indicate a request to configure interfaces, then it will
1578  * be unnecessary as the routing socket will automatically generate
1579  * copies of it.
1580  */
1581 void
1582 rtm_addr(int cmd, struct ifaddr *ifa)
1583 {
1584 	struct ifnet		*ifp = ifa->ifa_ifp;
1585 	struct mbuf		*m;
1586 	struct rt_addrinfo	 info;
1587 	struct ifa_msghdr	*ifam;
1588 
1589 	if (rtptable.rtp_count == 0)
1590 		return;
1591 
1592 	memset(&info, 0, sizeof(info));
1593 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1594 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1595 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1596 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1597 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1598 		return;
1599 	ifam = mtod(m, struct ifa_msghdr *);
1600 	ifam->ifam_index = ifp->if_index;
1601 	ifam->ifam_metric = ifa->ifa_metric;
1602 	ifam->ifam_flags = ifa->ifa_flags;
1603 	ifam->ifam_addrs = info.rti_addrs;
1604 	ifam->ifam_tableid = ifp->if_rdomain;
1605 
1606 	route_input(m, NULL,
1607 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1608 }
1609 
1610 /*
1611  * This is called to generate routing socket messages indicating
1612  * network interface arrival and departure.
1613  */
1614 void
1615 rtm_ifannounce(struct ifnet *ifp, int what)
1616 {
1617 	struct if_announcemsghdr	*ifan;
1618 	struct mbuf			*m;
1619 
1620 	if (rtptable.rtp_count == 0)
1621 		return;
1622 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1623 	if (m == NULL)
1624 		return;
1625 	ifan = mtod(m, struct if_announcemsghdr *);
1626 	ifan->ifan_index = ifp->if_index;
1627 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1628 	ifan->ifan_what = what;
1629 	route_input(m, NULL, AF_UNSPEC);
1630 }
1631 
#ifdef BFD
/*
 * This is used to generate routing socket messages indicating
 * the state of a BFD session.
 *
 * The RTM_BFD message carries the key and interface address of the
 * route attached to the session, plus the sockaddr_bfd produced by
 * bfd2sa() for that route.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rt_addrinfo	 info;
	struct sockaddr_bfd	 sa_bfd;
	struct bfd_msghdr	*hdr;
	struct mbuf		*m;

	if (rtptable.rtp_count == 0)
		return;

	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt);
	info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;

	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = info.rti_addrs;

	/* build the session state on the stack, then copy it into place */
	bfd2sa(bfd->bc_rt, &sa_bfd);
	memcpy(&hdr->bm_sa, &sa_bfd, sizeof(sa_bfd));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1663 
1664 /*
1665  * This is used to generate routing socket messages indicating
1666  * the state of an ieee80211 interface.
1667  */
1668 void
1669 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1670 {
1671 	struct if_ieee80211_msghdr	*ifim;
1672 	struct mbuf			*m;
1673 
1674 	if (rtptable.rtp_count == 0)
1675 		return;
1676 	m = rtm_msg1(RTM_80211INFO, NULL);
1677 	if (m == NULL)
1678 		return;
1679 	ifim = mtod(m, struct if_ieee80211_msghdr *);
1680 	ifim->ifim_index = ifp->if_index;
1681 	ifim->ifim_tableid = ifp->if_rdomain;
1682 
1683 	memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1684 	route_input(m, NULL, AF_UNSPEC);
1685 }
1686 
/*
 * This is used in dumping the kernel table via sysctl().
 *
 * Called once per route (sysctl_rtable() passes it to rtable_walk()).
 * `v' is the struct walkarg describing the request and the user buffer,
 * `id' is stored in the rtm_tableid field of the emitted message.
 *
 * Routes filtered out by the request are skipped.  For the others an
 * RTM_GET message is sized via rtm_msg2(), which accounts for it in
 * w->w_needed, and, if the user buffer still has room, the message is
 * completed in w->w_tmem and copied out.
 *
 * Returns 0, or the copyout() error (in which case w->w_where is
 * cleared).
 */
int
sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
{
	struct walkarg		*w = v;
	int			 error = 0, size;
	struct rt_addrinfo	 info;
	struct ifnet		*ifp;
#ifdef BFD
	struct sockaddr_bfd	 sa_bfd;
#endif
	struct sockaddr_rtlabel	 sa_rl;
	struct sockaddr_in6	 sa_mask;

	/* NET_RT_FLAGS: only routes with at least one of the given flags */
	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
		return 0;
	/*
	 * NET_RT_DUMP with a non-zero argument filters on priority:
	 * a positive value selects it, a negative value excludes it.
	 */
	if (w->w_op == NET_RT_DUMP && w->w_arg) {
		u_int8_t prio = w->w_arg & RTP_MASK;
		if (w->w_arg < 0) {
			prio = (-w->w_arg) & RTP_MASK;
			/* Show all routes that are not this priority */
			if (prio == (rt->rt_priority & RTP_MASK))
				return 0;
		} else {
			/* RTP_ANY matches every priority */
			if (prio != (rt->rt_priority & RTP_MASK) &&
			    prio != RTP_ANY)
				return 0;
		}
	}
	/* Collect the sockaddrs describing this route. */
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	ifp = if_get(rt->rt_ifidx);
	if (ifp != NULL) {
		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
		if (ifp->if_flags & IFF_POINTOPOINT)
			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
	}
	/*
	 * NOTE(review): info keeps pointing at ifp->if_sadl after the
	 * reference is dropped here; presumably safe because the caller
	 * holds the NET_LOCK() -- confirm.
	 */
	if_put(ifp);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
#ifdef BFD
	if (rt->rt_flags & RTF_BFD)
		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
#endif
#ifdef MPLS
	if (rt->rt_flags & RTF_MPLS) {
		struct sockaddr_mpls	 sa_mpls;

		/* the MPLS label is exported in the RTAX_SRC slot */
		bzero(&sa_mpls, sizeof(sa_mpls));
		sa_mpls.smpls_family = AF_MPLS;
		sa_mpls.smpls_len = sizeof(sa_mpls);
		sa_mpls.smpls_label = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_label;
		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
		info.rti_mpls = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_operation;
	}
#endif

	/*
	 * With a NULL buffer rtm_msg2() adds the message size to
	 * w->w_needed and, if it still fits, builds the message in w->w_tmem.
	 */
	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;

		/* fill in the header fields rtm_msg2() left zeroed */
		rtm->rtm_pid = curproc->p_p->ps_pid;
		rtm->rtm_flags = rt->rt_flags;
		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
		/* Do not account the routing table's reference. */
		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
		rtm->rtm_index = rt->rt_ifidx;
		rtm->rtm_addrs = info.rti_addrs;
		rtm->rtm_tableid = id;
#ifdef MPLS
		rtm->rtm_mpls = info.rti_mpls;
#endif
		/* on failure stop copying out; the error is returned */
		if ((error = copyout(rtm, w->w_where, size)) != 0)
			w->w_where = NULL;
		else
			w->w_where += size;
	}
	return (error);
}
1773 
/*
 * Dump the interface list for sysctl(): for every interface (or only
 * the one whose index equals w->w_arg, if non-zero) emit one RTM_IFINFO
 * message followed by one RTM_NEWADDR message per address.  When `af'
 * is non-zero only addresses of that family are reported.
 *
 * Message sizes are accounted in w->w_needed by rtm_msg2(); messages
 * are copied out only while the user buffer has room.
 *
 * Returns 0, or the first copyout() error.
 */
int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet		*ifp;
	struct ifaddr		*ifa;
	struct rt_addrinfo	 info;
	int			 len, error = 0;

	/*
	 * `info' is reused for every message; the slots set for one
	 * message are reset to NULL before the next kind is built.
	 */
	bzero(&info, sizeof(info));
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (w->w_arg && w->w_arg != ifp->if_index)
			continue;
		/* Copy the link-layer address first */
		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
			struct if_msghdr *ifm;

			/* complete the header rtm_msg2() built in w_tmem */
			ifm = (struct if_msghdr *)w->w_tmem;
			ifm->ifm_index = ifp->if_index;
			ifm->ifm_tableid = ifp->if_rdomain;
			ifm->ifm_flags = ifp->if_flags;
			if_getdata(ifp, &ifm->ifm_data);
			ifm->ifm_addrs = info.rti_addrs;
			error = copyout(ifm, w->w_where, len);
			if (error)
				return (error);
			w->w_where += len;
		}
		/* the address messages do not carry the link-layer address */
		info.rti_info[RTAX_IFP] = NULL;
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
			if (af && af != ifa->ifa_addr->sa_family)
				continue;
			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
				struct ifa_msghdr *ifam;

				ifam = (struct ifa_msghdr *)w->w_tmem;
				ifam->ifam_index = ifa->ifa_ifp->if_index;
				ifam->ifam_flags = ifa->ifa_flags;
				ifam->ifam_metric = ifa->ifa_metric;
				ifam->ifam_addrs = info.rti_addrs;
				error = copyout(w->w_tmem, w->w_where, len);
				if (error)
					return (error);
				w->w_where += len;
			}
		}
		/* reset the per-address slots before the next interface */
		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
		    info.rti_info[RTAX_BRD] = NULL;
	}
	return (0);
}
1831 
1832 int
1833 sysctl_ifnames(struct walkarg *w)
1834 {
1835 	struct if_nameindex_msg ifn;
1836 	struct ifnet *ifp;
1837 	int error = 0;
1838 
1839 	/* XXX ignore tableid for now */
1840 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1841 		if (w->w_arg && w->w_arg != ifp->if_index)
1842 			continue;
1843 		w->w_needed += sizeof(ifn);
1844 		if (w->w_where && w->w_needed <= 0) {
1845 
1846 			memset(&ifn, 0, sizeof(ifn));
1847 			ifn.if_index = ifp->if_index;
1848 			strlcpy(ifn.if_name, ifp->if_xname,
1849 			    sizeof(ifn.if_name));
1850 			error = copyout(&ifn, w->w_where, sizeof(ifn));
1851 			if (error)
1852 				return (error);
1853 			w->w_where += sizeof(ifn);
1854 		}
1855 	}
1856 
1857 	return (0);
1858 }
1859 
/*
 * sysctl handler of the routing domain (installed as .pr_sysctl in
 * routesw[]).  The node is read-only: any new value yields EPERM.
 *
 * name[0] is an address family filter (0 for all), name[1] the
 * operation (NET_RT_*), name[2] its argument and the optional name[3]
 * a routing table id; without it the table of the calling process is
 * used.
 *
 * On return *given holds the number of bytes copied out or, when
 * `where' is NULL, a size estimate for the caller's buffer.
 */
int
sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
    size_t newlen)
{
	int			 i, error = EINVAL;
	u_char			 af;
	struct walkarg		 w;
	struct rt_tableinfo	 tableinfo;
	u_int			 tableid = 0;

	if (new)
		return (EPERM);
	if (namelen < 3 || namelen > 4)
		return (EINVAL);
	af = name[0];
	bzero(&w, sizeof(w));
	w.w_where = where;
	w.w_given = *given;
	/*
	 * w_needed starts at minus the buffer size and grows with every
	 * message; while it is <= 0 the data produced so far still fits.
	 */
	w.w_needed = 0 - w.w_given;
	w.w_op = name[1];
	w.w_arg = name[2];

	if (namelen == 4) {
		tableid = name[3];
		if (!rtable_exists(tableid))
			return (ENOENT);
	} else
		tableid = curproc->p_p->ps_rtableid;

	/* an unknown operation leaves error at EINVAL */
	switch (w.w_op) {
	case NET_RT_DUMP:
	case NET_RT_FLAGS:
		NET_LOCK();
		/* walk the table once per address family */
		for (i = 1; i <= AF_MAX; i++) {
			if (af != 0 && af != i)
				continue;

			error = rtable_walk(tableid, i, sysctl_dumpentry, &w);
			/* EAFNOSUPPORT from the walk is not treated as an error */
			if (error == EAFNOSUPPORT)
				error = 0;
			if (error)
				break;
		}
		NET_UNLOCK();
		break;

	case NET_RT_IFLIST:
		NET_LOCK();
		error = sysctl_iflist(af, &w);
		NET_UNLOCK();
		break;

	case NET_RT_STATS:
		/* handled without the walkarg; returns directly */
		return (sysctl_rtable_rtstat(where, given, new));
	case NET_RT_TABLE:
		/* here the argument, not name[3], selects the table */
		tableid = w.w_arg;
		if (!rtable_exists(tableid))
			return (ENOENT);
		memset(&tableinfo, 0, sizeof tableinfo);
		tableinfo.rti_tableid = tableid;
		tableinfo.rti_domainid = rtable_l2(tableid);
		error = sysctl_rdstruct(where, given, new,
		    &tableinfo, sizeof(tableinfo));
		return (error);
	case NET_RT_IFNAMES:
		NET_LOCK();
		error = sysctl_ifnames(&w);
		NET_UNLOCK();
		break;
	}
	/* release the scratch buffer rtm_msg2() may have allocated */
	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
	/* turn w_needed back into the total number of bytes required */
	w.w_needed += w.w_given;
	if (where) {
		/*
		 * NOTE(review): w.w_where is reset to NULL by
		 * sysctl_dumpentry() on copyout failure and by rtm_msg2() on
		 * allocation failure; *given is then not a meaningful byte
		 * count -- confirm callers only rely on the error.
		 */
		*given = w.w_where - (caddr_t)where;
		if (*given < w.w_needed)
			return (ENOMEM);
	} else
		/*
		 * sizing request: report 10% more than currently needed
		 * (presumably headroom in case the table grows -- confirm)
		 */
		*given = (11 * w.w_needed) / 10;

	return (error);
}
1941 
1942 int
1943 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
1944 {
1945 	extern struct cpumem *rtcounters;
1946 	uint64_t counters[rts_ncounters];
1947 	struct rtstat rtstat;
1948 	uint32_t *words = (uint32_t *)&rtstat;
1949 	int i;
1950 
1951 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
1952 	memset(&rtstat, 0, sizeof rtstat);
1953 	counters_read(rtcounters, counters, nitems(counters));
1954 
1955 	for (i = 0; i < nitems(counters); i++)
1956 		words[i] = (uint32_t)counters[i];
1957 
1958 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
1959 }
1960 
1961 int
1962 rtm_validate_proposal(struct rt_addrinfo *info)
1963 {
1964 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
1965 	    RTA_SEARCH)) {
1966 		return -1;
1967 	}
1968 
1969 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
1970 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
1971 		if (sa == NULL)
1972 			return -1;
1973 		switch (sa->sa_family) {
1974 		case AF_INET:
1975 			if (sa->sa_len != sizeof(struct sockaddr_in))
1976 				return -1;
1977 			break;
1978 		case AF_INET6:
1979 			if (sa->sa_len != sizeof(struct sockaddr_in6))
1980 				return -1;
1981 			break;
1982 		default:
1983 			return -1;
1984 		}
1985 	}
1986 
1987 	if (ISSET(info->rti_addrs, RTA_IFA)) {
1988 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
1989 		if (sa == NULL)
1990 			return -1;
1991 		switch (sa->sa_family) {
1992 		case AF_INET:
1993 			if (sa->sa_len != sizeof(struct sockaddr_in))
1994 				return -1;
1995 			break;
1996 		case AF_INET6:
1997 			if (sa->sa_len != sizeof(struct sockaddr_in6))
1998 				return -1;
1999 			break;
2000 		default:
2001 			return -1;
2002 		}
2003 	}
2004 
2005 	if (ISSET(info->rti_addrs, RTA_DNS)) {
2006 		struct sockaddr_rtdns *rtdns =
2007 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2008 		if (rtdns == NULL)
2009 			return -1;
2010 		if (rtdns->sr_len > sizeof(*rtdns))
2011 			return -1;
2012 		if (rtdns->sr_len <=
2013 		    offsetof(struct sockaddr_rtdns, sr_dns))
2014 			return -1;
2015 	}
2016 
2017 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
2018 		struct sockaddr_rtstatic *rtstatic =
2019 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2020 		if (rtstatic == NULL)
2021 			return -1;
2022 		if (rtstatic->sr_len > sizeof(*rtstatic))
2023 			return -1;
2024 		if (rtstatic->sr_len <=
2025 		    offsetof(struct sockaddr_rtstatic, sr_static))
2026 			return -1;
2027 	}
2028 
2029 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2030 		struct sockaddr_rtsearch *rtsearch =
2031 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2032 		if (rtsearch == NULL)
2033 			return -1;
2034 		if (rtsearch->sr_len > sizeof(*rtsearch))
2035 			return -1;
2036 		if (rtsearch->sr_len <=
2037 		    offsetof(struct sockaddr_rtsearch, sr_search))
2038 			return -1;
2039 	}
2040 
2041 	return 0;
2042 }
2043 
/*
 * Definitions of protocols supported in the ROUTE domain.
 *
 * The domain has a single protocol: a raw socket type whose handlers
 * are the route_*() functions, with sysctl_rtable() as sysctl entry
 * point.
 */

extern	struct domain routedomain;		/* or at least forward */

struct protosw routesw[] = {
{
  .pr_type	= SOCK_RAW,
  .pr_domain	= &routedomain,
  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
  .pr_output	= route_output,
  .pr_ctloutput	= route_ctloutput,
  .pr_usrreq	= route_usrreq,
  .pr_attach	= route_attach,
  .pr_detach	= route_detach,
  .pr_init	= route_prinit,
  .pr_sysctl	= sysctl_rtable
}
};

/*
 * The PF_ROUTE domain itself; dom_protoswNPROTOSW points one past the
 * last entry of routesw[].
 */
struct domain routedomain = {
  .dom_family = PF_ROUTE,
  .dom_name = "route",
  .dom_init = route_init,
  .dom_protosw = routesw,
  .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
};
2072