xref: /openbsd-src/sys/net/rtsock.c (revision 6396a31b28c13abcc71f05292f11b42abbafd7d3)
1 /*	$OpenBSD: rtsock.c,v 1.287 2019/06/05 12:53:43 claudio Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/protosw.h>
73 #include <sys/srp.h>
74 
75 #include <net/if.h>
76 #include <net/if_dl.h>
77 #include <net/if_var.h>
78 #include <net/route.h>
79 
80 #include <netinet/in.h>
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif
85 #ifdef IPSEC
86 #include <netinet/ip_ipsp.h>
87 #include <net/if_enc.h>
88 #endif
89 #ifdef BFD
90 #include <net/bfd.h>
91 #endif
92 
93 #include <sys/stdarg.h>
94 #include <sys/kernel.h>
95 #include <sys/timeout.h>
96 
/* Send and receive buffer sizes reserved for a routing socket at attach. */
#define	ROUTESNDQ	8192
#define	ROUTERCVQ	8192

/*
 * Address handed out as the peer address of a routing socket and used
 * as the source address of every message appended to a receive buffer.
 * NOTE(review): the initializer is positional; 2 presumably lands in
 * sa_len and PF_ROUTE in sa_family -- confirm against struct sockaddr.
 */
const struct sockaddr route_src = { 2, PF_ROUTE, };
101 
/*
 * Argument block passed to rtm_msg2() and to the sysctl helpers
 * declared below.  How the individual fields are used is not visible
 * in this part of the file.
 */
struct walkarg {
	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
	caddr_t	w_where, w_tmem;
};
106 
/* Routing socket protocol hooks and PCB reference helpers. */
void	route_prinit(void);
void	rcb_ref(void *, void *);
void	rcb_unref(void *, void *);
int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
	    struct mbuf *);
int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
	    struct mbuf *, struct proc *);
void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int	route_cleargateway(struct rtentry *, void *, unsigned int);
/* Delivery to a single socket and RTM_DESYNC overflow signalling. */
void	rtm_senddesync_timer(void *);
void	rtm_senddesync(struct socket *);
int	rtm_sendup(struct socket *, struct mbuf *, int);

/* Request processing and routing message construction. */
int	rtm_getifa(struct rt_addrinfo *, unsigned int);
int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
	    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
		     struct walkarg *);
int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
int		 rtm_validate_proposal(struct rt_addrinfo *);
void		 rtm_setmetrics(u_long, const struct rt_metrics *,
		     struct rt_kmetrics *);
void		 rtm_getmetrics(const struct rt_kmetrics *,
		     struct rt_metrics *);

/* sysctl helpers. */
int		 sysctl_iflist(int, struct walkarg *);
int		 sysctl_ifnames(struct walkarg *);
int		 sysctl_rtable_rtstat(void *, size_t *, void *);
139 
/*
 * Per-socket control block of a routing socket.  Allocated in
 * route_attach(), stored in so_pcb and released in route_detach().
 */
struct rtpcb {
	struct socket		*rop_socket;	/* socket owning this pcb */

	SRPL_ENTRY(rtpcb)	rop_list;	/* link on rtptable.rtp_list */
	struct refcnt		rop_refcnt;	/* detach waits for it to drain */
	struct timeout		rop_timeout;	/* RTM_DESYNC resend timer */
	unsigned int		rop_msgfilter;	/* ROUTE_MSGFILTER value */
	unsigned int		rop_flags;	/* ROUTECB_FLAG_* */
	u_int			rop_rtableid;	/* ROUTE_TABLEFILTER value */
	unsigned short		rop_proto;	/* protocol given at attach */
	u_char			rop_priority;	/* ROUTE_PRIOFILTER value */
};
/* Fetch the routing pcb stored in a socket's so_pcb. */
#define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
153 
/*
 * Global table of all attached routing sockets.  route_attach() and
 * route_detach() modify it with rtp_lk held for writing;
 * route_input() walks rtp_list with SRPL_FOREACH().
 */
struct rtptable {
	SRPL_HEAD(, rtpcb)	rtp_list;	/* all routing pcbs */
	struct srpl_rc		rtp_rc;		/* rcb_ref/rcb_unref callbacks */
	struct rwlock		rtp_lk;		/* write-held for list updates */
	unsigned int		rtp_count;	/* number of attached sockets */
};

struct rtptable rtptable;
162 
/*
 * These flags and timeout are used for indicating to userland (via a
 * RTM_DESYNC msg) when the route socket has overflowed and messages
 * have been lost.  They are kept in rop_flags: rtm_sendup() sets both
 * when a message cannot be queued, rtm_senddesync() clears DESYNC once
 * the RTM_DESYNC message has been queued and the PRU_RCVD request
 * clears FLUSH once the receive buffer is empty again.
 */
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
					   queueing more packets */

/* Delay before rtm_senddesync() is retried after a failed attempt. */
#define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */
173 
174 void
175 route_prinit(void)
176 {
177 	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
178 	rw_init(&rtptable.rtp_lk, "rtsock");
179 	SRPL_INIT(&rtptable.rtp_list);
180 }
181 
182 void
183 rcb_ref(void *null, void *v)
184 {
185 	struct rtpcb *rop = v;
186 
187 	refcnt_take(&rop->rop_refcnt);
188 }
189 
190 void
191 rcb_unref(void *null, void *v)
192 {
193 	struct rtpcb *rop = v;
194 
195 	refcnt_rele_wake(&rop->rop_refcnt);
196 }
197 
198 int
199 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
200     struct mbuf *control, struct proc *p)
201 {
202 	struct rtpcb	*rop;
203 	int		 error = 0;
204 
205 	if (req == PRU_CONTROL)
206 		return (EOPNOTSUPP);
207 
208 	soassertlocked(so);
209 
210 	if (control && control->m_len) {
211 		error = EOPNOTSUPP;
212 		goto release;
213 	}
214 
215 	rop = sotortpcb(so);
216 	if (rop == NULL) {
217 		error = EINVAL;
218 		goto release;
219 	}
220 
221 	switch (req) {
222 	/* no connect, bind, accept. Socket is connected from the start */
223 	case PRU_CONNECT:
224 	case PRU_BIND:
225 	case PRU_CONNECT2:
226 	case PRU_LISTEN:
227 	case PRU_ACCEPT:
228 		error = EOPNOTSUPP;
229 		break;
230 
231 	case PRU_DISCONNECT:
232 	case PRU_ABORT:
233 		soisdisconnected(so);
234 		break;
235 	case PRU_SHUTDOWN:
236 		socantsendmore(so);
237 		break;
238 	case PRU_SENSE:
239 		/* stat: don't bother with a blocksize. */
240 		break;
241 
242 	/* minimal support, just implement a fake peer address */
243 	case PRU_SOCKADDR:
244 		error = EINVAL;
245 		break;
246 	case PRU_PEERADDR:
247 		bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
248 		nam->m_len = route_src.sa_len;
249 		break;
250 
251 	case PRU_RCVD:
252 		/*
253 		 * If we are in a FLUSH state, check if the buffer is
254 		 * empty so that we can clear the flag.
255 		 */
256 		if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
257 		    ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
258 		    rop->rop_socket->so_rcv.sb_hiwat)))
259 			rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
260 		break;
261 
262 	case PRU_RCVOOB:
263 	case PRU_SENDOOB:
264 		error = EOPNOTSUPP;
265 		break;
266 	case PRU_SEND:
267 		if (nam) {
268 			error = EISCONN;
269 			break;
270 		}
271 		error = (*so->so_proto->pr_output)(m, so, NULL, NULL);
272 		m = NULL;
273 		break;
274 	default:
275 		panic("route_usrreq");
276 	}
277 
278  release:
279 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
280 		m_freem(control);
281 		m_freem(m);
282 	}
283 	return (error);
284 }
285 
286 int
287 route_attach(struct socket *so, int proto)
288 {
289 	struct rtpcb	*rop;
290 	int		 error;
291 
292 	/*
293 	 * use the rawcb but allocate a rtpcb, this
294 	 * code does not care about the additional fields
295 	 * and works directly on the raw socket.
296 	 */
297 	rop = malloc(sizeof(struct rtpcb), M_PCB, M_WAITOK|M_ZERO);
298 	so->so_pcb = rop;
299 	/* Init the timeout structure */
300 	timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
301 	refcnt_init(&rop->rop_refcnt);
302 
303 	if (curproc == NULL)
304 		error = EACCES;
305 	else
306 		error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
307 	if (error) {
308 		free(rop, M_PCB, sizeof(struct rtpcb));
309 		return (error);
310 	}
311 
312 	rop->rop_socket = so;
313 	rop->rop_proto = proto;
314 
315 	rop->rop_rtableid = curproc->p_p->ps_rtableid;
316 
317 	soisconnected(so);
318 	so->so_options |= SO_USELOOPBACK;
319 
320 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
321 	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rop_list);
322 	rtptable.rtp_count++;
323 	rw_exit(&rtptable.rtp_lk);
324 
325 	return (0);
326 }
327 
328 int
329 route_detach(struct socket *so)
330 {
331 	struct rtpcb	*rop;
332 
333 	soassertlocked(so);
334 
335 	rop = sotortpcb(so);
336 	if (rop == NULL)
337 		return (EINVAL);
338 
339 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
340 
341 	timeout_del(&rop->rop_timeout);
342 	rtptable.rtp_count--;
343 
344 	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
345 	    rop_list);
346 	rw_exit(&rtptable.rtp_lk);
347 
348 	/* wait for all references to drop */
349 	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
350 
351 	so->so_pcb = NULL;
352 	KASSERT((so->so_state & SS_NOFDREF) == 0);
353 	free(rop, M_PCB, sizeof(struct rtpcb));
354 
355 	return (0);
356 }
357 
358 int
359 route_ctloutput(int op, struct socket *so, int level, int optname,
360     struct mbuf *m)
361 {
362 	struct rtpcb *rop = sotortpcb(so);
363 	int error = 0;
364 	unsigned int tid, prio;
365 
366 	if (level != AF_ROUTE)
367 		return (EINVAL);
368 
369 	switch (op) {
370 	case PRCO_SETOPT:
371 		switch (optname) {
372 		case ROUTE_MSGFILTER:
373 			if (m == NULL || m->m_len != sizeof(unsigned int))
374 				error = EINVAL;
375 			else
376 				rop->rop_msgfilter = *mtod(m, unsigned int *);
377 			break;
378 		case ROUTE_TABLEFILTER:
379 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
380 				error = EINVAL;
381 				break;
382 			}
383 			tid = *mtod(m, unsigned int *);
384 			if (tid != RTABLE_ANY && !rtable_exists(tid))
385 				error = ENOENT;
386 			else
387 				rop->rop_rtableid = tid;
388 			break;
389 		case ROUTE_PRIOFILTER:
390 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
391 				error = EINVAL;
392 				break;
393 			}
394 			prio = *mtod(m, unsigned int *);
395 			if (prio > RTP_MAX)
396 				error = EINVAL;
397 			else
398 				rop->rop_priority = prio;
399 			break;
400 		default:
401 			error = ENOPROTOOPT;
402 			break;
403 		}
404 		break;
405 	case PRCO_GETOPT:
406 		switch (optname) {
407 		case ROUTE_MSGFILTER:
408 			m->m_len = sizeof(unsigned int);
409 			*mtod(m, unsigned int *) = rop->rop_msgfilter;
410 			break;
411 		case ROUTE_TABLEFILTER:
412 			m->m_len = sizeof(unsigned int);
413 			*mtod(m, unsigned int *) = rop->rop_rtableid;
414 			break;
415 		case ROUTE_PRIOFILTER:
416 			m->m_len = sizeof(unsigned int);
417 			*mtod(m, unsigned int *) = rop->rop_priority;
418 			break;
419 		default:
420 			error = ENOPROTOOPT;
421 			break;
422 		}
423 	}
424 	return (error);
425 }
426 
/*
 * Timeout handler armed by rtm_senddesync(): retry sending the
 * RTM_DESYNC message with the socket ``xso'' locked.
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*sock = xso;
	int		 lk;

	lk = solock(sock);
	rtm_senddesync(sock);
	sounlock(sock, lk);
}
437 
438 void
439 rtm_senddesync(struct socket *so)
440 {
441 	struct rtpcb	*rop = sotortpcb(so);
442 	struct mbuf	*desync_mbuf;
443 
444 	soassertlocked(so);
445 
446 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
447 	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
448 		return;
449 
450 	/*
451 	 * If we fail to alloc memory or if sbappendaddr()
452 	 * fails, re-add timeout and try again.
453 	 */
454 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
455 	if (desync_mbuf != NULL) {
456 		if (sbappendaddr(so, &so->so_rcv, &route_src,
457 		    desync_mbuf, NULL) != 0) {
458 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
459 			sorwakeup(rop->rop_socket);
460 			return;
461 		}
462 		m_freem(desync_mbuf);
463 	}
464 	/* Re-add timeout to try sending msg again */
465 	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
466 }
467 
468 void
469 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
470 {
471 	struct socket *so;
472 	struct rtpcb *rop;
473 	struct rt_msghdr *rtm;
474 	struct mbuf *m = m0;
475 	struct socket *last = NULL;
476 	struct srp_ref sr;
477 	int s;
478 
479 	/* ensure that we can access the rtm_type via mtod() */
480 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
481 		m_freem(m);
482 		return;
483 	}
484 
485 	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
486 		/*
487 		 * If route socket is bound to an address family only send
488 		 * messages that match the address family. Address family
489 		 * agnostic messages are always sent.
490 		 */
491 		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
492 		    rop->rop_proto != sa_family)
493 			continue;
494 
495 
496 		so = rop->rop_socket;
497 		s = solock(so);
498 
499 		/*
500 		 * Check to see if we don't want our own messages and
501 		 * if we can receive anything.
502 		 */
503 		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
504 		    !(so->so_state & SS_ISCONNECTED) ||
505 		    (so->so_state & SS_CANTRCVMORE)) {
506 next:
507 			sounlock(so, s);
508 			continue;
509 		}
510 
511 		/* filter messages that the process does not want */
512 		rtm = mtod(m, struct rt_msghdr *);
513 		/* but RTM_DESYNC can't be filtered */
514 		if (rtm->rtm_type != RTM_DESYNC && rop->rop_msgfilter != 0 &&
515 		    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
516 			goto next;
517 		switch (rtm->rtm_type) {
518 		case RTM_IFANNOUNCE:
519 		case RTM_DESYNC:
520 			/* no tableid */
521 			break;
522 		case RTM_RESOLVE:
523 		case RTM_NEWADDR:
524 		case RTM_DELADDR:
525 		case RTM_IFINFO:
526 		case RTM_80211INFO:
527 		case RTM_BFD:
528 			/* check against rdomain id */
529 			if (rop->rop_rtableid != RTABLE_ANY &&
530 			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
531 				goto next;
532 			break;
533 		default:
534 			if (rop->rop_priority != 0 &&
535 			    rop->rop_priority < rtm->rtm_priority)
536 				goto next;
537 			/* check against rtable id */
538 			if (rop->rop_rtableid != RTABLE_ANY &&
539 			    rop->rop_rtableid != rtm->rtm_tableid)
540 				goto next;
541 			break;
542 		}
543 
544 		/*
545 		 * Check to see if the flush flag is set. If so, don't queue
546 		 * any more messages until the flag is cleared.
547 		 */
548 		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
549 			goto next;
550 		sounlock(so, s);
551 
552 		if (last) {
553 			s = solock(last);
554 			rtm_sendup(last, m, 1);
555 			sounlock(last, s);
556 			refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
557 		}
558 		/* keep a reference for last */
559 		refcnt_take(&rop->rop_refcnt);
560 		last = rop->rop_socket;
561 	}
562 	SRPL_LEAVE(&sr);
563 
564 	if (last) {
565 		s = solock(last);
566 		rtm_sendup(last, m, 0);
567 		sounlock(last, s);
568 		refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
569 	} else
570 		m_freem(m);
571 }
572 
573 int
574 rtm_sendup(struct socket *so, struct mbuf *m0, int more)
575 {
576 	struct rtpcb *rop = sotortpcb(so);
577 	struct mbuf *m;
578 
579 	soassertlocked(so);
580 
581 	if (more) {
582 		m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
583 		if (m == NULL)
584 			return (ENOMEM);
585 	} else
586 		m = m0;
587 
588 	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
589 	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
590 		/* Flag socket as desync'ed and flush required */
591 		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
592 		rtm_senddesync(so);
593 		m_freem(m);
594 		return (ENOBUFS);
595 	}
596 
597 	sorwakeup(so);
598 	return (0);
599 }
600 
601 struct rt_msghdr *
602 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
603 {
604 	struct rt_msghdr	*rtm;
605 	struct rt_addrinfo	 info;
606 	struct sockaddr_rtlabel	 sa_rl;
607 	struct sockaddr_in6	 sa_mask;
608 #ifdef BFD
609 	struct sockaddr_bfd	 sa_bfd;
610 #endif
611 	struct ifnet		*ifp = NULL;
612 	int			 len;
613 
614 	bzero(&info, sizeof(info));
615 	info.rti_info[RTAX_DST] = rt_key(rt);
616 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
617 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
618 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
619 #ifdef BFD
620 	if (rt->rt_flags & RTF_BFD)
621 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
622 #endif
623 #ifdef MPLS
624 	if (rt->rt_flags & RTF_MPLS) {
625 		struct sockaddr_mpls	 sa_mpls;
626 
627 		bzero(&sa_mpls, sizeof(sa_mpls));
628 		sa_mpls.smpls_family = AF_MPLS;
629 		sa_mpls.smpls_len = sizeof(sa_mpls);
630 		sa_mpls.smpls_label = ((struct rt_mpls *)
631 		    rt->rt_llinfo)->mpls_label;
632 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
633 		info.rti_mpls = ((struct rt_mpls *)
634 		    rt->rt_llinfo)->mpls_operation;
635 	}
636 #endif
637 	ifp = if_get(rt->rt_ifidx);
638 	if (ifp != NULL) {
639 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
640 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
641 		if (ifp->if_flags & IFF_POINTOPOINT)
642 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
643 	}
644 	if_put(ifp);
645 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
646 
647 	/* build new route message */
648 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
649 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
650 
651 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
652 	rtm->rtm_type = type;
653 	rtm->rtm_index = rt->rt_ifidx;
654 	rtm->rtm_tableid = tableid;
655 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
656 	rtm->rtm_flags = rt->rt_flags;
657 	rtm->rtm_pid = curproc->p_p->ps_pid;
658 	rtm->rtm_seq = seq;
659 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
660 	rtm->rtm_addrs = info.rti_addrs;
661 #ifdef MPLS
662 	rtm->rtm_mpls = info.rti_mpls;
663 #endif
664 	return rtm;
665 }
666 
667 int
668 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
669     struct mbuf *control)
670 {
671 	struct rt_msghdr	*rtm = NULL;
672 	struct rtentry		*rt = NULL;
673 	struct rt_addrinfo	 info;
674 	int			 len, seq, error = 0;
675 	u_int			 tableid;
676 	u_int8_t		 prio;
677 	u_char			 vers, type;
678 
679 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
680 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
681 		return (ENOBUFS);
682 	if ((m->m_flags & M_PKTHDR) == 0)
683 		panic("route_output");
684 	len = m->m_pkthdr.len;
685 	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 ||
686 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
687 		error = EINVAL;
688 		goto fail;
689 	}
690 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
691 	switch (vers) {
692 	case RTM_VERSION:
693 		if (len < sizeof(struct rt_msghdr)) {
694 			error = EINVAL;
695 			goto fail;
696 		}
697 		if (len > RTM_MAXSIZE) {
698 			error = EMSGSIZE;
699 			goto fail;
700 		}
701 		rtm = malloc(len, M_RTABLE, M_WAITOK);
702 		m_copydata(m, 0, len, (caddr_t)rtm);
703 		break;
704 	default:
705 		error = EPROTONOSUPPORT;
706 		goto fail;
707 	}
708 
709 	/* Verify that the caller is sending an appropriate message early */
710 	switch (rtm->rtm_type) {
711 	case RTM_ADD:
712 	case RTM_DELETE:
713 	case RTM_GET:
714 	case RTM_CHANGE:
715 	case RTM_PROPOSAL:
716 		break;
717 	default:
718 		error = EOPNOTSUPP;
719 		goto fail;
720 	}
721 	/*
722 	 * Verify that the header length is valid.
723 	 * All messages from userland start with a struct rt_msghdr.
724 	 */
725 	if (rtm->rtm_hdrlen == 0)	/* old client */
726 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
727 	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
728 	    len < rtm->rtm_hdrlen) {
729 		error = EINVAL;
730 		goto fail;
731 	}
732 
733 	rtm->rtm_pid = curproc->p_p->ps_pid;
734 
735 	/*
736 	 * Verify that the caller has the appropriate privilege; RTM_GET
737 	 * is the only operation the non-superuser is allowed.
738 	 */
739 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
740 		error = EACCES;
741 		goto fail;
742 	}
743 	tableid = rtm->rtm_tableid;
744 	if (!rtable_exists(tableid)) {
745 		if (rtm->rtm_type == RTM_ADD) {
746 			if ((error = rtable_add(tableid)) != 0)
747 				goto fail;
748 		} else {
749 			error = EINVAL;
750 			goto fail;
751 		}
752 	}
753 
754 
755 	/* Do not let userland play with kernel-only flags. */
756 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
757 		error = EINVAL;
758 		goto fail;
759 	}
760 
761 	/* make sure that kernel-only bits are not set */
762 	rtm->rtm_priority &= RTP_MASK;
763 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
764 	rtm->rtm_fmask &= RTF_FMASK;
765 
766 	if (rtm->rtm_priority != 0) {
767 		if (rtm->rtm_priority > RTP_MAX ||
768 		    rtm->rtm_priority == RTP_LOCAL) {
769 			error = EINVAL;
770 			goto fail;
771 		}
772 		prio = rtm->rtm_priority;
773 	} else if (rtm->rtm_type != RTM_ADD)
774 		prio = RTP_ANY;
775 	else if (rtm->rtm_flags & RTF_STATIC)
776 		prio = 0;
777 	else
778 		prio = RTP_DEFAULT;
779 
780 	bzero(&info, sizeof(info));
781 	info.rti_addrs = rtm->rtm_addrs;
782 	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
783 	    len + (caddr_t)rtm, &info)) != 0)
784 		goto fail;
785 	info.rti_flags = rtm->rtm_flags;
786 	if (rtm->rtm_type != RTM_PROPOSAL &&
787 	   (info.rti_info[RTAX_DST] == NULL ||
788 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
789 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
790 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
791 	    info.rti_info[RTAX_GENMASK] != NULL)) {
792 		error = EINVAL;
793 		goto fail;
794 	}
795 #ifdef MPLS
796 	info.rti_mpls = rtm->rtm_mpls;
797 #endif
798 
799 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
800 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
801 	    (info.rti_flags & RTF_CLONING) == 0) {
802 		info.rti_flags |= RTF_LLINFO;
803 	}
804 
805 	/*
806 	 * Validate RTM_PROPOSAL and pass it along or error out.
807 	 */
808 	if (rtm->rtm_type == RTM_PROPOSAL) {
809 		if (rtm_validate_proposal(&info) == -1) {
810 			error = EINVAL;
811 			goto fail;
812 		}
813 	} else {
814 		error = rtm_output(rtm, &rt, &info, prio, tableid);
815 		if (!error) {
816 			type = rtm->rtm_type;
817 			seq = rtm->rtm_seq;
818 			free(rtm, M_RTABLE, len);
819 			rtm = rtm_report(rt, type, seq, tableid);
820 			len = rtm->rtm_msglen;
821 		}
822 	}
823 
824 	rtfree(rt);
825 	if (error) {
826 		rtm->rtm_errno = error;
827 	} else {
828 		rtm->rtm_flags |= RTF_DONE;
829 	}
830 
831 	/*
832 	 * Check to see if we don't want our own messages.
833 	 */
834 	if (!(so->so_options & SO_USELOOPBACK)) {
835 		if (rtptable.rtp_count <= 1) {
836 			/* no other listener and no loopback of messages */
837 fail:
838 			free(rtm, M_RTABLE, len);
839 			m_freem(m);
840 			return (error);
841 		}
842 	}
843 	if (rtm) {
844 		if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
845 			m_freem(m);
846 			m = NULL;
847 		} else if (m->m_pkthdr.len > len)
848 			m_adj(m, len - m->m_pkthdr.len);
849 		free(rtm, M_RTABLE, len);
850 	}
851 	if (m)
852 		route_input(m, so, info.rti_info[RTAX_DST] ?
853 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
854 
855 	return (error);
856 }
857 
/*
 * Carry out the routing request described by ``rtm'' (RTM_ADD,
 * RTM_DELETE, RTM_CHANGE or RTM_GET) on routing table ``tableid''.
 * Other message types are not acted upon and yield 0.
 *
 * ``info'' holds the addresses and flags that came with the request
 * and ``prio'' the priority used for lookups and insertion.  On return
 * ``*prt'' is set to the route entry the request ended up with, which
 * may be NULL; route_output() passes it to rtfree().
 *
 * Returns 0 or an errno value.
 */
int
rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
    struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
{
	struct rtentry		*rt = *prt;
	struct ifnet		*ifp = NULL;
	int			 plen, newgate = 0, error = 0;

	switch (rtm->rtm_type) {
	case RTM_ADD:
		/* Adding a route requires a gateway. */
		if (info->rti_info[RTAX_GATEWAY] == NULL) {
			error = EINVAL;
			break;
		}

		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
		if ((error = route_arp_conflict(rt, info))) {
			rtfree(rt);
			rt = NULL;
			break;
		}

		/*
		 * We cannot go through a delete/create/insert cycle for
		 * cached route because this can lead to races in the
		 * receive path.  Instead we update the L2 cache.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
			goto change;

		/* The matched route was only needed for the checks above. */
		rtfree(rt);
		rt = NULL;

		NET_LOCK();
		if ((error = rtm_getifa(info, tableid)) != 0) {
			NET_UNLOCK();
			break;
		}
		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
		NET_UNLOCK();
		if (error == 0)
			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
			    &rt->rt_rmx);
		break;
	case RTM_DELETE:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
		 */
		if (ISSET(rt->rt_flags, RTF_MPATH) &&
		    info->rti_info[RTAX_GATEWAY] == NULL) {
			error = ESRCH;
			break;
		}

		/* Detaching an interface requires the KERNEL_LOCK(). */
		ifp = if_get(rt->rt_ifidx);
		KASSERT(ifp != NULL);

		/*
		 * Invalidate the cache of automagically created and
		 * referenced L2 entries to make sure that ``rt_gwroute''
		 * pointer stays valid for other CPUs.
		 */
		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
			NET_LOCK();
			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
			/* Reset the MTU of the gateway route. */
			rtable_walk(tableid, rt_key(rt)->sa_family,
			    route_cleargateway, rt);
			NET_UNLOCK();
			if_put(ifp);
			/* The cached route is not deleted; rt is returned. */
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			if_put(ifp);
			error = EINVAL;
			break;
		}

		/* rtrequest_delete() hands back the route through &rt. */
		rtfree(rt);
		rt = NULL;

		NET_LOCK();
		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
		NET_UNLOCK();
		if_put(ifp);
		break;
	case RTM_CHANGE:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
			rtfree(rt);
			rt = NULL;
		}
		/*
		 * If RTAX_GATEWAY is the argument we're trying to
		 * change, try to find a compatible route.
		 */
		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL) &&
		    (rtm->rtm_type == RTM_CHANGE)) {
			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], NULL, prio);
			/* Ensure we don't pick a multipath one. */
			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
				rtfree(rt);
				rt = NULL;
			}
		}

		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			error = EINVAL;
			break;
		}

		/*
		 * RTM_CHANGE/LOCK need a perfect match.
		 */
		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
		    info->rti_info[RTAX_NETMASK]);
		if (rt_plen(rt) != plen) {
			error = ESRCH;
			break;
		}

		/*
		 * Note that a ``break'' inside this inner switch only
		 * leaves the inner switch; the ``break'' following it
		 * then leaves the RTM_CHANGE case.
		 */
		switch (rtm->rtm_type) {
		case RTM_CHANGE:
			/* A gateway differing from the current one? */
			if (info->rti_info[RTAX_GATEWAY] != NULL)
				if (rt->rt_gateway == NULL ||
				    bcmp(rt->rt_gateway,
				    info->rti_info[RTAX_GATEWAY],
				    info->rti_info[RTAX_GATEWAY]->sa_len)) {
					newgate = 1;
				}
			/*
			 * Check reachable gateway before changing the route.
			 * New gateway could require new ifaddr, ifp;
			 * flags may also be different; ifp may be specified
			 * by ll sockaddr when protocol address is ambiguous.
			 */
			if (newgate || info->rti_info[RTAX_IFP] != NULL ||
			    info->rti_info[RTAX_IFA] != NULL) {
				struct ifaddr	*ifa = NULL;

				NET_LOCK();
				if ((error = rtm_getifa(info, tableid)) != 0) {
					NET_UNLOCK();
					break;
				}
				ifa = info->rti_ifa;
				if (rt->rt_ifa != ifa) {
					/*
					 * Tell the old interface the route
					 * goes away, then rebind the route
					 * to the new ifaddr.
					 */
					ifp = if_get(rt->rt_ifidx);
					KASSERT(ifp != NULL);
					ifp->if_rtrequest(ifp, RTM_DELETE, rt);
					ifafree(rt->rt_ifa);
					if_put(ifp);

					ifa->ifa_refcnt++;
					rt->rt_ifa = ifa;
					rt->rt_ifidx = ifa->ifa_ifp->if_index;
					/* recheck link state after ifp change*/
					rt_if_linkstate_change(rt, ifa->ifa_ifp,
					    tableid);
				}
				NET_UNLOCK();
			}
			/*
			 * Also entered from RTM_ADD above when the matched
			 * route is RTF_CACHED; newgate is still 0 then.
			 */
change:
			if (info->rti_info[RTAX_GATEWAY] != NULL) {
				/*
				 * When updating the gateway, make sure it's
				 * valid.
				 */
				if (!newgate && rt->rt_gateway->sa_family !=
				    info->rti_info[RTAX_GATEWAY]->sa_family) {
					error = EINVAL;
					break;
				}

				NET_LOCK();
				error = rt_setgate(rt,
				    info->rti_info[RTAX_GATEWAY], tableid);
				NET_UNLOCK();
				if (error)
					break;
			}
#ifdef MPLS
			if (rtm->rtm_flags & RTF_MPLS) {
				NET_LOCK();
				error = rt_mpls_set(rt,
				    info->rti_info[RTAX_SRC], info->rti_mpls);
				NET_UNLOCK();
				if (error)
					break;
			} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
				NET_LOCK();
				/* if gateway changed remove MPLS information */
				rt_mpls_clear(rt);
				NET_UNLOCK();
			}
#endif

#ifdef BFD
			if (ISSET(rtm->rtm_flags, RTF_BFD)) {
				if ((error = bfdset(rt)))
					break;
			} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
			    ISSET(rtm->rtm_fmask, RTF_BFD)) {
				bfdclear(rt);
			}
#endif

			NET_LOCK();
			/* Hack to allow some flags to be toggled */
			if (rtm->rtm_fmask) {
				/* MPLS flag it is set by rt_mpls_set() */
				rtm->rtm_fmask &= ~RTF_MPLS;
				rtm->rtm_flags &= ~RTF_MPLS;
				rt->rt_flags =
				    (rt->rt_flags & ~rtm->rtm_fmask) |
				    (rtm->rtm_flags & rtm->rtm_fmask);
			}
			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
			    &rt->rt_rmx);

			/* Let the interface see the updated route. */
			ifp = if_get(rt->rt_ifidx);
			KASSERT(ifp != NULL);
			ifp->if_rtrequest(ifp, RTM_ADD, rt);
			if_put(ifp);

			/* Swap the route label if a new one was supplied. */
			if (info->rti_info[RTAX_LABEL] != NULL) {
				char *rtlabel = ((struct sockaddr_rtlabel *)
				    info->rti_info[RTAX_LABEL])->sr_label;
				rtlabel_unref(rt->rt_labelid);
				rt->rt_labelid = rtlabel_name2id(rtlabel);
			}
			if_group_routechange(info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK]);
			/*
			 * For every metric named in rtm_inits, replace its
			 * lock bit by the one given in rmx_locks.
			 */
			rt->rt_locks &= ~(rtm->rtm_inits);
			rt->rt_locks |=
			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
			NET_UNLOCK();
			break;
		}
		break;
	case RTM_GET:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL)
			error = ESRCH;
		break;
	}

	/* Hand the resulting route (possibly NULL) back to the caller. */
	*prt = rt;
	return (error);
}
1142 
1143 struct ifaddr *
1144 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1145     unsigned int rtableid)
1146 {
1147 	struct ifaddr	*ifa;
1148 
1149 	if ((flags & RTF_GATEWAY) == 0) {
1150 		/*
1151 		 * If we are adding a route to an interface,
1152 		 * and the interface is a pt to pt link
1153 		 * we should search for the destination
1154 		 * as our clue to the interface.  Otherwise
1155 		 * we can use the local address.
1156 		 */
1157 		ifa = NULL;
1158 		if (flags & RTF_HOST)
1159 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1160 		if (ifa == NULL)
1161 			ifa = ifa_ifwithaddr(gateway, rtableid);
1162 	} else {
1163 		/*
1164 		 * If we are adding a route to a remote net
1165 		 * or host, the gateway may still be on the
1166 		 * other end of a pt to pt link.
1167 		 */
1168 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1169 	}
1170 	if (ifa == NULL) {
1171 		if (gateway->sa_family == AF_LINK) {
1172 			struct sockaddr_dl *sdl = satosdl(gateway);
1173 			struct ifnet *ifp = if_get(sdl->sdl_index);
1174 
1175 			if (ifp != NULL)
1176 				ifa = ifaof_ifpforaddr(dst, ifp);
1177 			if_put(ifp);
1178 		} else {
1179 			struct rtentry *rt;
1180 
1181 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1182 			if (rt != NULL)
1183 				ifa = rt->rt_ifa;
1184 			rtfree(rt);
1185 		}
1186 	}
1187 	if (ifa == NULL)
1188 		return (NULL);
1189 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1190 		struct ifaddr	*oifa = ifa;
1191 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1192 		if (ifa == NULL)
1193 			ifa = oifa;
1194 	}
1195 	return (ifa);
1196 }
1197 
1198 int
1199 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1200 {
1201 	struct ifnet	*ifp = NULL;
1202 
1203 	/*
1204 	 * The "returned" `ifa' is guaranteed to be alive only if
1205 	 * the NET_LOCK() is held.
1206 	 */
1207 	NET_ASSERT_LOCKED();
1208 
1209 	/*
1210 	 * ifp may be specified by sockaddr_dl when protocol address
1211 	 * is ambiguous
1212 	 */
1213 	if (info->rti_info[RTAX_IFP] != NULL) {
1214 		struct sockaddr_dl *sdl;
1215 
1216 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1217 		ifp = if_get(sdl->sdl_index);
1218 	}
1219 
1220 #ifdef IPSEC
1221 	/*
1222 	 * If the destination is a PF_KEY address, we'll look
1223 	 * for the existence of a encap interface number or address
1224 	 * in the options list of the gateway. By default, we'll return
1225 	 * enc0.
1226 	 */
1227 	if (info->rti_info[RTAX_DST] &&
1228 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1229 		info->rti_ifa = enc_getifa(rtid, 0);
1230 #endif
1231 
1232 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1233 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1234 
1235 	if (info->rti_ifa == NULL) {
1236 		struct sockaddr	*sa;
1237 
1238 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1239 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1240 				sa = info->rti_info[RTAX_DST];
1241 
1242 		if (sa != NULL && ifp != NULL)
1243 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1244 		else if (info->rti_info[RTAX_DST] != NULL &&
1245 		    info->rti_info[RTAX_GATEWAY] != NULL)
1246 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1247 			    info->rti_info[RTAX_DST],
1248 			    info->rti_info[RTAX_GATEWAY],
1249 			    rtid);
1250 		else if (sa != NULL)
1251 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1252 			    sa, sa, rtid);
1253 	}
1254 
1255 	if_put(ifp);
1256 
1257 	if (info->rti_ifa == NULL)
1258 		return (ENETUNREACH);
1259 
1260 	return (0);
1261 }
1262 
1263 int
1264 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1265 {
1266 	struct rtentry *nhrt = arg;
1267 
1268 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1269 	    !ISSET(rt->rt_locks, RTV_MTU))
1270 		rt->rt_mtu = 0;
1271 
1272 	return (0);
1273 }
1274 
1275 /*
1276  * Check if the user request to insert an ARP entry does not conflict
1277  * with existing ones.
1278  *
1279  * Only two entries are allowed for a given IP address: a private one
1280  * (priv) and a public one (pub).
1281  */
1282 int
1283 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1284 {
1285 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1286 
1287 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1288 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1289 		return (0);
1290 
1291 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1292 		return (0);
1293 
1294 	/* If the entry is cached, it can be updated. */
1295 	if (ISSET(rt->rt_flags, RTF_CACHED))
1296 		return (0);
1297 
1298 	/*
1299 	 * Same destination, not cached and both "priv" or "pub" conflict.
1300 	 * If a second entry exists, it always conflict.
1301 	 */
1302 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1303 	    ISSET(rt->rt_flags, RTF_MPATH))
1304 		return (EEXIST);
1305 
1306 	/* No conflict but an entry exist so we need to force mpath. */
1307 	info->rti_flags |= RTF_MPATH;
1308 	return (0);
1309 }
1310 
1311 void
1312 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1313     struct rt_kmetrics *out)
1314 {
1315 	int64_t expire;
1316 
1317 	if (which & RTV_MTU)
1318 		out->rmx_mtu = in->rmx_mtu;
1319 	if (which & RTV_EXPIRE) {
1320 		expire = in->rmx_expire;
1321 		if (expire != 0) {
1322 			expire -= time_second;
1323 			expire += time_uptime;
1324 		}
1325 
1326 		out->rmx_expire = expire;
1327 	}
1328 }
1329 
1330 void
1331 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1332 {
1333 	int64_t expire;
1334 
1335 	expire = in->rmx_expire;
1336 	if (expire != 0) {
1337 		expire -= time_uptime;
1338 		expire += time_second;
1339 	}
1340 
1341 	bzero(out, sizeof(*out));
1342 	out->rmx_locks = in->rmx_locks;
1343 	out->rmx_mtu = in->rmx_mtu;
1344 	out->rmx_expire = expire;
1345 	out->rmx_pksent = in->rmx_pksent;
1346 }
1347 
/*
 * ROUNDUP(a): round `a' up to the next multiple of sizeof(long); a value
 * of zero (or less) yields sizeof(long), so even an empty sockaddr takes
 * one slot.  Note that `a' is evaluated more than once.
 *
 * ADVANCE(x, n): step pointer `x' past the sockaddr `n', including the
 * padding computed by ROUNDUP().  Arguments and the whole expansion are
 * parenthesized so the macro is safe inside larger expressions.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
1351 
1352 int
1353 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1354 {
1355 	struct sockaddr	*sa;
1356 	int		 i;
1357 
1358 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1359 	for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1360 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1361 			continue;
1362 		if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1363 			return (EINVAL);
1364 		sa = (struct sockaddr *)cp;
1365 		if (cp + sa->sa_len > cplim)
1366 			return (EINVAL);
1367 		rtinfo->rti_info[i] = sa;
1368 		ADVANCE(cp, sa);
1369 	}
1370 	return (0);
1371 }
1372 
/*
 * Allocate an mbuf and build a routing message of the given `type' in it.
 *
 * The fixed header size is selected from `type' and the header is zeroed.
 * When `rtinfo' is not NULL, every non-NULL sockaddr in rtinfo->rti_info[]
 * is appended after the header, each occupying ROUNDUP(sa_len) bytes, and
 * the matching bit is OR-ed into rtinfo->rti_addrs (which is not reset
 * here).  Only rtm_msglen, rtm_hdrlen, rtm_version and rtm_type are filled
 * in; the caller sets the remaining header fields.
 *
 * Returns the mbuf, or NULL if allocation or m_copyback() fails.
 */
struct mbuf *
rtm_msg1(int type, struct rt_addrinfo *rtinfo)
{
	struct rt_msghdr	*rtm;
	struct mbuf		*m;
	int			 i;
	struct sockaddr		*sa;
	int			 len, dlen, hlen;

	/* The fixed header size depends on the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;
	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;
	case RTM_IFANNOUNCE:
		len = sizeof(struct if_announcemsghdr);
		break;
#ifdef BFD
	case RTM_BFD:
		len = sizeof(struct bfd_msghdr);
		break;
#endif
	case RTM_80211INFO:
		len = sizeof(struct if_ieee80211_msghdr);
		break;
	default:
		len = sizeof(struct rt_msghdr);
		break;
	}
	/* The header must fit in a single cluster. */
	if (len > MCLBYTES)
		panic("rtm_msg1");
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	/* Header too large for the mbuf itself: attach a cluster. */
	if (m && len > MHLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL)
		return (m);
	m->m_pkthdr.len = m->m_len = hlen = len;
	m->m_pkthdr.ph_ifidx = 0;
	/*
	 * All header types are accessed through the rt_msghdr view below;
	 * presumably they share the same leading fields -- TODO confirm.
	 */
	rtm = mtod(m, struct rt_msghdr *);
	bzero(rtm, len);
	for (i = 0; i < RTAX_MAX; i++) {
		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
			continue;
		rtinfo->rti_addrs |= (1 << i);
		dlen = ROUNDUP(sa->sa_len);
		/*
		 * NOTE(review): dlen (the padded length) bytes are read
		 * from `sa', which may be more than sa->sa_len -- confirm
		 * that every source sockaddr is backed by padded storage.
		 */
		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
			m_freem(m);
			return (NULL);
		}
		len += dlen;
	}
	rtm->rtm_msglen = len;
	rtm->rtm_hdrlen = hlen;
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = type;
	return (m);
}
1438 
1439 int
1440 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1441     struct walkarg *w)
1442 {
1443 	int		i;
1444 	int		len, dlen, hlen, second_time = 0;
1445 	caddr_t		cp0;
1446 
1447 	rtinfo->rti_addrs = 0;
1448 again:
1449 	switch (type) {
1450 	case RTM_DELADDR:
1451 	case RTM_NEWADDR:
1452 		len = sizeof(struct ifa_msghdr);
1453 		break;
1454 	case RTM_IFINFO:
1455 		len = sizeof(struct if_msghdr);
1456 		break;
1457 	default:
1458 		len = sizeof(struct rt_msghdr);
1459 		break;
1460 	}
1461 	hlen = len;
1462 	if ((cp0 = cp) != NULL)
1463 		cp += len;
1464 	for (i = 0; i < RTAX_MAX; i++) {
1465 		struct sockaddr *sa;
1466 
1467 		if ((sa = rtinfo->rti_info[i]) == NULL)
1468 			continue;
1469 		rtinfo->rti_addrs |= (1 << i);
1470 		dlen = ROUNDUP(sa->sa_len);
1471 		if (cp) {
1472 			bcopy(sa, cp, (size_t)dlen);
1473 			cp += dlen;
1474 		}
1475 		len += dlen;
1476 	}
1477 	/* align message length to the next natural boundary */
1478 	len = ALIGN(len);
1479 	if (cp == 0 && w != NULL && !second_time) {
1480 		w->w_needed += len;
1481 		if (w->w_needed <= 0 && w->w_where) {
1482 			if (w->w_tmemsize < len) {
1483 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1484 				w->w_tmem = malloc(len, M_RTABLE,
1485 				    M_NOWAIT | M_ZERO);
1486 				if (w->w_tmem)
1487 					w->w_tmemsize = len;
1488 			}
1489 			if (w->w_tmem) {
1490 				cp = w->w_tmem;
1491 				second_time = 1;
1492 				goto again;
1493 			} else
1494 				w->w_where = 0;
1495 		}
1496 	}
1497 	if (cp && w)		/* clear the message header */
1498 		bzero(cp0, hlen);
1499 
1500 	if (cp) {
1501 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1502 
1503 		rtm->rtm_version = RTM_VERSION;
1504 		rtm->rtm_type = type;
1505 		rtm->rtm_msglen = len;
1506 		rtm->rtm_hdrlen = hlen;
1507 	}
1508 	return (len);
1509 }
1510 
1511 void
1512 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1513 {
1514 	struct rt_addrinfo	 info;
1515 	struct ifnet		*ifp;
1516 	struct sockaddr_rtlabel	 sa_rl;
1517 	struct sockaddr_in6	 sa_mask;
1518 
1519 	memset(&info, 0, sizeof(info));
1520 	info.rti_info[RTAX_DST] = rt_key(rt);
1521 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1522 	if (!ISSET(rt->rt_flags, RTF_HOST))
1523 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1524 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1525 	ifp = if_get(rt->rt_ifidx);
1526 	if (ifp != NULL) {
1527 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1528 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1529 	}
1530 
1531 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1532 	    rtableid);
1533 	if_put(ifp);
1534 }
1535 
1536 /*
1537  * This routine is called to generate a message from the routing
1538  * socket indicating that a redirect has occurred, a routing lookup
1539  * has failed, or that a protocol has detected timeouts to a particular
1540  * destination.
1541  */
1542 void
1543 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1544     u_int ifidx, int error, u_int tableid)
1545 {
1546 	struct rt_msghdr	*rtm;
1547 	struct mbuf		*m;
1548 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1549 
1550 	if (rtptable.rtp_count == 0)
1551 		return;
1552 	m = rtm_msg1(type, rtinfo);
1553 	if (m == NULL)
1554 		return;
1555 	rtm = mtod(m, struct rt_msghdr *);
1556 	rtm->rtm_flags = RTF_DONE | flags;
1557 	rtm->rtm_priority = prio;
1558 	rtm->rtm_errno = error;
1559 	rtm->rtm_tableid = tableid;
1560 	rtm->rtm_addrs = rtinfo->rti_addrs;
1561 	rtm->rtm_index = ifidx;
1562 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1563 }
1564 
1565 /*
1566  * This routine is called to generate a message from the routing
1567  * socket indicating that the status of a network interface has changed.
1568  */
1569 void
1570 rtm_ifchg(struct ifnet *ifp)
1571 {
1572 	struct if_msghdr	*ifm;
1573 	struct mbuf		*m;
1574 
1575 	if (rtptable.rtp_count == 0)
1576 		return;
1577 	m = rtm_msg1(RTM_IFINFO, NULL);
1578 	if (m == NULL)
1579 		return;
1580 	ifm = mtod(m, struct if_msghdr *);
1581 	ifm->ifm_index = ifp->if_index;
1582 	ifm->ifm_tableid = ifp->if_rdomain;
1583 	ifm->ifm_flags = ifp->if_flags;
1584 	ifm->ifm_xflags = ifp->if_xflags;
1585 	if_getdata(ifp, &ifm->ifm_data);
1586 	ifm->ifm_addrs = 0;
1587 	route_input(m, NULL, AF_UNSPEC);
1588 }
1589 
1590 /*
1591  * This is called to generate messages from the routing socket
1592  * indicating a network interface has had addresses associated with it.
1593  * if we ever reverse the logic and replace messages TO the routing
1594  * socket indicate a request to configure interfaces, then it will
1595  * be unnecessary as the routing socket will automatically generate
1596  * copies of it.
1597  */
1598 void
1599 rtm_addr(int cmd, struct ifaddr *ifa)
1600 {
1601 	struct ifnet		*ifp = ifa->ifa_ifp;
1602 	struct mbuf		*m;
1603 	struct rt_addrinfo	 info;
1604 	struct ifa_msghdr	*ifam;
1605 
1606 	if (rtptable.rtp_count == 0)
1607 		return;
1608 
1609 	memset(&info, 0, sizeof(info));
1610 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1611 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1612 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1613 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1614 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1615 		return;
1616 	ifam = mtod(m, struct ifa_msghdr *);
1617 	ifam->ifam_index = ifp->if_index;
1618 	ifam->ifam_metric = ifa->ifa_metric;
1619 	ifam->ifam_flags = ifa->ifa_flags;
1620 	ifam->ifam_addrs = info.rti_addrs;
1621 	ifam->ifam_tableid = ifp->if_rdomain;
1622 
1623 	route_input(m, NULL,
1624 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1625 }
1626 
1627 /*
1628  * This is called to generate routing socket messages indicating
1629  * network interface arrival and departure.
1630  */
1631 void
1632 rtm_ifannounce(struct ifnet *ifp, int what)
1633 {
1634 	struct if_announcemsghdr	*ifan;
1635 	struct mbuf			*m;
1636 
1637 	if (rtptable.rtp_count == 0)
1638 		return;
1639 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1640 	if (m == NULL)
1641 		return;
1642 	ifan = mtod(m, struct if_announcemsghdr *);
1643 	ifan->ifan_index = ifp->if_index;
1644 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1645 	ifan->ifan_what = what;
1646 	route_input(m, NULL, AF_UNSPEC);
1647 }
1648 
1649 #ifdef BFD
1650 /*
1651  * This is used to generate routing socket messages indicating
1652  * the state of a BFD session.
1653  */
1654 void
1655 rtm_bfd(struct bfd_config *bfd)
1656 {
1657 	struct bfd_msghdr	*bfdm;
1658 	struct sockaddr_bfd	 sa_bfd;
1659 	struct mbuf		*m;
1660 	struct rt_addrinfo	 info;
1661 
1662 	if (rtptable.rtp_count == 0)
1663 		return;
1664 	memset(&info, 0, sizeof(info));
1665 	info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt);
1666 	info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr;
1667 
1668 	m = rtm_msg1(RTM_BFD, &info);
1669 	if (m == NULL)
1670 		return;
1671 	bfdm = mtod(m, struct bfd_msghdr *);
1672 	bfdm->bm_addrs = info.rti_addrs;
1673 
1674 	bfd2sa(bfd->bc_rt, &sa_bfd);
1675 	memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd));
1676 
1677 	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
1678 }
1679 #endif /* BFD */
1680 
1681 /*
1682  * This is used to generate routing socket messages indicating
1683  * the state of an ieee80211 interface.
1684  */
1685 void
1686 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1687 {
1688 	struct if_ieee80211_msghdr	*ifim;
1689 	struct mbuf			*m;
1690 
1691 	if (rtptable.rtp_count == 0)
1692 		return;
1693 	m = rtm_msg1(RTM_80211INFO, NULL);
1694 	if (m == NULL)
1695 		return;
1696 	ifim = mtod(m, struct if_ieee80211_msghdr *);
1697 	ifim->ifim_index = ifp->if_index;
1698 	ifim->ifim_tableid = ifp->if_rdomain;
1699 
1700 	memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1701 	route_input(m, NULL, AF_UNSPEC);
1702 }
1703 
1704 /*
1705  * This is used in dumping the kernel table via sysctl().
1706  */
1707 int
1708 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1709 {
1710 	struct walkarg		*w = v;
1711 	int			 error = 0, size;
1712 	struct rt_addrinfo	 info;
1713 	struct ifnet		*ifp;
1714 #ifdef BFD
1715 	struct sockaddr_bfd	 sa_bfd;
1716 #endif
1717 	struct sockaddr_rtlabel	 sa_rl;
1718 	struct sockaddr_in6	 sa_mask;
1719 
1720 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1721 		return 0;
1722 	if (w->w_op == NET_RT_DUMP && w->w_arg) {
1723 		u_int8_t prio = w->w_arg & RTP_MASK;
1724 		if (w->w_arg < 0) {
1725 			prio = (-w->w_arg) & RTP_MASK;
1726 			/* Show all routes that are not this priority */
1727 			if (prio == (rt->rt_priority & RTP_MASK))
1728 				return 0;
1729 		} else {
1730 			if (prio != (rt->rt_priority & RTP_MASK) &&
1731 			    prio != RTP_ANY)
1732 				return 0;
1733 		}
1734 	}
1735 	bzero(&info, sizeof(info));
1736 	info.rti_info[RTAX_DST] = rt_key(rt);
1737 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1738 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1739 	ifp = if_get(rt->rt_ifidx);
1740 	if (ifp != NULL) {
1741 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1742 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1743 		if (ifp->if_flags & IFF_POINTOPOINT)
1744 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1745 	}
1746 	if_put(ifp);
1747 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1748 #ifdef BFD
1749 	if (rt->rt_flags & RTF_BFD)
1750 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
1751 #endif
1752 #ifdef MPLS
1753 	if (rt->rt_flags & RTF_MPLS) {
1754 		struct sockaddr_mpls	 sa_mpls;
1755 
1756 		bzero(&sa_mpls, sizeof(sa_mpls));
1757 		sa_mpls.smpls_family = AF_MPLS;
1758 		sa_mpls.smpls_len = sizeof(sa_mpls);
1759 		sa_mpls.smpls_label = ((struct rt_mpls *)
1760 		    rt->rt_llinfo)->mpls_label;
1761 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1762 		info.rti_mpls = ((struct rt_mpls *)
1763 		    rt->rt_llinfo)->mpls_operation;
1764 	}
1765 #endif
1766 
1767 	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1768 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1769 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1770 
1771 		rtm->rtm_pid = curproc->p_p->ps_pid;
1772 		rtm->rtm_flags = rt->rt_flags;
1773 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
1774 		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1775 		/* Do not account the routing table's reference. */
1776 		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
1777 		rtm->rtm_index = rt->rt_ifidx;
1778 		rtm->rtm_addrs = info.rti_addrs;
1779 		rtm->rtm_tableid = id;
1780 #ifdef MPLS
1781 		rtm->rtm_mpls = info.rti_mpls;
1782 #endif
1783 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1784 			w->w_where = NULL;
1785 		else
1786 			w->w_where += size;
1787 	}
1788 	return (error);
1789 }
1790 
/*
 * Dump the interface list for sysctl(): for every interface (or only the
 * one whose index equals w->w_arg when that is non-zero) emit one
 * RTM_IFINFO message carrying the link-level address, followed by one
 * RTM_NEWADDR message per configured address, restricted to family `af'
 * when `af' is non-zero.
 *
 * Each message is sized by rtm_msg2() with a NULL buffer, which accounts
 * it in w->w_needed and, while the caller's buffer has room, stages it in
 * w->w_tmem; it is then completed here and copied out to w->w_where.
 *
 * Returns 0, or the first copyout() error.
 */
int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet		*ifp;
	struct ifaddr		*ifa;
	struct rt_addrinfo	 info;
	int			 len, error = 0;

	/* `info' is reused for every message; slots are reset as we go. */
	bzero(&info, sizeof(info));
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (w->w_arg && w->w_arg != ifp->if_index)
			continue;
		/* Copy the link-layer address first */
		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
			struct if_msghdr *ifm;

			ifm = (struct if_msghdr *)w->w_tmem;
			ifm->ifm_index = ifp->if_index;
			ifm->ifm_tableid = ifp->if_rdomain;
			ifm->ifm_flags = ifp->if_flags;
			if_getdata(ifp, &ifm->ifm_data);
			ifm->ifm_addrs = info.rti_addrs;
			error = copyout(ifm, w->w_where, len);
			if (error)
				return (error);
			w->w_where += len;
		}
		/* The per-address messages do not repeat the link address. */
		info.rti_info[RTAX_IFP] = NULL;
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
			if (af && af != ifa->ifa_addr->sa_family)
				continue;
			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
				struct ifa_msghdr *ifam;

				ifam = (struct ifa_msghdr *)w->w_tmem;
				ifam->ifam_index = ifa->ifa_ifp->if_index;
				ifam->ifam_flags = ifa->ifa_flags;
				ifam->ifam_metric = ifa->ifa_metric;
				ifam->ifam_addrs = info.rti_addrs;
				error = copyout(w->w_tmem, w->w_where, len);
				if (error)
					return (error);
				w->w_where += len;
			}
		}
		/* Clear the address slots before the next interface. */
		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
		    info.rti_info[RTAX_BRD] = NULL;
	}
	return (0);
}
1848 
1849 int
1850 sysctl_ifnames(struct walkarg *w)
1851 {
1852 	struct if_nameindex_msg ifn;
1853 	struct ifnet *ifp;
1854 	int error = 0;
1855 
1856 	/* XXX ignore tableid for now */
1857 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1858 		if (w->w_arg && w->w_arg != ifp->if_index)
1859 			continue;
1860 		w->w_needed += sizeof(ifn);
1861 		if (w->w_where && w->w_needed <= 0) {
1862 
1863 			memset(&ifn, 0, sizeof(ifn));
1864 			ifn.if_index = ifp->if_index;
1865 			strlcpy(ifn.if_name, ifp->if_xname,
1866 			    sizeof(ifn.if_name));
1867 			error = copyout(&ifn, w->w_where, sizeof(ifn));
1868 			if (error)
1869 				return (error);
1870 			w->w_where += sizeof(ifn);
1871 		}
1872 	}
1873 
1874 	return (0);
1875 }
1876 
/*
 * sysctl handler of the routing domain (installed as pr_sysctl in
 * routesw[]).  Read-only: any attempt to set a value fails with EPERM.
 *
 * The MIB name is interpreted as:
 *   name[0]  address family filter (0 means all families)
 *   name[1]  operation (NET_RT_*)
 *   name[2]  operation-specific argument
 *   name[3]  optional routing table id; the calling process' table is
 *            used when it is absent
 *
 * For the dump-style operations the result is written to `where' and
 * *given is updated to the number of bytes produced; ENOMEM is returned
 * when the buffer was too small.  With a NULL `where', *given is set to
 * the required size plus 10%.
 */
int
sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
    size_t newlen)
{
	int			 i, error = EINVAL;
	u_char			 af;
	struct walkarg		 w;
	struct rt_tableinfo	 tableinfo;
	u_int			 tableid = 0;

	if (new)
		return (EPERM);
	if (namelen < 3 || namelen > 4)
		return (EINVAL);
	af = name[0];
	bzero(&w, sizeof(w));
	w.w_where = where;
	w.w_given = *given;
	/*
	 * Start at minus the caller's buffer size: the helpers add each
	 * message length and only copy out while the sum stays <= 0.
	 */
	w.w_needed = 0 - w.w_given;
	w.w_op = name[1];
	w.w_arg = name[2];

	if (namelen == 4) {
		tableid = name[3];
		if (!rtable_exists(tableid))
			return (ENOENT);
	} else
		tableid = curproc->p_p->ps_rtableid;

	/* An unknown operation falls through with error still EINVAL. */
	switch (w.w_op) {
	case NET_RT_DUMP:
	case NET_RT_FLAGS:
		NET_LOCK();
		for (i = 1; i <= AF_MAX; i++) {
			if (af != 0 && af != i)
				continue;

			error = rtable_walk(tableid, i, sysctl_dumpentry, &w);
			/* Families without a table are simply skipped. */
			if (error == EAFNOSUPPORT)
				error = 0;
			if (error)
				break;
		}
		NET_UNLOCK();
		break;

	case NET_RT_IFLIST:
		NET_LOCK();
		error = sysctl_iflist(af, &w);
		NET_UNLOCK();
		break;

	case NET_RT_STATS:
		return (sysctl_rtable_rtstat(where, given, new));
	case NET_RT_TABLE:
		/* Here the argument, not name[3], selects the table. */
		tableid = w.w_arg;
		if (!rtable_exists(tableid))
			return (ENOENT);
		memset(&tableinfo, 0, sizeof tableinfo);
		tableinfo.rti_tableid = tableid;
		tableinfo.rti_domainid = rtable_l2(tableid);
		error = sysctl_rdstruct(where, given, new,
		    &tableinfo, sizeof(tableinfo));
		return (error);
	case NET_RT_IFNAMES:
		NET_LOCK();
		error = sysctl_ifnames(&w);
		NET_UNLOCK();
		break;
	}
	/* Release the scratch buffer rtm_msg2() may have allocated. */
	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
	/* Turn the running balance back into the total size needed. */
	w.w_needed += w.w_given;
	if (where) {
		*given = w.w_where - (caddr_t)where;
		if (*given < w.w_needed)
			return (ENOMEM);
	} else
		*given = (11 * w.w_needed) / 10;

	return (error);
}
1958 
1959 int
1960 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
1961 {
1962 	extern struct cpumem *rtcounters;
1963 	uint64_t counters[rts_ncounters];
1964 	struct rtstat rtstat;
1965 	uint32_t *words = (uint32_t *)&rtstat;
1966 	int i;
1967 
1968 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
1969 	memset(&rtstat, 0, sizeof rtstat);
1970 	counters_read(rtcounters, counters, nitems(counters));
1971 
1972 	for (i = 0; i < nitems(counters); i++)
1973 		words[i] = (uint32_t)counters[i];
1974 
1975 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
1976 }
1977 
1978 int
1979 rtm_validate_proposal(struct rt_addrinfo *info)
1980 {
1981 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
1982 	    RTA_SEARCH)) {
1983 		return -1;
1984 	}
1985 
1986 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
1987 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
1988 		if (sa == NULL)
1989 			return -1;
1990 		switch (sa->sa_family) {
1991 		case AF_INET:
1992 			if (sa->sa_len != sizeof(struct sockaddr_in))
1993 				return -1;
1994 			break;
1995 		case AF_INET6:
1996 			if (sa->sa_len != sizeof(struct sockaddr_in6))
1997 				return -1;
1998 			break;
1999 		default:
2000 			return -1;
2001 		}
2002 	}
2003 
2004 	if (ISSET(info->rti_addrs, RTA_IFA)) {
2005 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
2006 		if (sa == NULL)
2007 			return -1;
2008 		switch (sa->sa_family) {
2009 		case AF_INET:
2010 			if (sa->sa_len != sizeof(struct sockaddr_in))
2011 				return -1;
2012 			break;
2013 		case AF_INET6:
2014 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2015 				return -1;
2016 			break;
2017 		default:
2018 			return -1;
2019 		}
2020 	}
2021 
2022 	if (ISSET(info->rti_addrs, RTA_DNS)) {
2023 		struct sockaddr_rtdns *rtdns =
2024 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2025 		if (rtdns == NULL)
2026 			return -1;
2027 		if (rtdns->sr_len > sizeof(*rtdns))
2028 			return -1;
2029 		if (rtdns->sr_len <=
2030 		    offsetof(struct sockaddr_rtdns, sr_dns))
2031 			return -1;
2032 	}
2033 
2034 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
2035 		struct sockaddr_rtstatic *rtstatic =
2036 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2037 		if (rtstatic == NULL)
2038 			return -1;
2039 		if (rtstatic->sr_len > sizeof(*rtstatic))
2040 			return -1;
2041 		if (rtstatic->sr_len <=
2042 		    offsetof(struct sockaddr_rtstatic, sr_static))
2043 			return -1;
2044 	}
2045 
2046 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2047 		struct sockaddr_rtsearch *rtsearch =
2048 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2049 		if (rtsearch == NULL)
2050 			return -1;
2051 		if (rtsearch->sr_len > sizeof(*rtsearch))
2052 			return -1;
2053 		if (rtsearch->sr_len <=
2054 		    offsetof(struct sockaddr_rtsearch, sr_search))
2055 			return -1;
2056 	}
2057 
2058 	return 0;
2059 }
2060 
2061 /*
2062  * Definitions of protocols supported in the ROUTE domain.
2063  */
2064 
extern	struct domain routedomain;		/* or at least forward */

/*
 * Protocol switch of the routing domain: a single SOCK_RAW entry whose
 * handlers are the route_*() functions of this file; sysctl requests
 * are served by sysctl_rtable().
 */
struct protosw routesw[] = {
{
  .pr_type	= SOCK_RAW,
  .pr_domain	= &routedomain,
  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
  .pr_output	= route_output,
  .pr_ctloutput	= route_ctloutput,
  .pr_usrreq	= route_usrreq,
  .pr_attach	= route_attach,
  .pr_detach	= route_detach,
  .pr_init	= route_prinit,
  .pr_sysctl	= sysctl_rtable
}
};
2081 
/*
 * The PF_ROUTE domain itself.  dom_protosw points at the first entry of
 * routesw[] and dom_protoswNPROTOSW one past its last entry.
 */
struct domain routedomain = {
  .dom_family = PF_ROUTE,
  .dom_name = "route",
  .dom_init = route_init,
  .dom_protosw = routesw,
  .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
};
2089