xref: /openbsd-src/sys/net/rtsock.c (revision 897fc685943471cf985a0fe38ba076ea6fe74fa5)
1 /*	$OpenBSD: rtsock.c,v 1.262 2018/02/19 08:59:52 mpi Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/protosw.h>
73 #include <sys/srp.h>
74 
75 #include <net/if.h>
76 #include <net/if_dl.h>
77 #include <net/if_var.h>
78 #include <net/route.h>
79 #include <net/raw_cb.h>
80 
81 #include <netinet/in.h>
82 
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93 
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97 
98 struct sockaddr		route_src = { 2, PF_ROUTE, };
99 
100 struct walkarg {
101 	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
102 	caddr_t	w_where, w_tmem;
103 };
104 
105 void	route_prinit(void);
106 void	route_ref(void *, void *);
107 void	route_unref(void *, void *);
108 int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
109 	    struct mbuf *);
110 int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
111 int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
112 	    struct mbuf *, struct proc *);
113 void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
114 int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
115 int	route_cleargateway(struct rtentry *, void *, unsigned int);
116 void	route_senddesync(void *);
117 
118 int	rtm_getifa(struct rt_addrinfo *, unsigned int);
119 int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
120 	    uint8_t, unsigned int);
121 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
122 struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
123 int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
124 		     struct walkarg *);
125 void		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
126 int		 rtm_validate_proposal(struct rt_addrinfo *);
127 void		 rtm_setmetrics(u_long, const struct rt_metrics *,
128 		     struct rt_kmetrics *);
129 void		 rtm_getmetrics(const struct rt_kmetrics *,
130 		     struct rt_metrics *);
131 
132 int		 sysctl_iflist(int, struct walkarg *);
133 int		 sysctl_ifnames(struct walkarg *);
134 int		 sysctl_rtable_rtstat(void *, size_t *, void *);
135 
136 struct routecb {
137 	struct rawcb		rcb;
138 	SRPL_ENTRY(routecb)	rcb_list;
139 	struct refcnt		refcnt;
140 	struct timeout		timeout;
141 	unsigned int		msgfilter;
142 	unsigned int		flags;
143 	u_int			rtableid;
144 	u_char			priority;
145 };
146 #define	sotoroutecb(so)	((struct routecb *)(so)->so_pcb)
147 
148 struct route_cb {
149 	SRPL_HEAD(, routecb)	rcb;
150 	struct srpl_rc		rcb_rc;
151 	struct rwlock		rcb_lk;
152 	unsigned int		any_count;
153 };
154 
155 struct route_cb route_cb;
156 
/*
 * Overflow handling: when a message cannot be queued on a routing
 * socket, the socket is flagged and an RTM_DESYNC message is sent (and
 * retried from a timeout) so that userland learns messages were lost.
 */
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
					   queueing more packets */

/* Delay before retrying a failed RTM_DESYNC, as passed to timeout_add(). */
#define ROUTE_DESYNC_RESEND_TIMEOUT	(hz / 5)
167 
168 void
169 route_prinit(void)
170 {
171 	srpl_rc_init(&route_cb.rcb_rc, route_ref, route_unref, NULL);
172 	rw_init(&route_cb.rcb_lk, "rtsock");
173 	SRPL_INIT(&route_cb.rcb);
174 }
175 
176 void
177 route_ref(void *null, void *v)
178 {
179 	struct routecb *rop = v;
180 
181 	refcnt_take(&rop->refcnt);
182 }
183 
184 void
185 route_unref(void *null, void *v)
186 {
187 	struct routecb *rop = v;
188 
189 	refcnt_rele_wake(&rop->refcnt);
190 }
191 
192 int
193 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
194     struct mbuf *control, struct proc *p)
195 {
196 	struct routecb	*rop;
197 	int		 error = 0;
198 
199 	soassertlocked(so);
200 
201 	rop = sotoroutecb(so);
202 	if (rop == NULL) {
203 		m_freem(m);
204 		return (EINVAL);
205 	}
206 
207 	switch (req) {
208 	case PRU_RCVD:
209 		/*
210 		 * If we are in a FLUSH state, check if the buffer is
211 		 * empty so that we can clear the flag.
212 		 */
213 		if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) &&
214 		    ((sbspace(rop->rcb.rcb_socket,
215 		    &rop->rcb.rcb_socket->so_rcv) ==
216 		    rop->rcb.rcb_socket->so_rcv.sb_hiwat)))
217 			rop->flags &= ~ROUTECB_FLAG_FLUSH;
218 		break;
219 
220 	default:
221 		error = raw_usrreq(so, req, m, nam, control, p);
222 	}
223 
224 	return (error);
225 }
226 
227 int
228 route_attach(struct socket *so, int proto)
229 {
230 	struct rawcb    *rp;
231 	struct routecb	*rop;
232 	int		 error;
233 
234 	/*
235 	 * use the rawcb but allocate a routecb, this
236 	 * code does not care about the additional fields
237 	 * and works directly on the raw socket.
238 	 */
239 	rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO);
240 	rp = &rop->rcb;
241 	so->so_pcb = rop;
242 	/* Init the timeout structure */
243 	timeout_set(&rop->timeout, route_senddesync, rop);
244 	refcnt_init(&rop->refcnt);
245 
246 	if (curproc == NULL)
247 		error = EACCES;
248 	else
249 		error = soreserve(so, RAWSNDQ, RAWRCVQ);
250 	if (error) {
251 		free(rop, M_PCB, sizeof(struct routecb));
252 		return (error);
253 	}
254 
255 	rp->rcb_socket = so;
256 	rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family;
257 	rp->rcb_proto.sp_protocol = proto;
258 
259 	rop->rtableid = curproc->p_p->ps_rtableid;
260 
261 	soisconnected(so);
262 	so->so_options |= SO_USELOOPBACK;
263 
264 	rp->rcb_faddr = &route_src;
265 
266 	rw_enter(&route_cb.rcb_lk, RW_WRITE);
267 
268 	SRPL_INSERT_HEAD_LOCKED(&route_cb.rcb_rc, &route_cb.rcb, rop, rcb_list);
269 	route_cb.any_count++;
270 
271 	rw_exit(&route_cb.rcb_lk);
272 
273 	return (0);
274 }
275 
276 int
277 route_detach(struct socket *so)
278 {
279 	struct routecb	*rop;
280 
281 	soassertlocked(so);
282 
283 	rop = sotoroutecb(so);
284 	if (rop == NULL)
285 		return (EINVAL);
286 
287 	rw_enter(&route_cb.rcb_lk, RW_WRITE);
288 
289 	timeout_del(&rop->timeout);
290 	route_cb.any_count--;
291 
292 	SRPL_REMOVE_LOCKED(&route_cb.rcb_rc, &route_cb.rcb,
293 	    rop, routecb, rcb_list);
294 
295 	rw_exit(&route_cb.rcb_lk);
296 	/* wait for all references to drop */
297 	refcnt_finalize(&rop->refcnt, "rtsockrefs");
298 
299 	so->so_pcb = NULL;
300 	sofree(so);
301 	free(rop, M_PCB, sizeof(struct routecb));
302 
303 	return (0);
304 }
305 
306 int
307 route_ctloutput(int op, struct socket *so, int level, int optname,
308     struct mbuf *m)
309 {
310 	struct routecb *rop = sotoroutecb(so);
311 	int error = 0;
312 	unsigned int tid, prio;
313 
314 	if (level != AF_ROUTE)
315 		return (EINVAL);
316 
317 	switch (op) {
318 	case PRCO_SETOPT:
319 		switch (optname) {
320 		case ROUTE_MSGFILTER:
321 			if (m == NULL || m->m_len != sizeof(unsigned int))
322 				error = EINVAL;
323 			else
324 				rop->msgfilter = *mtod(m, unsigned int *);
325 			break;
326 		case ROUTE_TABLEFILTER:
327 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
328 				error = EINVAL;
329 				break;
330 			}
331 			tid = *mtod(m, unsigned int *);
332 			if (tid != RTABLE_ANY && !rtable_exists(tid))
333 				error = ENOENT;
334 			else
335 				rop->rtableid = tid;
336 			break;
337 		case ROUTE_PRIOFILTER:
338 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
339 				error = EINVAL;
340 				break;
341 			}
342 			prio = *mtod(m, unsigned int *);
343 			if (prio > RTP_MAX)
344 				error = EINVAL;
345 			else
346 				rop->priority = prio;
347 			break;
348 		default:
349 			error = ENOPROTOOPT;
350 			break;
351 		}
352 		break;
353 	case PRCO_GETOPT:
354 		switch (optname) {
355 		case ROUTE_MSGFILTER:
356 			m->m_len = sizeof(unsigned int);
357 			*mtod(m, unsigned int *) = rop->msgfilter;
358 			break;
359 		case ROUTE_TABLEFILTER:
360 			m->m_len = sizeof(unsigned int);
361 			*mtod(m, unsigned int *) = rop->rtableid;
362 			break;
363 		case ROUTE_PRIOFILTER:
364 			m->m_len = sizeof(unsigned int);
365 			*mtod(m, unsigned int *) = rop->priority;
366 			break;
367 		default:
368 			error = ENOPROTOOPT;
369 			break;
370 		}
371 	}
372 	return (error);
373 }
374 
375 void
376 route_senddesync(void *data)
377 {
378 	struct routecb	*rop;
379 	struct mbuf	*desync_mbuf;
380 
381 	rop = (struct routecb *)data;
382 
383 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
384 	if ((rop->flags & ROUTECB_FLAG_DESYNC) == 0)
385 		return;
386 
387 	/*
388 	 * If we fail to alloc memory or if sbappendaddr()
389 	 * fails, re-add timeout and try again.
390 	 */
391 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
392 	if (desync_mbuf != NULL) {
393 		struct socket *so = rop->rcb.rcb_socket;
394 		if (sbappendaddr(so, &so->so_rcv, &route_src,
395 		    desync_mbuf, NULL) != 0) {
396 			rop->flags &= ~ROUTECB_FLAG_DESYNC;
397 			sorwakeup(rop->rcb.rcb_socket);
398 			return;
399 		}
400 		m_freem(desync_mbuf);
401 	}
402 	/* Re-add timeout to try sending msg again */
403 	timeout_add(&rop->timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
404 }
405 
406 void
407 route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family)
408 {
409 	struct routecb *rop;
410 	struct rawcb *rp;
411 	struct rt_msghdr *rtm;
412 	struct mbuf *m = m0;
413 	struct socket *last = NULL;
414 	struct srp_ref sr;
415 
416 	KERNEL_ASSERT_LOCKED();
417 
418 	/* ensure that we can access the rtm_type via mtod() */
419 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
420 		m_freem(m);
421 		return;
422 	}
423 
424 	SRPL_FOREACH(rop, &sr, &route_cb.rcb, rcb_list) {
425 		rp = &rop->rcb;
426 		if (!(rp->rcb_socket->so_state & SS_ISCONNECTED))
427 			continue;
428 		if (rp->rcb_socket->so_state & SS_CANTRCVMORE)
429 			continue;
430 		/* Check to see if we don't want our own messages. */
431 		if (so == rp->rcb_socket && !(so->so_options & SO_USELOOPBACK))
432 			continue;
433 
434 		/*
435 		 * If route socket is bound to an address family only send
436 		 * messages that match the address family. Address family
437 		 * agnostic messages are always send.
438 		 */
439 		if (rp->rcb_proto.sp_protocol != AF_UNSPEC &&
440 		    sa_family != AF_UNSPEC &&
441 		    rp->rcb_proto.sp_protocol != sa_family)
442 			continue;
443 
444 		/* filter messages that the process does not want */
445 		rtm = mtod(m, struct rt_msghdr *);
446 		/* but RTM_DESYNC can't be filtered */
447 		if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 &&
448 		    !(rop->msgfilter & (1 << rtm->rtm_type)))
449 			continue;
450 		if (rop->priority != 0 && rop->priority < rtm->rtm_priority)
451 			continue;
452 		switch (rtm->rtm_type) {
453 		case RTM_IFANNOUNCE:
454 		case RTM_DESYNC:
455 			/* no tableid */
456 			break;
457 		case RTM_RESOLVE:
458 		case RTM_NEWADDR:
459 		case RTM_DELADDR:
460 		case RTM_IFINFO:
461 			/* check against rdomain id */
462 			if (rop->rtableid != RTABLE_ANY &&
463 			    rtable_l2(rop->rtableid) != rtm->rtm_tableid)
464 				continue;
465 			break;
466 		default:
467 			/* check against rtable id */
468 			if (rop->rtableid != RTABLE_ANY &&
469 			    rop->rtableid != rtm->rtm_tableid)
470 				continue;
471 			break;
472 		}
473 
474 		/*
475 		 * Check to see if the flush flag is set. If so, don't queue
476 		 * any more messages until the flag is cleared.
477 		 */
478 		if ((rop->flags & ROUTECB_FLAG_FLUSH) != 0)
479 			continue;
480 
481 		if (last) {
482 			struct mbuf *n;
483 			if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
484 				if (sbspace(last, &last->so_rcv) < (2*MSIZE) ||
485 				    sbappendaddr(last, &last->so_rcv,
486 				    &route_src, n, (struct mbuf *)NULL) == 0) {
487 					/*
488 					 * Flag socket as desync'ed and
489 					 * flush required
490 					 */
491 					sotoroutecb(last)->flags |=
492 					    ROUTECB_FLAG_DESYNC |
493 					    ROUTECB_FLAG_FLUSH;
494 					route_senddesync(sotoroutecb(last));
495 					m_freem(n);
496 				} else {
497 					sorwakeup(last);
498 				}
499 			}
500 			refcnt_rele_wake(&sotoroutecb(last)->refcnt);
501 		}
502 		/* keep a reference for last */
503 		refcnt_take(&rop->refcnt);
504 		last = rop->rcb.rcb_socket;
505 	}
506 	if (last) {
507 		if (sbspace(last, &last->so_rcv) < (2 * MSIZE) ||
508 		    sbappendaddr(last, &last->so_rcv, &route_src,
509 		    m, (struct mbuf *)NULL) == 0) {
510 			/* Flag socket as desync'ed and flush required */
511 			sotoroutecb(last)->flags |=
512 			    ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
513 			route_senddesync(sotoroutecb(last));
514 			m_freem(m);
515 		} else {
516 			sorwakeup(last);
517 		}
518 		refcnt_rele_wake(&sotoroutecb(last)->refcnt);
519 	} else
520 		m_freem(m);
521 
522 	SRPL_LEAVE(&sr);
523 }
524 
525 struct rt_msghdr *
526 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
527 {
528 	struct rt_msghdr	*rtm;
529 	struct rt_addrinfo	 info;
530 	struct sockaddr_rtlabel	 sa_rl;
531 	struct sockaddr_in6	 sa_mask;
532 #ifdef BFD
533 	struct sockaddr_bfd	 sa_bfd;
534 #endif
535 #ifdef MPLS
536 	struct sockaddr_mpls	 sa_mpls;
537 #endif
538 	struct ifnet		*ifp = NULL;
539 	int			 len;
540 
541 	bzero(&info, sizeof(info));
542 	info.rti_info[RTAX_DST] = rt_key(rt);
543 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
544 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
545 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
546 #ifdef BFD
547 	if (rt->rt_flags & RTF_BFD)
548 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
549 #endif
550 #ifdef MPLS
551 	if (rt->rt_flags & RTF_MPLS) {
552 		bzero(&sa_mpls, sizeof(sa_mpls));
553 		sa_mpls.smpls_family = AF_MPLS;
554 		sa_mpls.smpls_len = sizeof(sa_mpls);
555 		sa_mpls.smpls_label = ((struct rt_mpls *)
556 		    rt->rt_llinfo)->mpls_label;
557 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
558 		info.rti_mpls = ((struct rt_mpls *)
559 		    rt->rt_llinfo)->mpls_operation;
560 	}
561 #endif
562 	ifp = if_get(rt->rt_ifidx);
563 	if (ifp != NULL) {
564 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
565 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
566 		if (ifp->if_flags & IFF_POINTOPOINT)
567 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
568 	}
569 	if_put(ifp);
570 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
571 
572 	/* build new route message */
573 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
574 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
575 
576 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
577 	rtm->rtm_type = type;
578 	rtm->rtm_index = rt->rt_ifidx;
579 	rtm->rtm_tableid = tableid;
580 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
581 	rtm->rtm_flags = rt->rt_flags;
582 	rtm->rtm_pid = curproc->p_p->ps_pid;
583 	rtm->rtm_seq = seq;
584 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
585 	rtm->rtm_addrs = info.rti_addrs;
586 #ifdef MPLS
587 	rtm->rtm_mpls = info.rti_mpls;
588 #endif
589 	return rtm;
590 }
591 
592 int
593 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
594     struct mbuf *control)
595 {
596 	struct rt_msghdr	*rtm = NULL;
597 	struct rtentry		*rt = NULL;
598 	struct rt_addrinfo	 info;
599 	int			 len, seq, error = 0;
600 	u_int			 tableid;
601 	u_int8_t		 prio;
602 	u_char			 vers, type;
603 
604 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
605 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
606 		return (ENOBUFS);
607 	if ((m->m_flags & M_PKTHDR) == 0)
608 		panic("route_output");
609 	len = m->m_pkthdr.len;
610 	if (len < offsetof(struct rt_msghdr, rtm_type) + 1 ||
611 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
612 		error = EINVAL;
613 		goto fail;
614 	}
615 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
616 	switch (vers) {
617 	case RTM_VERSION:
618 		if (len < sizeof(struct rt_msghdr)) {
619 			error = EINVAL;
620 			goto fail;
621 		}
622 		if (len > RTM_MAXSIZE) {
623 			error = EMSGSIZE;
624 			goto fail;
625 		}
626 		rtm = malloc(len, M_RTABLE, M_WAITOK);
627 		m_copydata(m, 0, len, (caddr_t)rtm);
628 		break;
629 	default:
630 		error = EPROTONOSUPPORT;
631 		goto fail;
632 	}
633 	rtm->rtm_pid = curproc->p_p->ps_pid;
634 	if (rtm->rtm_hdrlen == 0)	/* old client */
635 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
636 	if (len < rtm->rtm_hdrlen) {
637 		error = EINVAL;
638 		goto fail;
639 	}
640 
641 	/* Verify that the caller is sending an appropriate message early */
642 	switch (rtm->rtm_type) {
643 	case RTM_ADD:
644 	case RTM_DELETE:
645 	case RTM_GET:
646 	case RTM_CHANGE:
647 	case RTM_LOCK:
648 	case RTM_PROPOSAL:
649 		break;
650 	default:
651 		error = EOPNOTSUPP;
652 		goto fail;
653 	}
654 
655 	/*
656 	 * Verify that the caller has the appropriate privilege; RTM_GET
657 	 * is the only operation the non-superuser is allowed.
658 	 */
659 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
660 		error = EACCES;
661 		goto fail;
662 	}
663 	tableid = rtm->rtm_tableid;
664 	if (!rtable_exists(tableid)) {
665 		if (rtm->rtm_type == RTM_ADD) {
666 			if ((error = rtable_add(tableid)) != 0)
667 				goto fail;
668 		} else {
669 			error = EINVAL;
670 			goto fail;
671 		}
672 	}
673 
674 
675 	/* Do not let userland play with kernel-only flags. */
676 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
677 		error = EINVAL;
678 		goto fail;
679 	}
680 
681 	/* make sure that kernel-only bits are not set */
682 	rtm->rtm_priority &= RTP_MASK;
683 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
684 	rtm->rtm_fmask &= RTF_FMASK;
685 
686 	if (rtm->rtm_priority != 0) {
687 		if (rtm->rtm_priority > RTP_MAX ||
688 		    rtm->rtm_priority == RTP_LOCAL) {
689 			error = EINVAL;
690 			goto fail;
691 		}
692 		prio = rtm->rtm_priority;
693 	} else if (rtm->rtm_type != RTM_ADD)
694 		prio = RTP_ANY;
695 	else if (rtm->rtm_flags & RTF_STATIC)
696 		prio = 0;
697 	else
698 		prio = RTP_DEFAULT;
699 
700 	bzero(&info, sizeof(info));
701 	info.rti_addrs = rtm->rtm_addrs;
702 	rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info);
703 	info.rti_flags = rtm->rtm_flags;
704 	if (rtm->rtm_type != RTM_PROPOSAL &&
705 	   (info.rti_info[RTAX_DST] == NULL ||
706 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
707 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
708 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
709 	    info.rti_info[RTAX_GENMASK] != NULL)) {
710 		error = EINVAL;
711 		goto fail;
712 	}
713 #ifdef MPLS
714 	info.rti_mpls = rtm->rtm_mpls;
715 #endif
716 
717 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
718 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
719 	    (info.rti_flags & RTF_CLONING) == 0) {
720 		info.rti_flags |= RTF_LLINFO;
721 	}
722 
723 	/*
724 	 * Validate RTM_PROPOSAL and pass it along or error out.
725 	 */
726 	if (rtm->rtm_type == RTM_PROPOSAL) {
727 		if (rtm_validate_proposal(&info) == -1) {
728 			error = EINVAL;
729 			goto fail;
730 		}
731 	} else {
732 		error = rtm_output(rtm, &rt, &info, prio, tableid);
733 		if (!error) {
734 			type = rtm->rtm_type;
735 			seq = rtm->rtm_seq;
736 			free(rtm, M_RTABLE, len);
737 			rtm = rtm_report(rt, type, seq, tableid);
738 			len = rtm->rtm_msglen;
739 		}
740 	}
741 
742 	rtfree(rt);
743 	if (error) {
744 		rtm->rtm_errno = error;
745 	} else {
746 		rtm->rtm_flags |= RTF_DONE;
747 	}
748 
749 	/*
750 	 * Check to see if we don't want our own messages.
751 	 */
752 	if (!(so->so_options & SO_USELOOPBACK)) {
753 		if (route_cb.any_count <= 1) {
754 			/* no other listener and no loopback of messages */
755 fail:
756 			free(rtm, M_RTABLE, len);
757 			m_freem(m);
758 			return (error);
759 		}
760 	}
761 	if (rtm) {
762 		if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
763 			m_freem(m);
764 			m = NULL;
765 		} else if (m->m_pkthdr.len > len)
766 			m_adj(m, len - m->m_pkthdr.len);
767 		free(rtm, M_RTABLE, len);
768 	}
769 	if (m)
770 		route_input(m, so, info.rti_info[RTAX_DST] ?
771 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
772 
773 	return (error);
774 }
775 
/*
 * Execute the request carried by routing message rtm against routing
 * table tableid, with the addresses already parsed into info and the
 * priority already resolved into prio.  The whole operation runs with
 * the NET_LOCK() held.
 *
 *   RTM_ADD     requires a gateway; updates a matching RTF_CACHED entry
 *               in place (via the RTM_CHANGE code), otherwise inserts a
 *               new route with rtrequest() and applies the metrics.
 *   RTM_DELETE  looks the route up and removes it with
 *               rtrequest_delete(); RTF_CACHED entries are invalidated
 *               instead of removed.
 *   RTM_CHANGE  updates interface address, gateway, MPLS/BFD state,
 *               flags, metrics and label of an exactly matching route,
 *               then falls through to RTM_LOCK.
 *   RTM_LOCK    updates the locked metrics of the route.
 *   RTM_GET     only looks the route up.
 *
 * Any other message type leaves everything untouched and returns 0.
 *
 * Returns 0 or an errno value.  *prt receives the route that was found
 * or created, or NULL; route_output() passes it to rtfree() afterwards.
 */
776 int
777 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
778     struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
779 {
780 	struct rtentry		*rt = *prt;
781 	struct ifnet		*ifp = NULL;
782 	struct ifaddr		*ifa = NULL;
783 #ifdef MPLS
784 	struct sockaddr_mpls	*psa_mpls;
785 #endif
786 	int			 plen, newgate = 0, error = 0;
787 
788 	NET_LOCK();
789 	switch (rtm->rtm_type) {
790 	case RTM_ADD:
791 		if (info->rti_info[RTAX_GATEWAY] == NULL) {
792 			error = EINVAL;
793 			break;
794 		}
795 
796 		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
797 		if ((error = route_arp_conflict(rt, info))) {
798 			rtfree(rt);
799 			rt = NULL;
800 			break;
801 		}
802 
803 		/*
804 		 * We cannot go through a delete/create/insert cycle for
805 		 * cached route because this can lead to races in the
806 		 * receive path.  Instead we update the L2 cache.
807 		 */
808 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
809 			goto change;
810 
		/* The matched route was only needed for the checks above. */
811 		rtfree(rt);
812 		rt = NULL;
813 
814 		if ((error = rtm_getifa(info, tableid)) != 0)
815 			break;
816 		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
817 		if (error == 0)
818 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
819 			    &rt->rt_rmx);
820 		break;
821 	case RTM_DELETE:
822 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
823 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
824 		    prio);
825 		if (rt == NULL) {
826 			error = ESRCH;
827 			break;
828 		}
829 
830 		/*
831 		 * If we got multipath routes, we require users to specify
832 		 * a matching gateway.
833 		 */
834 		if (ISSET(rt->rt_flags, RTF_MPATH) &&
835 		    info->rti_info[RTAX_GATEWAY] == NULL) {
836 			error = ESRCH;
837 			break;
838 		}
839 
840 		/* Detaching an interface requires the KERNEL_LOCK(). */
841 		ifp = if_get(rt->rt_ifidx);
842 		KASSERT(ifp != NULL);
843 
844 		/*
845 		 * Invalidate the cache of automagically created and
846 		 * referenced L2 entries to make sure that ``rt_gwroute''
847 		 * pointer stays valid for other CPUs.
848 		 */
849 		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
850 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
851 			/* Reset the MTU of the gateway route. */
852 			rtable_walk(tableid, rt_key(rt)->sa_family,
853 			    route_cleargateway, rt);
854 			if_put(ifp);
855 			break;
856 		}
857 
858 		/*
859 		 * Make sure that local routes are only modified by the
860 		 * kernel.
861 		 */
862 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
863 			if_put(ifp);
864 			error = EINVAL;
865 			break;
866 		}
867 
		/* Drop the lookup reference; rtrequest_delete() refills rt. */
868 		rtfree(rt);
869 		rt = NULL;
870 
871 		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
872 		if_put(ifp);
873 		break;
874 	case RTM_CHANGE:
875 	case RTM_LOCK:
876 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
877 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
878 		    prio);
879 		/*
880 		 * If we got multipath routes, we require users to specify
881 		 * a matching gateway.
882 		 */
883 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
884 		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
885 			rtfree(rt);
886 			rt = NULL;
887 		}
888 		/*
889 		 * If RTAX_GATEWAY is the argument we're trying to
890 		 * change, try to find a compatible route.
891 		 */
892 		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL) &&
893 		    (rtm->rtm_type == RTM_CHANGE)) {
894 			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
895 			    info->rti_info[RTAX_NETMASK], NULL, prio);
896 			/* Ensure we don't pick a multipath one. */
897 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
898 				rtfree(rt);
899 				rt = NULL;
900 			}
901 		}
902 
903 		if (rt == NULL) {
904 			error = ESRCH;
905 			break;
906 		}
907 
908 		/*
909 		 * Make sure that local routes are only modified by the
910 		 * kernel.
911 		 */
912 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
913 			error = EINVAL;
914 			break;
915 		}
916 
917 		/*
918 		 * RTM_CHANGE/LOCK need a perfect match.
919 		 */
920 		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
921 		    info->rti_info[RTAX_NETMASK]);
922 		if (rt_plen(rt) != plen) {
923 			error = ESRCH;
924 			break;
925 		}
926 
927 		switch (rtm->rtm_type) {
928 		case RTM_CHANGE:
			/* The gateway counts as new if it differs from the current one. */
929 			if (info->rti_info[RTAX_GATEWAY] != NULL)
930 				if (rt->rt_gateway == NULL ||
931 				    bcmp(rt->rt_gateway,
932 				    info->rti_info[RTAX_GATEWAY],
933 				    info->rti_info[RTAX_GATEWAY]->sa_len)) {
934 					newgate = 1;
935 				}
936 			/*
937 			 * Check reachable gateway before changing the route.
938 			 * New gateway could require new ifaddr, ifp;
939 			 * flags may also be different; ifp may be specified
940 			 * by ll sockaddr when protocol address is ambiguous.
941 			 */
942 			if (newgate || info->rti_info[RTAX_IFP] != NULL ||
943 			    info->rti_info[RTAX_IFA] != NULL) {
944 				if ((error = rtm_getifa(info, tableid)) != 0)
945 					break;
946 				ifa = info->rti_ifa;
947 				if (rt->rt_ifa != ifa) {
					/* Move the route over to the new ifa. */
948 					ifp = if_get(rt->rt_ifidx);
949 					KASSERT(ifp != NULL);
950 					ifp->if_rtrequest(ifp, RTM_DELETE, rt);
951 					ifafree(rt->rt_ifa);
952 					if_put(ifp);
953 
954 					ifa->ifa_refcnt++;
955 					rt->rt_ifa = ifa;
956 					rt->rt_ifidx = ifa->ifa_ifp->if_index;
957 					/* recheck link state after ifp change*/
958 					rt_if_linkstate_change(rt, ifa->ifa_ifp,
959 					    tableid);
960 				}
961 			}
			/*
			 * Also entered from RTM_ADD above when the matched
			 * route is RTF_CACHED; newgate is still 0 then.
			 */
962 change:
963 			if (info->rti_info[RTAX_GATEWAY] != NULL) {
964 				/*
965 				 * When updating the gateway, make sure it's
966 				 * valid.
967 				 */
968 				if (!newgate && rt->rt_gateway->sa_family !=
969 				    info->rti_info[RTAX_GATEWAY]->sa_family) {
970 				    	error = EINVAL;
971 					break;
972 				}
973 
974 				error = rt_setgate(rt,
975 				    info->rti_info[RTAX_GATEWAY], tableid);
976 				if (error)
977 					break;
978 			}
979 #ifdef MPLS
980 			if ((rtm->rtm_flags & RTF_MPLS) &&
981 			    info->rti_info[RTAX_SRC] != NULL) {
982 				struct rt_mpls *rt_mpls;
983 
984 				psa_mpls = (struct sockaddr_mpls *)
985 				    info->rti_info[RTAX_SRC];
986 
987 				if (rt->rt_llinfo == NULL) {
988 					rt->rt_llinfo =
989 					    malloc(sizeof(struct rt_mpls),
990 					    M_TEMP, M_WAITOK | M_ZERO);
991 				}
992 
993 				rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
994 
995 				if (psa_mpls != NULL) {
996 					rt_mpls->mpls_label =
997 					    psa_mpls->smpls_label;
998 				}
999 
1000 				rt_mpls->mpls_operation = info->rti_mpls;
1001 
1002 				/* XXX: set experimental bits */
1003 
1004 				rt->rt_flags |= RTF_MPLS;
1005 			} else if (newgate || ((rtm->rtm_fmask & RTF_MPLS) &&
1006 			    !(rtm->rtm_flags & RTF_MPLS))) {
1007 				/* if gateway changed remove MPLS information */
1008 				if (rt->rt_llinfo != NULL &&
1009 				    rt->rt_flags & RTF_MPLS) {
1010 					free(rt->rt_llinfo, M_TEMP,
1011 					    sizeof(struct rt_mpls));
1012 					rt->rt_llinfo = NULL;
1013 					rt->rt_flags &= ~RTF_MPLS;
1014 				}
1015 			}
1016 #endif
1017 
1018 #ifdef BFD
1019 			if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1020 				if ((error = bfdset(rt)))
1021 					break;
1022 			} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1023 			    ISSET(rtm->rtm_fmask, RTF_BFD)) {
1024 				bfdclear(rt);
1025 			}
1026 #endif
1027 
1028 			/* Hack to allow some flags to be toggled */
1029 			if (rtm->rtm_fmask)
1030 				rt->rt_flags =
1031 				    (rt->rt_flags & ~rtm->rtm_fmask) |
1032 				    (rtm->rtm_flags & rtm->rtm_fmask);
1033 
1034 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
1035 			    &rt->rt_rmx);
1036 
			/* Let the interface know about the updated route. */
1037 			ifp = if_get(rt->rt_ifidx);
1038 			KASSERT(ifp != NULL);
1039 			ifp->if_rtrequest(ifp, RTM_ADD, rt);
1040 			if_put(ifp);
1041 
1042 			if (info->rti_info[RTAX_LABEL] != NULL) {
1043 				char *rtlabel = ((struct sockaddr_rtlabel *)
1044 				    info->rti_info[RTAX_LABEL])->sr_label;
1045 				rtlabel_unref(rt->rt_labelid);
1046 				rt->rt_labelid = rtlabel_name2id(rtlabel);
1047 			}
1048 			if_group_routechange(info->rti_info[RTAX_DST],
1049 			    info->rti_info[RTAX_NETMASK]);
1050 			/* FALLTHROUGH */
1051 		case RTM_LOCK:
			/* Of the metrics named in rtm_inits, lock exactly those set in rmx_locks. */
1052 			rt->rt_locks &= ~(rtm->rtm_inits);
1053 			rt->rt_locks |=
1054 			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1055 			break;
1056 		}
1057 		break;
1058 	case RTM_GET:
1059 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1060 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1061 		    prio);
1062 		if (rt == NULL)
1063 			error = ESRCH;
1064 		break;
1065 	}
1066 	NET_UNLOCK();
1067 
	/* Hand the route (or NULL) back to the caller, even on error. */
1068 	*prt = rt;
1069 	return (error);
1070 }
1071 
1072 struct ifaddr *
1073 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1074     unsigned int rtableid)
1075 {
1076 	struct ifaddr	*ifa;
1077 
1078 	if ((flags & RTF_GATEWAY) == 0) {
1079 		/*
1080 		 * If we are adding a route to an interface,
1081 		 * and the interface is a pt to pt link
1082 		 * we should search for the destination
1083 		 * as our clue to the interface.  Otherwise
1084 		 * we can use the local address.
1085 		 */
1086 		ifa = NULL;
1087 		if (flags & RTF_HOST)
1088 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1089 		if (ifa == NULL)
1090 			ifa = ifa_ifwithaddr(gateway, rtableid);
1091 	} else {
1092 		/*
1093 		 * If we are adding a route to a remote net
1094 		 * or host, the gateway may still be on the
1095 		 * other end of a pt to pt link.
1096 		 */
1097 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1098 	}
1099 	if (ifa == NULL) {
1100 		if (gateway->sa_family == AF_LINK) {
1101 			struct sockaddr_dl *sdl = satosdl(gateway);
1102 			struct ifnet *ifp = if_get(sdl->sdl_index);
1103 
1104 			if (ifp != NULL)
1105 				ifa = ifaof_ifpforaddr(dst, ifp);
1106 			if_put(ifp);
1107 		} else {
1108 			struct rtentry *rt;
1109 
1110 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1111 			if (rt != NULL)
1112 				ifa = rt->rt_ifa;
1113 			rtfree(rt);
1114 		}
1115 	}
1116 	if (ifa == NULL)
1117 		return (NULL);
1118 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1119 		struct ifaddr	*oifa = ifa;
1120 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1121 		if (ifa == NULL)
1122 			ifa = oifa;
1123 	}
1124 	return (ifa);
1125 }
1126 
1127 int
1128 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1129 {
1130 	struct ifnet	*ifp = NULL;
1131 
1132 	/*
1133 	 * ifp may be specified by sockaddr_dl when protocol address
1134 	 * is ambiguous
1135 	 */
1136 	if (info->rti_info[RTAX_IFP] != NULL) {
1137 		struct sockaddr_dl *sdl;
1138 
1139 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1140 		ifp = if_get(sdl->sdl_index);
1141 	}
1142 
1143 #ifdef IPSEC
1144 	/*
1145 	 * If the destination is a PF_KEY address, we'll look
1146 	 * for the existence of a encap interface number or address
1147 	 * in the options list of the gateway. By default, we'll return
1148 	 * enc0.
1149 	 */
1150 	if (info->rti_info[RTAX_DST] &&
1151 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1152 		info->rti_ifa = enc_getifa(rtid, 0);
1153 #endif
1154 
1155 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1156 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1157 
1158 	if (info->rti_ifa == NULL) {
1159 		struct sockaddr	*sa;
1160 
1161 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1162 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1163 				sa = info->rti_info[RTAX_DST];
1164 
1165 		if (sa != NULL && ifp != NULL)
1166 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1167 		else if (info->rti_info[RTAX_DST] != NULL &&
1168 		    info->rti_info[RTAX_GATEWAY] != NULL)
1169 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1170 			    info->rti_info[RTAX_DST],
1171 			    info->rti_info[RTAX_GATEWAY],
1172 			    rtid);
1173 		else if (sa != NULL)
1174 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1175 			    sa, sa, rtid);
1176 	}
1177 
1178 	if_put(ifp);
1179 
1180 	if (info->rti_ifa == NULL)
1181 		return (ENETUNREACH);
1182 
1183 	return (0);
1184 }
1185 
1186 int
1187 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1188 {
1189 	struct rtentry *nhrt = arg;
1190 
1191 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1192 	    !ISSET(rt->rt_locks, RTV_MTU))
1193 		rt->rt_mtu = 0;
1194 
1195 	return (0);
1196 }
1197 
1198 /*
1199  * Check if the user request to insert an ARP entry does not conflict
1200  * with existing ones.
1201  *
1202  * Only two entries are allowed for a given IP address: a private one
1203  * (priv) and a public one (pub).
1204  */
1205 int
1206 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1207 {
1208 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1209 
1210 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1211 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1212 		return (0);
1213 
1214 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1215 		return (0);
1216 
1217 	/* If the entry is cached, it can be updated. */
1218 	if (ISSET(rt->rt_flags, RTF_CACHED))
1219 		return (0);
1220 
1221 	/*
1222 	 * Same destination, not cached and both "priv" or "pub" conflict.
1223 	 * If a second entry exists, it always conflict.
1224 	 */
1225 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1226 	    ISSET(rt->rt_flags, RTF_MPATH))
1227 		return (EEXIST);
1228 
1229 	/* No conflict but an entry exist so we need to force mpath. */
1230 	info->rti_flags |= RTF_MPATH;
1231 	return (0);
1232 }
1233 
1234 void
1235 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1236     struct rt_kmetrics *out)
1237 {
1238 	int64_t expire;
1239 
1240 	if (which & RTV_MTU)
1241 		out->rmx_mtu = in->rmx_mtu;
1242 	if (which & RTV_EXPIRE) {
1243 		expire = in->rmx_expire;
1244 		if (expire != 0) {
1245 			expire -= time_second;
1246 			expire += time_uptime;
1247 		}
1248 
1249 		out->rmx_expire = expire;
1250 	}
1251 }
1252 
1253 void
1254 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1255 {
1256 	int64_t expire;
1257 
1258 	expire = in->rmx_expire;
1259 	if (expire != 0) {
1260 		expire -= time_uptime;
1261 		expire += time_second;
1262 	}
1263 
1264 	bzero(out, sizeof(*out));
1265 	out->rmx_locks = in->rmx_locks;
1266 	out->rmx_mtu = in->rmx_mtu;
1267 	out->rmx_expire = expire;
1268 	out->rmx_pksent = in->rmx_pksent;
1269 }
1270 
/*
 * ROUNDUP(a): round a up to the next multiple of sizeof(long); a value
 * that is not greater than zero yields sizeof(long).  The argument is
 * evaluated more than once.
 * ADVANCE(x, n): move x forward by the rounded-up sa_len of the
 * sockaddr that n points to.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
1274 
1275 void
1276 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1277 {
1278 	struct sockaddr	*sa;
1279 	int		 i;
1280 
1281 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1282 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
1283 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1284 			continue;
1285 		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;
1286 		ADVANCE(cp, sa);
1287 	}
1288 }
1289 
1290 struct mbuf *
1291 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1292 {
1293 	struct rt_msghdr	*rtm;
1294 	struct mbuf		*m;
1295 	int			 i;
1296 	struct sockaddr		*sa;
1297 	int			 len, dlen, hlen;
1298 
1299 	switch (type) {
1300 	case RTM_DELADDR:
1301 	case RTM_NEWADDR:
1302 		len = sizeof(struct ifa_msghdr);
1303 		break;
1304 	case RTM_IFINFO:
1305 		len = sizeof(struct if_msghdr);
1306 		break;
1307 	case RTM_IFANNOUNCE:
1308 		len = sizeof(struct if_announcemsghdr);
1309 		break;
1310 #ifdef BFD
1311 	case RTM_BFD:
1312 		len = sizeof(struct bfd_msghdr);
1313 		break;
1314 #endif
1315 	default:
1316 		len = sizeof(struct rt_msghdr);
1317 		break;
1318 	}
1319 	if (len > MCLBYTES)
1320 		panic("rtm_msg1");
1321 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1322 	if (m && len > MHLEN) {
1323 		MCLGET(m, M_DONTWAIT);
1324 		if ((m->m_flags & M_EXT) == 0) {
1325 			m_free(m);
1326 			m = NULL;
1327 		}
1328 	}
1329 	if (m == NULL)
1330 		return (m);
1331 	m->m_pkthdr.len = m->m_len = hlen = len;
1332 	m->m_pkthdr.ph_ifidx = 0;
1333 	rtm = mtod(m, struct rt_msghdr *);
1334 	bzero(rtm, len);
1335 	for (i = 0; i < RTAX_MAX; i++) {
1336 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1337 			continue;
1338 		rtinfo->rti_addrs |= (1 << i);
1339 		dlen = ROUNDUP(sa->sa_len);
1340 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1341 			m_freem(m);
1342 			return (NULL);
1343 		}
1344 		len += dlen;
1345 	}
1346 	rtm->rtm_msglen = len;
1347 	rtm->rtm_hdrlen = hlen;
1348 	rtm->rtm_version = RTM_VERSION;
1349 	rtm->rtm_type = type;
1350 	return (m);
1351 }
1352 
1353 int
1354 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1355     struct walkarg *w)
1356 {
1357 	int		i;
1358 	int		len, dlen, hlen, second_time = 0;
1359 	caddr_t		cp0;
1360 
1361 	rtinfo->rti_addrs = 0;
1362 again:
1363 	switch (type) {
1364 	case RTM_DELADDR:
1365 	case RTM_NEWADDR:
1366 		len = sizeof(struct ifa_msghdr);
1367 		break;
1368 	case RTM_IFINFO:
1369 		len = sizeof(struct if_msghdr);
1370 		break;
1371 	default:
1372 		len = sizeof(struct rt_msghdr);
1373 		break;
1374 	}
1375 	hlen = len;
1376 	if ((cp0 = cp) != NULL)
1377 		cp += len;
1378 	for (i = 0; i < RTAX_MAX; i++) {
1379 		struct sockaddr *sa;
1380 
1381 		if ((sa = rtinfo->rti_info[i]) == NULL)
1382 			continue;
1383 		rtinfo->rti_addrs |= (1 << i);
1384 		dlen = ROUNDUP(sa->sa_len);
1385 		if (cp) {
1386 			bcopy(sa, cp, (size_t)dlen);
1387 			cp += dlen;
1388 		}
1389 		len += dlen;
1390 	}
1391 	/* align message length to the next natural boundary */
1392 	len = ALIGN(len);
1393 	if (cp == 0 && w != NULL && !second_time) {
1394 		w->w_needed += len;
1395 		if (w->w_needed <= 0 && w->w_where) {
1396 			if (w->w_tmemsize < len) {
1397 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1398 				w->w_tmem = malloc(len, M_RTABLE, M_NOWAIT);
1399 				if (w->w_tmem)
1400 					w->w_tmemsize = len;
1401 			}
1402 			if (w->w_tmem) {
1403 				cp = w->w_tmem;
1404 				second_time = 1;
1405 				goto again;
1406 			} else
1407 				w->w_where = 0;
1408 		}
1409 	}
1410 	if (cp && w)		/* clear the message header */
1411 		bzero(cp0, hlen);
1412 
1413 	if (cp) {
1414 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1415 
1416 		rtm->rtm_version = RTM_VERSION;
1417 		rtm->rtm_type = type;
1418 		rtm->rtm_msglen = len;
1419 		rtm->rtm_hdrlen = hlen;
1420 	}
1421 	return (len);
1422 }
1423 
1424 void
1425 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1426 {
1427 	struct rt_addrinfo	 info;
1428 	struct ifnet		*ifp;
1429 	struct sockaddr_rtlabel	 sa_rl;
1430 	struct sockaddr_in6	 sa_mask;
1431 
1432 	memset(&info, 0, sizeof(info));
1433 	info.rti_info[RTAX_DST] = rt_key(rt);
1434 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1435 	if (!ISSET(rt->rt_flags, RTF_HOST))
1436 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1437 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1438 	ifp = if_get(rt->rt_ifidx);
1439 	if (ifp != NULL) {
1440 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1441 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1442 	}
1443 
1444 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1445 	    rtableid);
1446 	if_put(ifp);
1447 }
1448 
1449 /*
1450  * This routine is called to generate a message from the routing
1451  * socket indicating that a redirect has occurred, a routing lookup
1452  * has failed, or that a protocol has detected timeouts to a particular
1453  * destination.
1454  */
1455 void
1456 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1457     u_int ifidx, int error, u_int tableid)
1458 {
1459 	struct rt_msghdr	*rtm;
1460 	struct mbuf		*m;
1461 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1462 
1463 	if (route_cb.any_count == 0)
1464 		return;
1465 	m = rtm_msg1(type, rtinfo);
1466 	if (m == NULL)
1467 		return;
1468 	rtm = mtod(m, struct rt_msghdr *);
1469 	rtm->rtm_flags = RTF_DONE | flags;
1470 	rtm->rtm_priority = prio;
1471 	rtm->rtm_errno = error;
1472 	rtm->rtm_tableid = tableid;
1473 	rtm->rtm_addrs = rtinfo->rti_addrs;
1474 	rtm->rtm_index = ifidx;
1475 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1476 }
1477 
1478 /*
1479  * This routine is called to generate a message from the routing
1480  * socket indicating that the status of a network interface has changed.
1481  */
1482 void
1483 rtm_ifchg(struct ifnet *ifp)
1484 {
1485 	struct if_msghdr	*ifm;
1486 	struct mbuf		*m;
1487 
1488 	if (route_cb.any_count == 0)
1489 		return;
1490 	m = rtm_msg1(RTM_IFINFO, NULL);
1491 	if (m == NULL)
1492 		return;
1493 	ifm = mtod(m, struct if_msghdr *);
1494 	ifm->ifm_index = ifp->if_index;
1495 	ifm->ifm_tableid = ifp->if_rdomain;
1496 	ifm->ifm_flags = ifp->if_flags;
1497 	ifm->ifm_xflags = ifp->if_xflags;
1498 	if_getdata(ifp, &ifm->ifm_data);
1499 	ifm->ifm_addrs = 0;
1500 	route_input(m, NULL, AF_UNSPEC);
1501 }
1502 
1503 /*
1504  * This is called to generate messages from the routing socket
1505  * indicating a network interface has had addresses associated with it.
1506  * if we ever reverse the logic and replace messages TO the routing
1507  * socket indicate a request to configure interfaces, then it will
1508  * be unnecessary as the routing socket will automatically generate
1509  * copies of it.
1510  */
1511 void
1512 rtm_addr(struct rtentry *rt, int cmd, struct ifaddr *ifa)
1513 {
1514 	struct ifnet		*ifp = ifa->ifa_ifp;
1515 	struct mbuf		*m;
1516 	struct rt_addrinfo	 info;
1517 	struct ifa_msghdr	*ifam;
1518 
1519 	if (route_cb.any_count == 0)
1520 		return;
1521 
1522 	memset(&info, 0, sizeof(info));
1523 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1524 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1525 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1526 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1527 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1528 		return;
1529 	ifam = mtod(m, struct ifa_msghdr *);
1530 	ifam->ifam_index = ifp->if_index;
1531 	ifam->ifam_metric = ifa->ifa_metric;
1532 	ifam->ifam_flags = ifa->ifa_flags;
1533 	ifam->ifam_addrs = info.rti_addrs;
1534 	ifam->ifam_tableid = ifp->if_rdomain;
1535 
1536 	route_input(m, NULL,
1537 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1538 }
1539 
1540 /*
1541  * This is called to generate routing socket messages indicating
1542  * network interface arrival and departure.
1543  */
1544 void
1545 rtm_ifannounce(struct ifnet *ifp, int what)
1546 {
1547 	struct if_announcemsghdr	*ifan;
1548 	struct mbuf			*m;
1549 
1550 	if (route_cb.any_count == 0)
1551 		return;
1552 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1553 	if (m == NULL)
1554 		return;
1555 	ifan = mtod(m, struct if_announcemsghdr *);
1556 	ifan->ifan_index = ifp->if_index;
1557 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1558 	ifan->ifan_what = what;
1559 	route_input(m, NULL, AF_UNSPEC);
1560 }
1561 
#ifdef BFD
/*
 * Generate a routing socket message describing the state of a BFD
 * session.  The message carries the destination and interface address
 * of the session's route, plus the sockaddr_bfd built by bfd2sa().
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rtentry		*rt;
	struct bfd_msghdr	*hdr;
	struct sockaddr_bfd	 state;
	struct rt_addrinfo	 info;
	struct mbuf		*m;

	if (route_cb.any_count == 0)
		return;

	rt = bfd->bc_rt;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;

	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = info.rti_addrs;

	/* Snapshot the session state and embed it in the header. */
	bfd2sa(rt, &state);
	memcpy(&hdr->bm_sa, &state, sizeof(state));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1593 
/*
 * This is used in dumping the kernel table via sysctl().
 *
 * Called for one route entry rt with the walk state in v (a struct
 * walkarg) and the routing table id in id.  Entries rejected by the
 * filter in the walkarg are skipped.  For the others an RTM_GET message
 * is sized through rtm_msg2() and, when rtm_msg2() left a message in
 * w->w_tmem, completed here and copied out to w->w_where.
 *
 * Returns 0, or the error reported by copyout().
 */
int
sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
{
	struct walkarg		*w = v;
	int			 error = 0, size;
	struct rt_addrinfo	 info;
	struct ifnet		*ifp;
#ifdef BFD
	struct sockaddr_bfd	 sa_bfd;
#endif
#ifdef MPLS
	struct sockaddr_mpls	 sa_mpls;
#endif
	struct sockaddr_rtlabel	 sa_rl;
	struct sockaddr_in6	 sa_mask;

	/* NET_RT_FLAGS: keep only routes having at least one flag of w_arg. */
	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
		return 0;
	/* NET_RT_DUMP with a non-zero argument: filter on route priority. */
	if (w->w_op == NET_RT_DUMP && w->w_arg) {
		u_int8_t prio = w->w_arg & RTP_MASK;
		if (w->w_arg < 0) {
			prio = (-w->w_arg) & RTP_MASK;
			/* Show all routes that are not this priority */
			if (prio == (rt->rt_priority & RTP_MASK))
				return 0;
		} else {
			/* Show only this priority; RTP_ANY matches all. */
			if (prio != (rt->rt_priority & RTP_MASK) &&
			    prio != RTP_ANY)
				return 0;
		}
	}
	/* Collect the addresses describing this route. */
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	ifp = if_get(rt->rt_ifidx);
	if (ifp != NULL) {
		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
		/* The peer address is only reported for pt to pt links. */
		if (ifp->if_flags & IFF_POINTOPOINT)
			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
	}
	if_put(ifp);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
#ifdef BFD
	if (rt->rt_flags & RTF_BFD)
		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
#endif
#ifdef MPLS
	if (rt->rt_flags & RTF_MPLS) {
		/* The MPLS label travels in the RTAX_SRC slot. */
		bzero(&sa_mpls, sizeof(sa_mpls));
		sa_mpls.smpls_family = AF_MPLS;
		sa_mpls.smpls_len = sizeof(sa_mpls);
		sa_mpls.smpls_label = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_label;
		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
		info.rti_mpls = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_operation;
	}
#endif

	/* Size the message; rtm_msg2() may also build it in w->w_tmem. */
	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;

		rtm->rtm_pid = curproc->p_p->ps_pid;
		rtm->rtm_flags = rt->rt_flags;
		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
		/* Do not account the routing table's reference. */
		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
		rtm->rtm_index = rt->rt_ifidx;
		rtm->rtm_addrs = info.rti_addrs;
		rtm->rtm_tableid = id;
#ifdef MPLS
		rtm->rtm_mpls = info.rti_mpls;
#endif
		/* On failure stop writing to the user buffer. */
		if ((error = copyout(rtm, w->w_where, size)) != 0)
			w->w_where = NULL;
		else
			w->w_where += size;
	}
	return (error);
}
1681 
1682 int
1683 sysctl_iflist(int af, struct walkarg *w)
1684 {
1685 	struct ifnet		*ifp;
1686 	struct ifaddr		*ifa;
1687 	struct rt_addrinfo	 info;
1688 	int			 len, error = 0;
1689 
1690 	bzero(&info, sizeof(info));
1691 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1692 		if (w->w_arg && w->w_arg != ifp->if_index)
1693 			continue;
1694 		/* Copy the link-layer address first */
1695 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1696 		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
1697 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1698 			struct if_msghdr *ifm;
1699 
1700 			ifm = (struct if_msghdr *)w->w_tmem;
1701 			ifm->ifm_index = ifp->if_index;
1702 			ifm->ifm_tableid = ifp->if_rdomain;
1703 			ifm->ifm_flags = ifp->if_flags;
1704 			if_getdata(ifp, &ifm->ifm_data);
1705 			ifm->ifm_addrs = info.rti_addrs;
1706 			error = copyout(ifm, w->w_where, len);
1707 			if (error)
1708 				return (error);
1709 			w->w_where += len;
1710 		}
1711 		info.rti_info[RTAX_IFP] = NULL;
1712 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1713 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
1714 			if (af && af != ifa->ifa_addr->sa_family)
1715 				continue;
1716 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1717 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1718 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1719 			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
1720 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1721 				struct ifa_msghdr *ifam;
1722 
1723 				ifam = (struct ifa_msghdr *)w->w_tmem;
1724 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1725 				ifam->ifam_flags = ifa->ifa_flags;
1726 				ifam->ifam_metric = ifa->ifa_metric;
1727 				ifam->ifam_addrs = info.rti_addrs;
1728 				error = copyout(w->w_tmem, w->w_where, len);
1729 				if (error)
1730 					return (error);
1731 				w->w_where += len;
1732 			}
1733 		}
1734 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1735 		    info.rti_info[RTAX_BRD] = NULL;
1736 	}
1737 	return (0);
1738 }
1739 
1740 int
1741 sysctl_ifnames(struct walkarg *w)
1742 {
1743 	struct if_nameindex_msg ifn;
1744 	struct ifnet *ifp;
1745 	int error = 0;
1746 
1747 	/* XXX ignore tableid for now */
1748 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1749 		if (w->w_arg && w->w_arg != ifp->if_index)
1750 			continue;
1751 		w->w_needed += sizeof(ifn);
1752 		if (w->w_where && w->w_needed <= 0) {
1753 
1754 			memset(&ifn, 0, sizeof(ifn));
1755 			ifn.if_index = ifp->if_index;
1756 			strlcpy(ifn.if_name, ifp->if_xname,
1757 			    sizeof(ifn.if_name));
1758 			error = copyout(&ifn, w->w_where, sizeof(ifn));
1759 			if (error)
1760 				return (error);
1761 			w->w_where += sizeof(ifn);
1762 		}
1763 	}
1764 
1765 	return (0);
1766 }
1767 
1768 int
1769 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
1770     size_t newlen)
1771 {
1772 	int			 i, error = EINVAL;
1773 	u_char			 af;
1774 	struct walkarg		 w;
1775 	struct rt_tableinfo	 tableinfo;
1776 	u_int			 tableid = 0;
1777 
1778 	if (new)
1779 		return (EPERM);
1780 	if (namelen < 3 || namelen > 4)
1781 		return (EINVAL);
1782 	af = name[0];
1783 	bzero(&w, sizeof(w));
1784 	w.w_where = where;
1785 	w.w_given = *given;
1786 	w.w_needed = 0 - w.w_given;
1787 	w.w_op = name[1];
1788 	w.w_arg = name[2];
1789 
1790 	if (namelen == 4) {
1791 		tableid = name[3];
1792 		if (!rtable_exists(tableid))
1793 			return (ENOENT);
1794 	} else
1795 		tableid = curproc->p_p->ps_rtableid;
1796 
1797 	switch (w.w_op) {
1798 	case NET_RT_DUMP:
1799 	case NET_RT_FLAGS:
1800 		NET_LOCK();
1801 		for (i = 1; i <= AF_MAX; i++) {
1802 			if (af != 0 && af != i)
1803 				continue;
1804 
1805 			error = rtable_walk(tableid, i, sysctl_dumpentry, &w);
1806 			if (error == EAFNOSUPPORT)
1807 				error = 0;
1808 			if (error)
1809 				break;
1810 		}
1811 		NET_UNLOCK();
1812 		break;
1813 
1814 	case NET_RT_IFLIST:
1815 		NET_LOCK();
1816 		error = sysctl_iflist(af, &w);
1817 		NET_UNLOCK();
1818 		break;
1819 
1820 	case NET_RT_STATS:
1821 		return (sysctl_rtable_rtstat(where, given, new));
1822 	case NET_RT_TABLE:
1823 		tableid = w.w_arg;
1824 		if (!rtable_exists(tableid))
1825 			return (ENOENT);
1826 		memset(&tableinfo, 0, sizeof tableinfo);
1827 		tableinfo.rti_tableid = tableid;
1828 		tableinfo.rti_domainid = rtable_l2(tableid);
1829 		error = sysctl_rdstruct(where, given, new,
1830 		    &tableinfo, sizeof(tableinfo));
1831 		return (error);
1832 	case NET_RT_IFNAMES:
1833 		NET_LOCK();
1834 		error = sysctl_ifnames(&w);
1835 		NET_UNLOCK();
1836 		break;
1837 	}
1838 	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
1839 	w.w_needed += w.w_given;
1840 	if (where) {
1841 		*given = w.w_where - (caddr_t)where;
1842 		if (*given < w.w_needed)
1843 			return (ENOMEM);
1844 	} else
1845 		*given = (11 * w.w_needed) / 10;
1846 
1847 	return (error);
1848 }
1849 
1850 int
1851 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
1852 {
1853 	extern struct cpumem *rtcounters;
1854 	uint64_t counters[rts_ncounters];
1855 	struct rtstat rtstat;
1856 	uint32_t *words = (uint32_t *)&rtstat;
1857 	int i;
1858 
1859 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
1860 	memset(&rtstat, 0, sizeof rtstat);
1861 	counters_read(rtcounters, counters, nitems(counters));
1862 
1863 	for (i = 0; i < nitems(counters); i++)
1864 		words[i] = (uint32_t)counters[i];
1865 
1866 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
1867 }
1868 
1869 int
1870 rtm_validate_proposal(struct rt_addrinfo *info)
1871 {
1872 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
1873 	    RTA_SEARCH)) {
1874 		return -1;
1875 	}
1876 
1877 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
1878 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
1879 		if (sa == NULL)
1880 			return -1;
1881 		switch (sa->sa_family) {
1882 		case AF_INET:
1883 			if (sa->sa_len != sizeof(struct sockaddr_in))
1884 				return -1;
1885 			break;
1886 		case AF_INET6:
1887 			if (sa->sa_len != sizeof(struct sockaddr_in6))
1888 				return -1;
1889 			break;
1890 		default:
1891 			return -1;
1892 		}
1893 	}
1894 
1895 	if (ISSET(info->rti_addrs, RTA_IFA)) {
1896 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
1897 		if (sa == NULL)
1898 			return -1;
1899 		switch (sa->sa_family) {
1900 		case AF_INET:
1901 			if (sa->sa_len != sizeof(struct sockaddr_in))
1902 				return -1;
1903 			break;
1904 		case AF_INET6:
1905 			if (sa->sa_len != sizeof(struct sockaddr_in6))
1906 				return -1;
1907 			break;
1908 		default:
1909 			return -1;
1910 		}
1911 	}
1912 
1913 	if (ISSET(info->rti_addrs, RTA_DNS)) {
1914 		struct sockaddr_rtdns *rtdns =
1915 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
1916 		if (rtdns == NULL)
1917 			return -1;
1918 		if (rtdns->sr_len > sizeof(*rtdns))
1919 			return -1;
1920 		if (rtdns->sr_len <=
1921 		    offsetof(struct sockaddr_rtdns, sr_dns))
1922 			return -1;
1923 	}
1924 
1925 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
1926 		struct sockaddr_rtstatic *rtstatic =
1927 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
1928 		if (rtstatic == NULL)
1929 			return -1;
1930 		if (rtstatic->sr_len > sizeof(*rtstatic))
1931 			return -1;
1932 		if (rtstatic->sr_len <=
1933 		    offsetof(struct sockaddr_rtstatic, sr_static))
1934 			return -1;
1935 	}
1936 
1937 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
1938 		struct sockaddr_rtsearch *rtsearch =
1939 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
1940 		if (rtsearch == NULL)
1941 			return -1;
1942 		if (rtsearch->sr_len > sizeof(*rtsearch))
1943 			return -1;
1944 		if (rtsearch->sr_len <=
1945 		    offsetof(struct sockaddr_rtsearch, sr_search))
1946 			return -1;
1947 	}
1948 
1949 	return 0;
1950 }
1951 
1952 /*
1953  * Definitions of protocols supported in the ROUTE domain.
1954  */
1955 
1956 extern	struct domain routedomain;		/* or at least forward */
1957 
/*
 * Protocol switch of the routing domain: a single raw socket protocol
 * whose entry points are the route_* functions, with sysctl_rtable() as
 * its sysctl handler.
 */
struct protosw routesw[] = {
{
  .pr_type	= SOCK_RAW,
  .pr_domain	= &routedomain,
  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
  .pr_output	= route_output,
  .pr_ctloutput	= route_ctloutput,
  .pr_usrreq	= route_usrreq,
  .pr_attach	= route_attach,
  .pr_detach	= route_detach,
  .pr_init	= route_prinit,
  .pr_sysctl	= sysctl_rtable
}
};
1972 
/*
 * The PF_ROUTE domain itself.  Its protocol table is routesw; the end
 * marker points one element past the last entry of that array.
 */
struct domain routedomain = {
  .dom_family = PF_ROUTE,
  .dom_name = "route",
  .dom_init = route_init,
  .dom_protosw = routesw,
  .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
};
1980