xref: /openbsd-src/sys/net/rtsock.c (revision ae3cb403620ab940fbaabb3055fac045a63d56b7)
1 /*	$OpenBSD: rtsock.c,v 1.259 2017/12/18 09:40:17 mpi Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/protosw.h>
73 
74 #include <net/if.h>
75 #include <net/if_dl.h>
76 #include <net/if_var.h>
77 #include <net/route.h>
78 #include <net/raw_cb.h>
79 
80 #include <netinet/in.h>
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif
85 #ifdef IPSEC
86 #include <netinet/ip_ipsp.h>
87 #include <net/if_enc.h>
88 #endif
89 #ifdef BFD
90 #include <net/bfd.h>
91 #endif
92 
93 #include <sys/stdarg.h>
94 #include <sys/kernel.h>
95 #include <sys/timeout.h>
96 
/*
 * Minimal sockaddrs tagged with PF_ROUTE (the leading 2 is presumably
 * sa_len -- confirm against struct sockaddr).  route_src is installed as
 * the foreign address of every routing socket and is the source address
 * handed to sbappendaddr() for queued messages.
 */
struct sockaddr		route_dst = { 2, PF_ROUTE, };
struct sockaddr		route_src = { 2, PF_ROUTE, };
99 
/*
 * Argument block handed to rtm_msg2() and the sysctl_* helpers declared
 * below.  NOTE(review): the fields are only used outside this part of the
 * file; judging by their names they describe the requested operation and
 * the user/temporary buffers -- confirm against the sysctl code.
 */
struct walkarg {
	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
	caddr_t	w_where, w_tmem;
};
104 
/* Routing socket entry points and their local helpers. */
void	route_prinit(void);
int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
	    struct mbuf *);
int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
	    struct mbuf *, struct proc *);
void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int	route_cleargateway(struct rtentry *, void *, unsigned int);
void	route_senddesync(void *);

/* Routing message (rtm) processing, construction and parsing. */
int	rtm_getifa(struct rt_addrinfo *, unsigned int);
int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
	    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
		     struct walkarg *);
void		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
int		 rtm_validate_proposal(struct rt_addrinfo *);
void		 rtm_setmetrics(u_long, const struct rt_metrics *,
		     struct rt_kmetrics *);
void		 rtm_getmetrics(const struct rt_kmetrics *,
		     struct rt_metrics *);

/* sysctl helpers; they share struct walkarg with rtm_msg2(). */
int		 sysctl_iflist(int, struct walkarg *);
int		 sysctl_ifnames(struct walkarg *);
int		 sysctl_rtable_rtstat(void *, size_t *, void *);
133 
/*
 * Per-socket control block of a routing socket.
 *
 * The embedded rawcb has to stay the first member: so_pcb is set to
 * &rop->rcb in route_attach() and the code converts freely between
 * struct rawcb * and struct routecb * with plain casts.
 */
struct routecb {
	struct rawcb		rcb;		/* generic raw socket state */
	LIST_ENTRY(routecb)	rcb_list;	/* linkage on route_cb.rcb */
	struct timeout		timeout;	/* runs route_senddesync() */
	unsigned int		msgfilter;	/* ROUTE_MSGFILTER bitmask,
						   one bit per rtm_type;
						   0 accepts everything */
	unsigned int		flags;		/* ROUTECB_FLAG_* */
	u_int			rtableid;	/* ROUTE_TABLEFILTER value or
						   RTABLE_ANY */
};
/* Fetch the routecb hanging off a routing socket. */
#define	sotoroutecb(so)	((struct routecb *)(so)->so_pcb)
143 
/*
 * Global bookkeeping for all open routing sockets: the list that
 * route_input() walks plus listener counters maintained by
 * route_attach() and route_detach().
 */
struct route_cb {
	LIST_HEAD(, routecb)	rcb;		/* every attached routecb */
	int			ip_count;	/* sockets opened for AF_INET */
	int			ip6_count;	/* sockets opened for AF_INET6 */
	int			mpls_count;	/* sockets opened for AF_MPLS */
	int			any_count;	/* all sockets, any protocol */
};

struct route_cb route_cb;
153 
/*
 * These flags and the timeout are used to tell userland (via an
 * RTM_DESYNC message) that the receive buffer of a route socket has
 * overflowed and messages have been lost.
 */
#define ROUTECB_FLAG_DESYNC	0x1	/* An RTM_DESYNC message is still
					   owed to this socket */
#define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
					   queueing more packets */

/* Delay handed to timeout_add() before retrying a failed RTM_DESYNC. */
#define ROUTE_DESYNC_RESEND_TIMEOUT	(hz / 5)	/* in ticks */
164 
165 void
166 route_prinit(void)
167 {
168 	LIST_INIT(&route_cb.rcb);
169 }
170 
171 
172 int
173 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
174     struct mbuf *control, struct proc *p)
175 {
176 	struct routecb	*rop;
177 	int		 error = 0;
178 
179 	soassertlocked(so);
180 
181 	rop = sotoroutecb(so);
182 	if (rop == NULL) {
183 		m_freem(m);
184 		return (EINVAL);
185 	}
186 
187 	switch (req) {
188 	case PRU_RCVD:
189 		/*
190 		 * If we are in a FLUSH state, check if the buffer is
191 		 * empty so that we can clear the flag.
192 		 */
193 		if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) &&
194 		    ((sbspace(rop->rcb.rcb_socket,
195 		    &rop->rcb.rcb_socket->so_rcv) ==
196 		    rop->rcb.rcb_socket->so_rcv.sb_hiwat)))
197 			rop->flags &= ~ROUTECB_FLAG_FLUSH;
198 		break;
199 
200 	default:
201 		error = raw_usrreq(so, req, m, nam, control, p);
202 	}
203 
204 	return (error);
205 }
206 
207 int
208 route_attach(struct socket *so, int proto)
209 {
210 	struct rawcb    *rp;
211 	struct routecb	*rop;
212 	int		 error;
213 
214 	/*
215 	 * use the rawcb but allocate a routecb, this
216 	 * code does not care about the additional fields
217 	 * and works directly on the raw socket.
218 	 */
219 	rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO);
220 	rp = &rop->rcb;
221 	so->so_pcb = rp;
222 	/* Init the timeout structure */
223 	timeout_set(&rop->timeout, route_senddesync, rp);
224 
225 	if (curproc == NULL)
226 		error = EACCES;
227 	else
228 		error = soreserve(so, RAWSNDQ, RAWRCVQ);
229 	if (error) {
230 		free(rop, M_PCB, sizeof(struct routecb));
231 		return (error);
232 	}
233 	rp->rcb_socket = so;
234 	rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family;
235 	rp->rcb_proto.sp_protocol = proto;
236 
237 	rop->rtableid = curproc->p_p->ps_rtableid;
238 	switch (rp->rcb_proto.sp_protocol) {
239 	case AF_INET:
240 		route_cb.ip_count++;
241 		break;
242 	case AF_INET6:
243 		route_cb.ip6_count++;
244 		break;
245 #ifdef MPLS
246 	case AF_MPLS:
247 		route_cb.mpls_count++;
248 		break;
249 #endif
250 	}
251 
252 	soisconnected(so);
253 	so->so_options |= SO_USELOOPBACK;
254 
255 	rp->rcb_faddr = &route_src;
256 	route_cb.any_count++;
257 	LIST_INSERT_HEAD(&route_cb.rcb, rop, rcb_list);
258 
259 	return (0);
260 }
261 
262 int
263 route_detach(struct socket *so)
264 {
265 	struct routecb	*rop;
266 	int		 af;
267 
268 	soassertlocked(so);
269 
270 	rop = sotoroutecb(so);
271 	if (rop == NULL)
272 		return (EINVAL);
273 
274 	timeout_del(&rop->timeout);
275 	af = rop->rcb.rcb_proto.sp_protocol;
276 	if (af == AF_INET)
277 		route_cb.ip_count--;
278 	else if (af == AF_INET6)
279 		route_cb.ip6_count--;
280 #ifdef MPLS
281 	else if (af == AF_MPLS)
282 		route_cb.mpls_count--;
283 #endif
284 	route_cb.any_count--;
285 	LIST_REMOVE(rop, rcb_list);
286 
287 	so->so_pcb = NULL;
288 	sofree(so);
289 	free(rop, M_PCB, sizeof(struct routecb));
290 
291 	return (0);
292 }
293 
294 int
295 route_ctloutput(int op, struct socket *so, int level, int optname,
296     struct mbuf *m)
297 {
298 	struct routecb *rop = sotoroutecb(so);
299 	int error = 0;
300 	unsigned int tid;
301 
302 	if (level != AF_ROUTE)
303 		return (EINVAL);
304 
305 	switch (op) {
306 	case PRCO_SETOPT:
307 		switch (optname) {
308 		case ROUTE_MSGFILTER:
309 			if (m == NULL || m->m_len != sizeof(unsigned int))
310 				error = EINVAL;
311 			else
312 				rop->msgfilter = *mtod(m, unsigned int *);
313 			break;
314 		case ROUTE_TABLEFILTER:
315 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
316 				error = EINVAL;
317 				break;
318 			}
319 			tid = *mtod(m, unsigned int *);
320 			if (tid != RTABLE_ANY && !rtable_exists(tid))
321 				error = ENOENT;
322 			else
323 				rop->rtableid = tid;
324 			break;
325 		default:
326 			error = ENOPROTOOPT;
327 			break;
328 		}
329 		break;
330 	case PRCO_GETOPT:
331 		switch (optname) {
332 		case ROUTE_MSGFILTER:
333 			m->m_len = sizeof(unsigned int);
334 			*mtod(m, unsigned int *) = rop->msgfilter;
335 			break;
336 		case ROUTE_TABLEFILTER:
337 			m->m_len = sizeof(unsigned int);
338 			*mtod(m, unsigned int *) = rop->rtableid;
339 			break;
340 		default:
341 			error = ENOPROTOOPT;
342 			break;
343 		}
344 	}
345 	return (error);
346 }
347 
348 void
349 route_senddesync(void *data)
350 {
351 	struct rawcb	*rp;
352 	struct routecb	*rop;
353 	struct mbuf	*desync_mbuf;
354 
355 	rp = (struct rawcb *)data;
356 	rop = (struct routecb *)rp;
357 
358 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
359 	if ((rop->flags & ROUTECB_FLAG_DESYNC) == 0)
360 		return;
361 
362 	/*
363 	 * If we fail to alloc memory or if sbappendaddr()
364 	 * fails, re-add timeout and try again.
365 	 */
366 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
367 	if (desync_mbuf != NULL) {
368 		struct socket *so = rp->rcb_socket;
369 		if (sbappendaddr(so, &so->so_rcv, &route_src,
370 		    desync_mbuf, NULL) != 0) {
371 			rop->flags &= ~ROUTECB_FLAG_DESYNC;
372 			sorwakeup(rp->rcb_socket);
373 			return;
374 		}
375 		m_freem(desync_mbuf);
376 	}
377 	/* Re-add timeout to try sending msg again */
378 	timeout_add(&rop->timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
379 }
380 
381 void
382 route_input(struct mbuf *m0, struct socket *so, sa_family_t sa_family)
383 {
384 	struct rawcb *rp;
385 	struct routecb *rop;
386 	struct rt_msghdr *rtm;
387 	struct mbuf *m = m0;
388 	int sockets = 0;
389 	struct socket *last = NULL;
390 	struct sockaddr *sosrc, *sodst;
391 
392 	KERNEL_ASSERT_LOCKED();
393 
394 	sosrc = &route_src;
395 	sodst = &route_dst;
396 
397 	/* ensure that we can access the rtm_type via mtod() */
398 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
399 		m_freem(m);
400 		return;
401 	}
402 
403 	LIST_FOREACH(rop, &route_cb.rcb, rcb_list) {
404 		rp = &rop->rcb;
405 		if (!(rp->rcb_socket->so_state & SS_ISCONNECTED))
406 			continue;
407 		if (rp->rcb_socket->so_state & SS_CANTRCVMORE)
408 			continue;
409 		/* Check to see if we don't want our own messages. */
410 		if (so == rp->rcb_socket && !(so->so_options & SO_USELOOPBACK))
411 			continue;
412 
413 		/*
414 		 * If route socket is bound to an address family only send
415 		 * messages that match the address family. Address family
416 		 * agnostic messages are always send.
417 		 */
418 		if (rp->rcb_proto.sp_protocol != AF_UNSPEC &&
419 		    sa_family != AF_UNSPEC &&
420 		    rp->rcb_proto.sp_protocol != sa_family)
421 			continue;
422 
423 		/* filter messages that the process does not want */
424 		rtm = mtod(m, struct rt_msghdr *);
425 		/* but RTM_DESYNC can't be filtered */
426 		if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 &&
427 		    !(rop->msgfilter & (1 << rtm->rtm_type)))
428 			continue;
429 		switch (rtm->rtm_type) {
430 		case RTM_IFANNOUNCE:
431 		case RTM_DESYNC:
432 			/* no tableid */
433 			break;
434 		case RTM_RESOLVE:
435 		case RTM_NEWADDR:
436 		case RTM_DELADDR:
437 		case RTM_IFINFO:
438 			/* check against rdomain id */
439 			if (rop->rtableid != RTABLE_ANY &&
440 			    rtable_l2(rop->rtableid) != rtm->rtm_tableid)
441 				continue;
442 			break;
443 		default:
444 			/* check against rtable id */
445 			if (rop->rtableid != RTABLE_ANY &&
446 			    rop->rtableid != rtm->rtm_tableid)
447 				continue;
448 			break;
449 		}
450 
451 		/*
452 		 * Check to see if the flush flag is set. If so, don't queue
453 		 * any more messages until the flag is cleared.
454 		 */
455 		if ((rop->flags & ROUTECB_FLAG_FLUSH) != 0)
456 			continue;
457 
458 		if (last) {
459 			struct mbuf *n;
460 			if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
461 				if (sbspace(last, &last->so_rcv) < (2*MSIZE) ||
462 				    sbappendaddr(last, &last->so_rcv, sosrc,
463 				    n, (struct mbuf *)NULL) == 0) {
464 					/*
465 					 * Flag socket as desync'ed and
466 					 * flush required
467 					 */
468 					sotoroutecb(last)->flags |=
469 					    ROUTECB_FLAG_DESYNC |
470 					    ROUTECB_FLAG_FLUSH;
471 					route_senddesync(sotorawcb(last));
472 					m_freem(n);
473 				} else {
474 					sorwakeup(last);
475 					sockets++;
476 				}
477 			}
478 		}
479 		last = rp->rcb_socket;
480 	}
481 	if (last) {
482 		if (sbspace(last, &last->so_rcv) < (2 * MSIZE) ||
483 		    sbappendaddr(last, &last->so_rcv, sosrc,
484 		    m, (struct mbuf *)NULL) == 0) {
485 			/* Flag socket as desync'ed and flush required */
486 			sotoroutecb(last)->flags |=
487 			    ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
488 			route_senddesync(sotorawcb(last));
489 			m_freem(m);
490 		} else {
491 			sorwakeup(last);
492 			sockets++;
493 		}
494 	} else
495 		m_freem(m);
496 }
497 
498 struct rt_msghdr *
499 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
500 {
501 	struct rt_msghdr	*rtm;
502 	struct rt_addrinfo	 info;
503 	struct sockaddr_rtlabel	 sa_rl;
504 	struct sockaddr_in6	 sa_mask;
505 #ifdef BFD
506 	struct sockaddr_bfd	 sa_bfd;
507 #endif
508 #ifdef MPLS
509 	struct sockaddr_mpls	 sa_mpls;
510 #endif
511 	struct ifnet		*ifp = NULL;
512 	int			 len;
513 
514 	bzero(&info, sizeof(info));
515 	info.rti_info[RTAX_DST] = rt_key(rt);
516 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
517 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
518 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
519 #ifdef BFD
520 	if (rt->rt_flags & RTF_BFD)
521 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
522 #endif
523 #ifdef MPLS
524 	if (rt->rt_flags & RTF_MPLS) {
525 		bzero(&sa_mpls, sizeof(sa_mpls));
526 		sa_mpls.smpls_family = AF_MPLS;
527 		sa_mpls.smpls_len = sizeof(sa_mpls);
528 		sa_mpls.smpls_label = ((struct rt_mpls *)
529 		    rt->rt_llinfo)->mpls_label;
530 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
531 		info.rti_mpls = ((struct rt_mpls *)
532 		    rt->rt_llinfo)->mpls_operation;
533 	}
534 #endif
535 	ifp = if_get(rt->rt_ifidx);
536 	if (ifp != NULL) {
537 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
538 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
539 		if (ifp->if_flags & IFF_POINTOPOINT)
540 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
541 	}
542 	if_put(ifp);
543 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
544 
545 	/* build new route message */
546 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
547 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
548 
549 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
550 	rtm->rtm_type = type;
551 	rtm->rtm_index = rt->rt_ifidx;
552 	rtm->rtm_tableid = tableid;
553 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
554 	rtm->rtm_flags = rt->rt_flags;
555 	rtm->rtm_pid = curproc->p_p->ps_pid;
556 	rtm->rtm_seq = seq;
557 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
558 	rtm->rtm_addrs = info.rti_addrs;
559 #ifdef MPLS
560 	rtm->rtm_mpls = info.rti_mpls;
561 #endif
562 	return rtm;
563 }
564 
565 int
566 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
567     struct mbuf *control)
568 {
569 	struct rt_msghdr	*rtm = NULL;
570 	struct rtentry		*rt = NULL;
571 	struct rt_addrinfo	 info;
572 	int			 len, seq, error = 0;
573 	u_int			 tableid;
574 	u_int8_t		 prio;
575 	u_char			 vers, type;
576 
577 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
578 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
579 		return (ENOBUFS);
580 	if ((m->m_flags & M_PKTHDR) == 0)
581 		panic("route_output");
582 	len = m->m_pkthdr.len;
583 	if (len < offsetof(struct rt_msghdr, rtm_type) + 1 ||
584 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
585 		error = EINVAL;
586 		goto fail;
587 	}
588 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
589 	switch (vers) {
590 	case RTM_VERSION:
591 		if (len < sizeof(struct rt_msghdr)) {
592 			error = EINVAL;
593 			goto fail;
594 		}
595 		if (len > RTM_MAXSIZE) {
596 			error = EMSGSIZE;
597 			goto fail;
598 		}
599 		rtm = malloc(len, M_RTABLE, M_WAITOK);
600 		m_copydata(m, 0, len, (caddr_t)rtm);
601 		break;
602 	default:
603 		error = EPROTONOSUPPORT;
604 		goto fail;
605 	}
606 	rtm->rtm_pid = curproc->p_p->ps_pid;
607 	if (rtm->rtm_hdrlen == 0)	/* old client */
608 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
609 	if (len < rtm->rtm_hdrlen) {
610 		error = EINVAL;
611 		goto fail;
612 	}
613 
614 	/* Verify that the caller is sending an appropriate message early */
615 	switch (rtm->rtm_type) {
616 	case RTM_ADD:
617 	case RTM_DELETE:
618 	case RTM_GET:
619 	case RTM_CHANGE:
620 	case RTM_LOCK:
621 	case RTM_PROPOSAL:
622 		break;
623 	default:
624 		error = EOPNOTSUPP;
625 		goto fail;
626 	}
627 
628 	/*
629 	 * Verify that the caller has the appropriate privilege; RTM_GET
630 	 * is the only operation the non-superuser is allowed.
631 	 */
632 	if (rtm->rtm_type != RTM_GET && suser(curproc, 0) != 0) {
633 		error = EACCES;
634 		goto fail;
635 	}
636 	tableid = rtm->rtm_tableid;
637 	if (!rtable_exists(tableid)) {
638 		if (rtm->rtm_type == RTM_ADD) {
639 			if ((error = rtable_add(tableid)) != 0)
640 				goto fail;
641 		} else {
642 			error = EINVAL;
643 			goto fail;
644 		}
645 	}
646 
647 
648 	/* Do not let userland play with kernel-only flags. */
649 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
650 		error = EINVAL;
651 		goto fail;
652 	}
653 
654 	/* make sure that kernel-only bits are not set */
655 	rtm->rtm_priority &= RTP_MASK;
656 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
657 	rtm->rtm_fmask &= RTF_FMASK;
658 
659 	if (rtm->rtm_priority != 0) {
660 		if (rtm->rtm_priority > RTP_MAX ||
661 		    rtm->rtm_priority == RTP_LOCAL) {
662 			error = EINVAL;
663 			goto fail;
664 		}
665 		prio = rtm->rtm_priority;
666 	} else if (rtm->rtm_type != RTM_ADD)
667 		prio = RTP_ANY;
668 	else if (rtm->rtm_flags & RTF_STATIC)
669 		prio = 0;
670 	else
671 		prio = RTP_DEFAULT;
672 
673 	bzero(&info, sizeof(info));
674 	info.rti_addrs = rtm->rtm_addrs;
675 	rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info);
676 	info.rti_flags = rtm->rtm_flags;
677 	if (rtm->rtm_type != RTM_PROPOSAL &&
678 	   (info.rti_info[RTAX_DST] == NULL ||
679 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
680 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
681 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
682 	    info.rti_info[RTAX_GENMASK] != NULL)) {
683 		error = EINVAL;
684 		goto fail;
685 	}
686 #ifdef MPLS
687 	info.rti_mpls = rtm->rtm_mpls;
688 #endif
689 
690 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
691 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
692 	    (info.rti_flags & RTF_CLONING) == 0) {
693 		info.rti_flags |= RTF_LLINFO;
694 	}
695 
696 	/*
697 	 * Validate RTM_PROPOSAL and pass it along or error out.
698 	 */
699 	if (rtm->rtm_type == RTM_PROPOSAL) {
700 		if (rtm_validate_proposal(&info) == -1) {
701 			error = EINVAL;
702 			goto fail;
703 		}
704 	} else {
705 		error = rtm_output(rtm, &rt, &info, prio, tableid);
706 		if (!error) {
707 			type = rtm->rtm_type;
708 			seq = rtm->rtm_seq;
709 			free(rtm, M_RTABLE, len);
710 			rtm = rtm_report(rt, type, seq, tableid);
711 			len = rtm->rtm_msglen;
712 		}
713 	}
714 
715 	rtfree(rt);
716 	if (error) {
717 		rtm->rtm_errno = error;
718 	} else {
719 		rtm->rtm_flags |= RTF_DONE;
720 	}
721 
722 	/*
723 	 * Check to see if we don't want our own messages.
724 	 */
725 	if (!(so->so_options & SO_USELOOPBACK)) {
726 		if (route_cb.any_count <= 1) {
727 			/* no other listener and no loopback of messages */
728 fail:
729 			free(rtm, M_RTABLE, len);
730 			m_freem(m);
731 			return (error);
732 		}
733 	}
734 	if (rtm) {
735 		if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
736 			m_freem(m);
737 			m = NULL;
738 		} else if (m->m_pkthdr.len > len)
739 			m_adj(m, len - m->m_pkthdr.len);
740 		free(rtm, M_RTABLE, len);
741 	}
742 	if (m)
743 		route_input(m, so, info.rti_info[RTAX_DST] ?
744 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
745 
746 	return (error);
747 }
748 
/*
 * Execute a validated routing request while holding the NET_LOCK().
 *
 *   rtm	the request; rtm_type selects RTM_ADD, RTM_DELETE,
 *		RTM_CHANGE, RTM_LOCK or RTM_GET
 *   prt	in: initial route pointer; out: whatever route pointer is
 *		held when the function leaves (may be NULL).  The caller
 *		passes it to rtfree().
 *   info	addresses and flags parsed from the request
 *   prio	priority to add with or to look up
 *   tableid	routing table to operate on
 *
 * Returns 0 on success or an errno value (EINVAL, ESRCH, or an error
 * from the helpers that are called).
 */
int
rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
    struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
{
	struct rtentry		*rt = *prt;
	struct ifnet		*ifp = NULL;
	struct ifaddr		*ifa = NULL;
#ifdef MPLS
	struct sockaddr_mpls	*psa_mpls;
#endif
	int			 plen, newgate = 0, error = 0;

	NET_LOCK();
	switch (rtm->rtm_type) {
	case RTM_ADD:
		/* A new route always needs a gateway. */
		if (info->rti_info[RTAX_GATEWAY] == NULL) {
			error = EINVAL;
			break;
		}

		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
		if ((error = route_arp_conflict(rt, info))) {
			rtfree(rt);
			rt = NULL;
			break;
		}

		/*
		 * We cannot go through a delete/create/insert cycle for
		 * cached route because this can lead to races in the
		 * receive path.  Instead we update the L2 cache.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
			goto change;

		/* The matched route was only needed for the checks above. */
		rtfree(rt);
		rt = NULL;

		if ((error = rtm_getifa(info, tableid)) != 0)
			break;
		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
		if (error == 0)
			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
			    &rt->rt_rmx);
		break;
	case RTM_DELETE:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
		 */
		if (ISSET(rt->rt_flags, RTF_MPATH) &&
		    info->rti_info[RTAX_GATEWAY] == NULL) {
			error = ESRCH;
			break;
		}

		/* Detaching an interface requires the KERNEL_LOCK(). */
		ifp = if_get(rt->rt_ifidx);
		KASSERT(ifp != NULL);

		/*
		 * Invalidate the cache of automagically created and
		 * referenced L2 entries to make sure that ``rt_gwroute''
		 * pointer stays valid for other CPUs.
		 */
		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
			/* Reset the MTU of the gateway route. */
			rtable_walk(tableid, rt_key(rt)->sa_family,
			    route_cleargateway, rt);
			if_put(ifp);
			/* Cached entries are invalidated, not removed. */
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			if_put(ifp);
			error = EINVAL;
			break;
		}

		/* Drop the lookup reference before requesting the delete. */
		rtfree(rt);
		rt = NULL;

		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
		if_put(ifp);
		break;
	case RTM_CHANGE:
	case RTM_LOCK:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
			rtfree(rt);
			rt = NULL;
		}
		/*
		 * If RTAX_GATEWAY is the argument we're trying to
		 * change, try to find a compatible route.
		 */
		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL) &&
		    (rtm->rtm_type == RTM_CHANGE)) {
			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], NULL, prio);
			/* Ensure we don't pick a multipath one. */
			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
				rtfree(rt);
				rt = NULL;
			}
		}

		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			error = EINVAL;
			break;
		}

		/*
		 * RTM_CHANGE/LOCK need a perfect match.
		 */
		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
		    info->rti_info[RTAX_NETMASK]);
		if (rt_plen(rt) != plen) {
			error = ESRCH;
			break;
		}

		/*
		 * Note that a ``break'' inside this inner switch only
		 * leaves the inner switch; the statement following it is
		 * the ``break'' of the outer case.
		 */
		switch (rtm->rtm_type) {
		case RTM_CHANGE:
			/* Does the request carry a different gateway? */
			if (info->rti_info[RTAX_GATEWAY] != NULL)
				if (rt->rt_gateway == NULL ||
				    bcmp(rt->rt_gateway,
				    info->rti_info[RTAX_GATEWAY],
				    info->rti_info[RTAX_GATEWAY]->sa_len)) {
					newgate = 1;
				}
			/*
			 * Check reachable gateway before changing the route.
			 * New gateway could require new ifaddr, ifp;
			 * flags may also be different; ifp may be specified
			 * by ll sockaddr when protocol address is ambiguous.
			 */
			if (newgate || info->rti_info[RTAX_IFP] != NULL ||
			    info->rti_info[RTAX_IFA] != NULL) {
				if ((error = rtm_getifa(info, tableid)) != 0)
					break;
				ifa = info->rti_ifa;
				if (rt->rt_ifa != ifa) {
					/*
					 * Detach the route from its old
					 * address/interface, then bind it
					 * to the new one.
					 */
					ifp = if_get(rt->rt_ifidx);
					KASSERT(ifp != NULL);
					ifp->if_rtrequest(ifp, RTM_DELETE, rt);
					ifafree(rt->rt_ifa);
					if_put(ifp);

					ifa->ifa_refcnt++;
					rt->rt_ifa = ifa;
					rt->rt_ifidx = ifa->ifa_ifp->if_index;
					/* recheck link state after ifp change*/
					rt_if_linkstate_change(rt, ifa->ifa_ifp,
					    tableid);
				}
			}
/* Also entered from RTM_ADD (with newgate == 0) to update a cached route. */
change:
			if (info->rti_info[RTAX_GATEWAY] != NULL) {
				/*
				 * When updating the gateway, make sure it's
				 * valid.
				 */
				if (!newgate && rt->rt_gateway->sa_family !=
				    info->rti_info[RTAX_GATEWAY]->sa_family) {
				    	error = EINVAL;
					break;
				}

				error = rt_setgate(rt,
				    info->rti_info[RTAX_GATEWAY], tableid);
				if (error)
					break;
			}
#ifdef MPLS
			if ((rtm->rtm_flags & RTF_MPLS) &&
			    info->rti_info[RTAX_SRC] != NULL) {
				struct rt_mpls *rt_mpls;

				/* RTAX_SRC carries the MPLS label. */
				psa_mpls = (struct sockaddr_mpls *)
				    info->rti_info[RTAX_SRC];

				if (rt->rt_llinfo == NULL) {
					rt->rt_llinfo =
					    malloc(sizeof(struct rt_mpls),
					    M_TEMP, M_WAITOK | M_ZERO);
				}

				rt_mpls = (struct rt_mpls *)rt->rt_llinfo;

				if (psa_mpls != NULL) {
					rt_mpls->mpls_label =
					    psa_mpls->smpls_label;
				}

				rt_mpls->mpls_operation = info->rti_mpls;

				/* XXX: set experimental bits */

				rt->rt_flags |= RTF_MPLS;
			} else if (newgate || ((rtm->rtm_fmask & RTF_MPLS) &&
			    !(rtm->rtm_flags & RTF_MPLS))) {
				/* if gateway changed remove MPLS information */
				if (rt->rt_llinfo != NULL &&
				    rt->rt_flags & RTF_MPLS) {
					free(rt->rt_llinfo, M_TEMP,
					    sizeof(struct rt_mpls));
					rt->rt_llinfo = NULL;
					rt->rt_flags &= ~RTF_MPLS;
				}
			}
#endif

#ifdef BFD
			/* Enable BFD if requested, disable it if masked off. */
			if (ISSET(rtm->rtm_flags, RTF_BFD)) {
				if ((error = bfdset(rt)))
					break;
			} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
			    ISSET(rtm->rtm_fmask, RTF_BFD)) {
				bfdclear(rt);
			}
#endif

			/* Hack to allow some flags to be toggled */
			if (rtm->rtm_fmask)
				rt->rt_flags =
				    (rt->rt_flags & ~rtm->rtm_fmask) |
				    (rtm->rtm_flags & rtm->rtm_fmask);

			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
			    &rt->rt_rmx);

			/* Let the interface know about the updated route. */
			ifp = if_get(rt->rt_ifidx);
			KASSERT(ifp != NULL);
			ifp->if_rtrequest(ifp, RTM_ADD, rt);
			if_put(ifp);

			/* Swap the route label if the request carries one. */
			if (info->rti_info[RTAX_LABEL] != NULL) {
				char *rtlabel = ((struct sockaddr_rtlabel *)
				    info->rti_info[RTAX_LABEL])->sr_label;
				rtlabel_unref(rt->rt_labelid);
				rt->rt_labelid = rtlabel_name2id(rtlabel);
			}
			if_group_routechange(info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK]);
			/* FALLTHROUGH */
		case RTM_LOCK:
			/*
			 * For every metric selected by rtm_inits take over
			 * the lock bit given in rmx_locks.
			 */
			rt->rt_locks &= ~(rtm->rtm_inits);
			rt->rt_locks |=
			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
			break;
		}
		break;
	case RTM_GET:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL)
			error = ESRCH;
		break;
	}
	NET_UNLOCK();

	/* Hand the (possibly NULL) route back to the caller. */
	*prt = rt;
	return (error);
}
1044 
1045 struct ifaddr *
1046 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1047     unsigned int rtableid)
1048 {
1049 	struct ifaddr	*ifa;
1050 
1051 	if ((flags & RTF_GATEWAY) == 0) {
1052 		/*
1053 		 * If we are adding a route to an interface,
1054 		 * and the interface is a pt to pt link
1055 		 * we should search for the destination
1056 		 * as our clue to the interface.  Otherwise
1057 		 * we can use the local address.
1058 		 */
1059 		ifa = NULL;
1060 		if (flags & RTF_HOST)
1061 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1062 		if (ifa == NULL)
1063 			ifa = ifa_ifwithaddr(gateway, rtableid);
1064 	} else {
1065 		/*
1066 		 * If we are adding a route to a remote net
1067 		 * or host, the gateway may still be on the
1068 		 * other end of a pt to pt link.
1069 		 */
1070 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1071 	}
1072 	if (ifa == NULL) {
1073 		if (gateway->sa_family == AF_LINK) {
1074 			struct sockaddr_dl *sdl = satosdl(gateway);
1075 			struct ifnet *ifp = if_get(sdl->sdl_index);
1076 
1077 			if (ifp != NULL)
1078 				ifa = ifaof_ifpforaddr(dst, ifp);
1079 			if_put(ifp);
1080 		} else {
1081 			struct rtentry *rt;
1082 
1083 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1084 			if (rt != NULL)
1085 				ifa = rt->rt_ifa;
1086 			rtfree(rt);
1087 		}
1088 	}
1089 	if (ifa == NULL)
1090 		return (NULL);
1091 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1092 		struct ifaddr	*oifa = ifa;
1093 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1094 		if (ifa == NULL)
1095 			ifa = oifa;
1096 	}
1097 	return (ifa);
1098 }
1099 
1100 int
1101 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1102 {
1103 	struct ifnet	*ifp = NULL;
1104 
1105 	/*
1106 	 * ifp may be specified by sockaddr_dl when protocol address
1107 	 * is ambiguous
1108 	 */
1109 	if (info->rti_info[RTAX_IFP] != NULL) {
1110 		struct sockaddr_dl *sdl;
1111 
1112 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1113 		ifp = if_get(sdl->sdl_index);
1114 	}
1115 
1116 #ifdef IPSEC
1117 	/*
1118 	 * If the destination is a PF_KEY address, we'll look
1119 	 * for the existence of a encap interface number or address
1120 	 * in the options list of the gateway. By default, we'll return
1121 	 * enc0.
1122 	 */
1123 	if (info->rti_info[RTAX_DST] &&
1124 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1125 		info->rti_ifa = enc_getifa(rtid, 0);
1126 #endif
1127 
1128 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1129 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1130 
1131 	if (info->rti_ifa == NULL) {
1132 		struct sockaddr	*sa;
1133 
1134 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1135 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1136 				sa = info->rti_info[RTAX_DST];
1137 
1138 		if (sa != NULL && ifp != NULL)
1139 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1140 		else if (info->rti_info[RTAX_DST] != NULL &&
1141 		    info->rti_info[RTAX_GATEWAY] != NULL)
1142 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1143 			    info->rti_info[RTAX_DST],
1144 			    info->rti_info[RTAX_GATEWAY],
1145 			    rtid);
1146 		else if (sa != NULL)
1147 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1148 			    sa, sa, rtid);
1149 	}
1150 
1151 	if_put(ifp);
1152 
1153 	if (info->rti_ifa == NULL)
1154 		return (ENETUNREACH);
1155 
1156 	return (0);
1157 }
1158 
1159 int
1160 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1161 {
1162 	struct rtentry *nhrt = arg;
1163 
1164 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1165 	    !ISSET(rt->rt_locks, RTV_MTU))
1166 		rt->rt_mtu = 0;
1167 
1168 	return (0);
1169 }
1170 
1171 /*
1172  * Check if the user request to insert an ARP entry does not conflict
1173  * with existing ones.
1174  *
1175  * Only two entries are allowed for a given IP address: a private one
1176  * (priv) and a public one (pub).
1177  */
1178 int
1179 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1180 {
1181 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1182 
1183 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1184 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1185 		return (0);
1186 
1187 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1188 		return (0);
1189 
1190 	/* If the entry is cached, it can be updated. */
1191 	if (ISSET(rt->rt_flags, RTF_CACHED))
1192 		return (0);
1193 
1194 	/*
1195 	 * Same destination, not cached and both "priv" or "pub" conflict.
1196 	 * If a second entry exists, it always conflict.
1197 	 */
1198 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1199 	    ISSET(rt->rt_flags, RTF_MPATH))
1200 		return (EEXIST);
1201 
1202 	/* No conflict but an entry exist so we need to force mpath. */
1203 	info->rti_flags |= RTF_MPATH;
1204 	return (0);
1205 }
1206 
1207 void
1208 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1209     struct rt_kmetrics *out)
1210 {
1211 	int64_t expire;
1212 
1213 	if (which & RTV_MTU)
1214 		out->rmx_mtu = in->rmx_mtu;
1215 	if (which & RTV_EXPIRE) {
1216 		expire = in->rmx_expire;
1217 		if (expire != 0) {
1218 			expire -= time_second;
1219 			expire += time_uptime;
1220 		}
1221 
1222 		out->rmx_expire = expire;
1223 	}
1224 }
1225 
1226 void
1227 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1228 {
1229 	int64_t expire;
1230 
1231 	expire = in->rmx_expire;
1232 	if (expire != 0) {
1233 		expire -= time_uptime;
1234 		expire += time_second;
1235 	}
1236 
1237 	bzero(out, sizeof(*out));
1238 	out->rmx_locks = in->rmx_locks;
1239 	out->rmx_mtu = in->rmx_mtu;
1240 	out->rmx_expire = expire;
1241 	out->rmx_pksent = in->rmx_pksent;
1242 }
1243 
/*
 * ROUNDUP(a): round a positive length up to the next multiple of
 * sizeof(long); a length of zero (or less) yields sizeof(long).
 * The bit trick relies on sizeof(long) being a power of two.
 *
 * ADVANCE(x, n): step x past sockaddr n, i.e. add its sa_len rounded
 * up with ROUNDUP().  Note that x is modified in place.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
1247 
1248 void
1249 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1250 {
1251 	struct sockaddr	*sa;
1252 	int		 i;
1253 
1254 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1255 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
1256 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1257 			continue;
1258 		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;
1259 		ADVANCE(cp, sa);
1260 	}
1261 }
1262 
1263 struct mbuf *
1264 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1265 {
1266 	struct rt_msghdr	*rtm;
1267 	struct mbuf		*m;
1268 	int			 i;
1269 	struct sockaddr		*sa;
1270 	int			 len, dlen, hlen;
1271 
1272 	switch (type) {
1273 	case RTM_DELADDR:
1274 	case RTM_NEWADDR:
1275 		len = sizeof(struct ifa_msghdr);
1276 		break;
1277 	case RTM_IFINFO:
1278 		len = sizeof(struct if_msghdr);
1279 		break;
1280 	case RTM_IFANNOUNCE:
1281 		len = sizeof(struct if_announcemsghdr);
1282 		break;
1283 #ifdef BFD
1284 	case RTM_BFD:
1285 		len = sizeof(struct bfd_msghdr);
1286 		break;
1287 #endif
1288 	default:
1289 		len = sizeof(struct rt_msghdr);
1290 		break;
1291 	}
1292 	if (len > MCLBYTES)
1293 		panic("rtm_msg1");
1294 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1295 	if (m && len > MHLEN) {
1296 		MCLGET(m, M_DONTWAIT);
1297 		if ((m->m_flags & M_EXT) == 0) {
1298 			m_free(m);
1299 			m = NULL;
1300 		}
1301 	}
1302 	if (m == NULL)
1303 		return (m);
1304 	m->m_pkthdr.len = m->m_len = hlen = len;
1305 	m->m_pkthdr.ph_ifidx = 0;
1306 	rtm = mtod(m, struct rt_msghdr *);
1307 	bzero(rtm, len);
1308 	for (i = 0; i < RTAX_MAX; i++) {
1309 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1310 			continue;
1311 		rtinfo->rti_addrs |= (1 << i);
1312 		dlen = ROUNDUP(sa->sa_len);
1313 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1314 			m_freem(m);
1315 			return (NULL);
1316 		}
1317 		len += dlen;
1318 	}
1319 	rtm->rtm_msglen = len;
1320 	rtm->rtm_hdrlen = hlen;
1321 	rtm->rtm_version = RTM_VERSION;
1322 	rtm->rtm_type = type;
1323 	return (m);
1324 }
1325 
1326 int
1327 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1328     struct walkarg *w)
1329 {
1330 	int		i;
1331 	int		len, dlen, hlen, second_time = 0;
1332 	caddr_t		cp0;
1333 
1334 	rtinfo->rti_addrs = 0;
1335 again:
1336 	switch (type) {
1337 	case RTM_DELADDR:
1338 	case RTM_NEWADDR:
1339 		len = sizeof(struct ifa_msghdr);
1340 		break;
1341 	case RTM_IFINFO:
1342 		len = sizeof(struct if_msghdr);
1343 		break;
1344 	default:
1345 		len = sizeof(struct rt_msghdr);
1346 		break;
1347 	}
1348 	hlen = len;
1349 	if ((cp0 = cp) != NULL)
1350 		cp += len;
1351 	for (i = 0; i < RTAX_MAX; i++) {
1352 		struct sockaddr *sa;
1353 
1354 		if ((sa = rtinfo->rti_info[i]) == NULL)
1355 			continue;
1356 		rtinfo->rti_addrs |= (1 << i);
1357 		dlen = ROUNDUP(sa->sa_len);
1358 		if (cp) {
1359 			bcopy(sa, cp, (size_t)dlen);
1360 			cp += dlen;
1361 		}
1362 		len += dlen;
1363 	}
1364 	/* align message length to the next natural boundary */
1365 	len = ALIGN(len);
1366 	if (cp == 0 && w != NULL && !second_time) {
1367 		w->w_needed += len;
1368 		if (w->w_needed <= 0 && w->w_where) {
1369 			if (w->w_tmemsize < len) {
1370 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1371 				w->w_tmem = malloc(len, M_RTABLE, M_NOWAIT);
1372 				if (w->w_tmem)
1373 					w->w_tmemsize = len;
1374 			}
1375 			if (w->w_tmem) {
1376 				cp = w->w_tmem;
1377 				second_time = 1;
1378 				goto again;
1379 			} else
1380 				w->w_where = 0;
1381 		}
1382 	}
1383 	if (cp && w)		/* clear the message header */
1384 		bzero(cp0, hlen);
1385 
1386 	if (cp) {
1387 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1388 
1389 		rtm->rtm_version = RTM_VERSION;
1390 		rtm->rtm_type = type;
1391 		rtm->rtm_msglen = len;
1392 		rtm->rtm_hdrlen = hlen;
1393 	}
1394 	return (len);
1395 }
1396 
1397 void
1398 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1399 {
1400 	struct rt_addrinfo	 info;
1401 	struct ifnet		*ifp;
1402 	struct sockaddr_rtlabel	 sa_rl;
1403 	struct sockaddr_in6	 sa_mask;
1404 
1405 	memset(&info, 0, sizeof(info));
1406 	info.rti_info[RTAX_DST] = rt_key(rt);
1407 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1408 	if (!ISSET(rt->rt_flags, RTF_HOST))
1409 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1410 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1411 	ifp = if_get(rt->rt_ifidx);
1412 	if (ifp != NULL) {
1413 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1414 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1415 	}
1416 
1417 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1418 	    rtableid);
1419 	if_put(ifp);
1420 }
1421 
1422 /*
1423  * This routine is called to generate a message from the routing
1424  * socket indicating that a redirect has occurred, a routing lookup
1425  * has failed, or that a protocol has detected timeouts to a particular
1426  * destination.
1427  */
1428 void
1429 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1430     u_int ifidx, int error, u_int tableid)
1431 {
1432 	struct rt_msghdr	*rtm;
1433 	struct mbuf		*m;
1434 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1435 
1436 	if (route_cb.any_count == 0)
1437 		return;
1438 	m = rtm_msg1(type, rtinfo);
1439 	if (m == NULL)
1440 		return;
1441 	rtm = mtod(m, struct rt_msghdr *);
1442 	rtm->rtm_flags = RTF_DONE | flags;
1443 	rtm->rtm_priority = prio;
1444 	rtm->rtm_errno = error;
1445 	rtm->rtm_tableid = tableid;
1446 	rtm->rtm_addrs = rtinfo->rti_addrs;
1447 	rtm->rtm_index = ifidx;
1448 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1449 }
1450 
1451 /*
1452  * This routine is called to generate a message from the routing
1453  * socket indicating that the status of a network interface has changed.
1454  */
1455 void
1456 rtm_ifchg(struct ifnet *ifp)
1457 {
1458 	struct if_msghdr	*ifm;
1459 	struct mbuf		*m;
1460 
1461 	if (route_cb.any_count == 0)
1462 		return;
1463 	m = rtm_msg1(RTM_IFINFO, NULL);
1464 	if (m == NULL)
1465 		return;
1466 	ifm = mtod(m, struct if_msghdr *);
1467 	ifm->ifm_index = ifp->if_index;
1468 	ifm->ifm_tableid = ifp->if_rdomain;
1469 	ifm->ifm_flags = ifp->if_flags;
1470 	ifm->ifm_xflags = ifp->if_xflags;
1471 	if_getdata(ifp, &ifm->ifm_data);
1472 	ifm->ifm_addrs = 0;
1473 	route_input(m, NULL, AF_UNSPEC);
1474 }
1475 
1476 /*
1477  * This is called to generate messages from the routing socket
1478  * indicating a network interface has had addresses associated with it.
1479  * if we ever reverse the logic and replace messages TO the routing
1480  * socket indicate a request to configure interfaces, then it will
1481  * be unnecessary as the routing socket will automatically generate
1482  * copies of it.
1483  */
1484 void
1485 rtm_addr(struct rtentry *rt, int cmd, struct ifaddr *ifa)
1486 {
1487 	struct ifnet		*ifp = ifa->ifa_ifp;
1488 	struct mbuf		*m;
1489 	struct rt_addrinfo	 info;
1490 	struct ifa_msghdr	*ifam;
1491 
1492 	if (route_cb.any_count == 0)
1493 		return;
1494 
1495 	memset(&info, 0, sizeof(info));
1496 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1497 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1498 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1499 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1500 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1501 		return;
1502 	ifam = mtod(m, struct ifa_msghdr *);
1503 	ifam->ifam_index = ifp->if_index;
1504 	ifam->ifam_metric = ifa->ifa_metric;
1505 	ifam->ifam_flags = ifa->ifa_flags;
1506 	ifam->ifam_addrs = info.rti_addrs;
1507 	ifam->ifam_tableid = ifp->if_rdomain;
1508 
1509 	route_input(m, NULL,
1510 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1511 }
1512 
1513 /*
1514  * This is called to generate routing socket messages indicating
1515  * network interface arrival and departure.
1516  */
1517 void
1518 rtm_ifannounce(struct ifnet *ifp, int what)
1519 {
1520 	struct if_announcemsghdr	*ifan;
1521 	struct mbuf			*m;
1522 
1523 	if (route_cb.any_count == 0)
1524 		return;
1525 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1526 	if (m == NULL)
1527 		return;
1528 	ifan = mtod(m, struct if_announcemsghdr *);
1529 	ifan->ifan_index = ifp->if_index;
1530 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1531 	ifan->ifan_what = what;
1532 	route_input(m, NULL, AF_UNSPEC);
1533 }
1534 
#ifdef BFD
/*
 * Generate an RTM_BFD message on the routing socket to report the
 * state of BFD session `bfd'.
 *
 * The message carries the destination and interface address of the
 * session's route, plus the BFD state produced by bfd2sa().  Nothing is
 * sent when route_cb.any_count is zero or when no mbuf can be allocated.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rt_addrinfo	 info;
	struct sockaddr_bfd	 state;
	struct bfd_msghdr	*hdr;
	struct mbuf		*m;

	if (route_cb.any_count == 0)
		return;

	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt);
	info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;

	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = info.rti_addrs;

	bfd2sa(bfd->bc_rt, &state);
	memcpy(&hdr->bm_sa, &state, sizeof(state));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1566 
/*
 * This is used in dumping the kernel table via sysctl().
 *
 * Called for each route `rt' of routing table `id'; `v' is the
 * struct walkarg describing the request and the user buffer.
 *
 * Routes not selected by the request are skipped.  For the others the
 * size of an RTM_GET message is accounted in w->w_needed by rtm_msg2()
 * and, while the user buffer still has room, the message is built in
 * w->w_tmem and copied out to w->w_where.
 *
 * Returns 0, or the error of copyout(); in the latter case w->w_where
 * is cleared.
 */
int
sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
{
	struct walkarg		*w = v;
	int			 error = 0, size;
	struct rt_addrinfo	 info;
	struct ifnet		*ifp;
#ifdef BFD
	struct sockaddr_bfd	 sa_bfd;
#endif
#ifdef MPLS
	struct sockaddr_mpls	 sa_mpls;
#endif
	struct sockaddr_rtlabel	 sa_rl;
	struct sockaddr_in6	 sa_mask;

	/* NET_RT_FLAGS: only routes having at least one flag of w_arg. */
	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
		return 0;
	/*
	 * NET_RT_DUMP with an argument filters on priority: a positive
	 * value selects that priority, a negative value excludes it.
	 */
	if (w->w_op == NET_RT_DUMP && w->w_arg) {
		u_int8_t prio = w->w_arg & RTP_MASK;
		if (w->w_arg < 0) {
			prio = (-w->w_arg) & RTP_MASK;
			/* Show all routes that are not this priority */
			if (prio == (rt->rt_priority & RTP_MASK))
				return 0;
		} else {
			if (prio != (rt->rt_priority & RTP_MASK) &&
			    prio != RTP_ANY)
				return 0;
		}
	}
	/* Collect the sockaddrs describing the route. */
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	ifp = if_get(rt->rt_ifidx);
	if (ifp != NULL) {
		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
		/* On point-to-point links also report the peer address. */
		if (ifp->if_flags & IFF_POINTOPOINT)
			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
	}
	/*
	 * NOTE(review): info still points at ifp->if_sadl after the
	 * reference is dropped here -- presumably safe because the
	 * caller holds the net lock; confirm.
	 */
	if_put(ifp);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
#ifdef BFD
	if (rt->rt_flags & RTF_BFD)
		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
#endif
#ifdef MPLS
	if (rt->rt_flags & RTF_MPLS) {
		/* The MPLS label is reported in the RTAX_SRC slot. */
		bzero(&sa_mpls, sizeof(sa_mpls));
		sa_mpls.smpls_family = AF_MPLS;
		sa_mpls.smpls_len = sizeof(sa_mpls);
		sa_mpls.smpls_label = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_label;
		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
		info.rti_mpls = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_operation;
	}
#endif

	/*
	 * Account for the message and, if it still fits the user
	 * buffer, have it built in w->w_tmem.
	 */
	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;

		/* Complete the header left mostly empty by rtm_msg2(). */
		rtm->rtm_pid = curproc->p_p->ps_pid;
		rtm->rtm_flags = rt->rt_flags;
		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
		/* Do not account the routing table's reference. */
		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
		rtm->rtm_index = rt->rt_ifidx;
		rtm->rtm_addrs = info.rti_addrs;
		rtm->rtm_tableid = id;
#ifdef MPLS
		rtm->rtm_mpls = info.rti_mpls;
#endif
		if ((error = copyout(rtm, w->w_where, size)) != 0)
			w->w_where = NULL;
		else
			w->w_where += size;
	}
	return (error);
}
1654 
1655 int
1656 sysctl_iflist(int af, struct walkarg *w)
1657 {
1658 	struct ifnet		*ifp;
1659 	struct ifaddr		*ifa;
1660 	struct rt_addrinfo	 info;
1661 	int			 len, error = 0;
1662 
1663 	bzero(&info, sizeof(info));
1664 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1665 		if (w->w_arg && w->w_arg != ifp->if_index)
1666 			continue;
1667 		/* Copy the link-layer address first */
1668 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1669 		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
1670 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1671 			struct if_msghdr *ifm;
1672 
1673 			ifm = (struct if_msghdr *)w->w_tmem;
1674 			ifm->ifm_index = ifp->if_index;
1675 			ifm->ifm_tableid = ifp->if_rdomain;
1676 			ifm->ifm_flags = ifp->if_flags;
1677 			if_getdata(ifp, &ifm->ifm_data);
1678 			ifm->ifm_addrs = info.rti_addrs;
1679 			error = copyout(ifm, w->w_where, len);
1680 			if (error)
1681 				return (error);
1682 			w->w_where += len;
1683 		}
1684 		info.rti_info[RTAX_IFP] = NULL;
1685 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1686 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
1687 			if (af && af != ifa->ifa_addr->sa_family)
1688 				continue;
1689 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1690 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1691 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1692 			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
1693 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1694 				struct ifa_msghdr *ifam;
1695 
1696 				ifam = (struct ifa_msghdr *)w->w_tmem;
1697 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1698 				ifam->ifam_flags = ifa->ifa_flags;
1699 				ifam->ifam_metric = ifa->ifa_metric;
1700 				ifam->ifam_addrs = info.rti_addrs;
1701 				error = copyout(w->w_tmem, w->w_where, len);
1702 				if (error)
1703 					return (error);
1704 				w->w_where += len;
1705 			}
1706 		}
1707 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1708 		    info.rti_info[RTAX_BRD] = NULL;
1709 	}
1710 	return (0);
1711 }
1712 
1713 int
1714 sysctl_ifnames(struct walkarg *w)
1715 {
1716 	struct if_nameindex_msg ifn;
1717 	struct ifnet *ifp;
1718 	int error = 0;
1719 
1720 	/* XXX ignore tableid for now */
1721 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1722 		if (w->w_arg && w->w_arg != ifp->if_index)
1723 			continue;
1724 		w->w_needed += sizeof(ifn);
1725 		if (w->w_where && w->w_needed <= 0) {
1726 
1727 			memset(&ifn, 0, sizeof(ifn));
1728 			ifn.if_index = ifp->if_index;
1729 			strlcpy(ifn.if_name, ifp->if_xname,
1730 			    sizeof(ifn.if_name));
1731 			error = copyout(&ifn, w->w_where, sizeof(ifn));
1732 			if (error)
1733 				return (error);
1734 			w->w_where += sizeof(ifn);
1735 		}
1736 	}
1737 
1738 	return (0);
1739 }
1740 
/*
 * sysctl() handler of the routing domain (read-only).
 *
 * `name' holds 3 or 4 integers: the address family (0 for all), the
 * NET_RT_* operation, an operation specific argument and optionally a
 * routing table id; without the latter the table of the calling
 * process is used.  `where' is the user buffer (may be NULL to query
 * the size only) and `given' its size, updated on return.
 *
 * Returns 0 or an errno: EPERM on an attempt to write, EINVAL for a
 * bad name length or unknown operation, ENOENT for a nonexistent
 * table, ENOMEM if the user buffer is too small.
 */
int
sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
    size_t newlen)
{
	int			 i, error = EINVAL;
	u_char			 af;
	struct walkarg		 w;
	struct rt_tableinfo	 tableinfo;
	u_int			 tableid = 0;

	if (new)
		return (EPERM);
	if (namelen < 3 || namelen > 4)
		return (EINVAL);
	af = name[0];
	bzero(&w, sizeof(w));
	w.w_where = where;
	w.w_given = *given;
	/*
	 * Start at minus the buffer size: every message adds its length,
	 * so w_needed stays <= 0 for as long as the output still fits.
	 */
	w.w_needed = 0 - w.w_given;
	w.w_op = name[1];
	w.w_arg = name[2];

	if (namelen == 4) {
		tableid = name[3];
		if (!rtable_exists(tableid))
			return (ENOENT);
	} else
		tableid = curproc->p_p->ps_rtableid;

	switch (w.w_op) {
	case NET_RT_DUMP:
	case NET_RT_FLAGS:
		NET_LOCK();
		/* Walk the table of every (or the requested) family. */
		for (i = 1; i <= AF_MAX; i++) {
			if (af != 0 && af != i)
				continue;

			error = rtable_walk(tableid, i, sysctl_dumpentry, &w);
			/* A family without a table is not an error. */
			if (error == EAFNOSUPPORT)
				error = 0;
			if (error)
				break;
		}
		NET_UNLOCK();
		break;

	case NET_RT_IFLIST:
		NET_LOCK();
		error = sysctl_iflist(af, &w);
		NET_UNLOCK();
		break;

	case NET_RT_STATS:
		/* Does not use the walkarg, nothing to clean up. */
		return (sysctl_rtable_rtstat(where, given, new));
	case NET_RT_TABLE:
		/* Here the argument, not name[3], selects the table. */
		tableid = w.w_arg;
		if (!rtable_exists(tableid))
			return (ENOENT);
		memset(&tableinfo, 0, sizeof tableinfo);
		tableinfo.rti_tableid = tableid;
		tableinfo.rti_domainid = rtable_l2(tableid);
		error = sysctl_rdstruct(where, given, new,
		    &tableinfo, sizeof(tableinfo));
		return (error);
	case NET_RT_IFNAMES:
		NET_LOCK();
		error = sysctl_ifnames(&w);
		NET_UNLOCK();
		break;
	}
	/* Release the scratch buffer rtm_msg2() may have allocated. */
	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
	/* Turn w_needed back into the total number of bytes required. */
	w.w_needed += w.w_given;
	if (where) {
		/* Report how much was actually written. */
		*given = w.w_where - (caddr_t)where;
		if (*given < w.w_needed)
			return (ENOMEM);
	} else
		/* Size query: report the size needed plus 10% of slack. */
		*given = (11 * w.w_needed) / 10;

	return (error);
}
1822 
1823 int
1824 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
1825 {
1826 	extern struct cpumem *rtcounters;
1827 	uint64_t counters[rts_ncounters];
1828 	struct rtstat rtstat;
1829 	uint32_t *words = (uint32_t *)&rtstat;
1830 	int i;
1831 
1832 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
1833 	memset(&rtstat, 0, sizeof rtstat);
1834 	counters_read(rtcounters, counters, nitems(counters));
1835 
1836 	for (i = 0; i < nitems(counters); i++)
1837 		words[i] = (uint32_t)counters[i];
1838 
1839 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
1840 }
1841 
1842 int
1843 rtm_validate_proposal(struct rt_addrinfo *info)
1844 {
1845 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
1846 	    RTA_SEARCH)) {
1847 		return -1;
1848 	}
1849 
1850 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
1851 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
1852 		if (sa == NULL)
1853 			return -1;
1854 		switch (sa->sa_family) {
1855 		case AF_INET:
1856 			if (sa->sa_len != sizeof(struct sockaddr_in))
1857 				return -1;
1858 			break;
1859 		case AF_INET6:
1860 			if (sa->sa_len != sizeof(struct sockaddr_in6))
1861 				return -1;
1862 			break;
1863 		default:
1864 			return -1;
1865 		}
1866 	}
1867 
1868 	if (ISSET(info->rti_addrs, RTA_IFA)) {
1869 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
1870 		if (sa == NULL)
1871 			return -1;
1872 		switch (sa->sa_family) {
1873 		case AF_INET:
1874 			if (sa->sa_len != sizeof(struct sockaddr_in))
1875 				return -1;
1876 			break;
1877 		case AF_INET6:
1878 			if (sa->sa_len != sizeof(struct sockaddr_in6))
1879 				return -1;
1880 			break;
1881 		default:
1882 			return -1;
1883 		}
1884 	}
1885 
1886 	if (ISSET(info->rti_addrs, RTA_DNS)) {
1887 		struct sockaddr_rtdns *rtdns =
1888 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
1889 		if (rtdns == NULL)
1890 			return -1;
1891 		if (rtdns->sr_len > sizeof(*rtdns))
1892 			return -1;
1893 		if (rtdns->sr_len <=
1894 		    offsetof(struct sockaddr_rtdns, sr_dns))
1895 			return -1;
1896 	}
1897 
1898 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
1899 		struct sockaddr_rtstatic *rtstatic =
1900 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
1901 		if (rtstatic == NULL)
1902 			return -1;
1903 		if (rtstatic->sr_len > sizeof(*rtstatic))
1904 			return -1;
1905 		if (rtstatic->sr_len <=
1906 		    offsetof(struct sockaddr_rtstatic, sr_static))
1907 			return -1;
1908 	}
1909 
1910 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
1911 		struct sockaddr_rtsearch *rtsearch =
1912 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
1913 		if (rtsearch == NULL)
1914 			return -1;
1915 		if (rtsearch->sr_len > sizeof(*rtsearch))
1916 			return -1;
1917 		if (rtsearch->sr_len <=
1918 		    offsetof(struct sockaddr_rtsearch, sr_search))
1919 			return -1;
1920 	}
1921 
1922 	return 0;
1923 }
1924 
/*
 * Definitions of protocols supported in the ROUTE domain.
 */

extern	struct domain routedomain;		/* or at least forward */

/*
 * The routing domain has a single protocol: raw sockets handled by the
 * route_* functions, with sysctl_rtable() as its sysctl handler.
 */
struct protosw routesw[] = {
{
  .pr_type	= SOCK_RAW,
  .pr_domain	= &routedomain,
  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
  .pr_output	= route_output,
  .pr_ctloutput	= route_ctloutput,
  .pr_usrreq	= route_usrreq,
  .pr_attach	= route_attach,
  .pr_detach	= route_detach,
  .pr_init	= route_prinit,
  .pr_sysctl	= sysctl_rtable
}
};

/*
 * The PF_ROUTE domain itself; dom_protoswNPROTOSW points one past the
 * last entry of routesw[].
 */
struct domain routedomain = {
  .dom_family = PF_ROUTE,
  .dom_name = "route",
  .dom_init = route_init,
  .dom_protosw = routesw,
  .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
};
1953