xref: /openbsd-src/sys/net/rtsock.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: rtsock.c,v 1.302 2020/09/23 17:52:58 mvs Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75 
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80 
81 #include <netinet/in.h>
82 
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93 
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97 
/*
 * Send and receive buffer sizes handed to soreserve() when a routing
 * socket is attached.
 */
#define	ROUTESNDQ	8192
#define	ROUTERCVQ	8192

/*
 * Fixed source address of the routing socket: it is passed to
 * sbappendaddr() for every queued message and copied out as the peer
 * address by PRU_PEERADDR.
 * NOTE(review): the positional initializer presumably fills sa_len (2)
 * and sa_family (PF_ROUTE) -- confirm against struct sockaddr.
 */
const struct sockaddr route_src = { 2, PF_ROUTE, };
102 
103 struct walkarg {
104 	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
105 	caddr_t	w_where, w_tmem;
106 };
107 
/*
 * Routing socket plumbing: initialisation, SRP list reference callbacks,
 * protocol entry points and message delivery.
 */
void	route_prinit(void);
void	rcb_ref(void *, void *);
void	rcb_unref(void *, void *);
int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
	    struct mbuf *);
int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
	    struct mbuf *, struct proc *);
void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int	route_cleargateway(struct rtentry *, void *, unsigned int);
void	rtm_senddesync_timer(void *);
void	rtm_senddesync(struct socket *);
int	rtm_sendup(struct socket *, struct mbuf *, int);

/* Routing message processing, construction and parsing helpers. */
int	rtm_getifa(struct rt_addrinfo *, unsigned int);
int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
	    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
		     struct walkarg *);
int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
int		 rtm_validate_proposal(struct rt_addrinfo *);
void		 rtm_setmetrics(u_long, const struct rt_metrics *,
		     struct rt_kmetrics *);
void		 rtm_getmetrics(const struct rt_kmetrics *,
		     struct rt_metrics *);

/* NOTE(review): presumably the sysctl(2) back ends -- defined elsewhere. */
int		 sysctl_iflist(int, struct walkarg *);
int		 sysctl_ifnames(struct walkarg *);
int		 sysctl_rtable_rtstat(void *, size_t *, void *);
140 
/*
 * Per-socket control block of a routing socket.  It is allocated from
 * rtpcb_pool in route_attach(), linked on rtptable.rtp_list and released
 * in route_detach().
 *
 * Locks used to protect struct members
 *       I       immutable after creation
 *       sK      solock (kernel lock)
 */
struct rtpcb {
	struct socket		*rop_socket;		/* [I] */

	SRPL_ENTRY(rtpcb)	rop_list;	/* linkage on rtptable.rtp_list */
	struct refcnt		rop_refcnt;	/* taken by rcb_ref(), route_input() */
	struct timeout		rop_timeout;	/* runs rtm_senddesync_timer() */
	unsigned int		rop_msgfilter;		/* [sK] */
	unsigned int		rop_flagfilter;		/* [sK] */
	unsigned int		rop_flags;		/* [sK] */
	u_int			rop_rtableid;		/* [sK] */
	unsigned short		rop_proto;		/* [I] */
	u_char			rop_priority;		/* [sK] */
};
/* Fetch the routing pcb hanging off a socket's so_pcb pointer. */
#define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
160 
/*
 * Global registry of attached routing sockets.  route_attach() and
 * route_detach() modify the list and the counter while holding rtp_lk
 * for writing; route_input() walks the list with SRPL_FOREACH().
 */
struct rtptable {
	SRPL_HEAD(, rtpcb)	rtp_list;	/* all attached rtpcbs */
	struct srpl_rc		rtp_rc;		/* rcb_ref()/rcb_unref() hooks */
	struct rwlock		rtp_lk;		/* serialises list updates */
	unsigned int		rtp_count;	/* number of attached sockets */
};

/* Backing pool for struct rtpcb and the single registry instance. */
struct pool rtpcb_pool;
struct rtptable rtptable;
170 
/*
 * These flags (kept in rtpcb.rop_flags) and the timeout below are used
 * to tell userland, via an RTM_DESYNC message, that the route socket
 * has overflowed and messages have been lost.
 */
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
					   queueing more packets */

/* Delay before rtm_senddesync() retries a failed RTM_DESYNC delivery. */
#define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */
181 
182 void
183 route_prinit(void)
184 {
185 	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
186 	rw_init(&rtptable.rtp_lk, "rtsock");
187 	SRPL_INIT(&rtptable.rtp_list);
188 	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
189 	    IPL_NONE, PR_WAITOK, "rtpcb", NULL);
190 }
191 
192 void
193 rcb_ref(void *null, void *v)
194 {
195 	struct rtpcb *rop = v;
196 
197 	refcnt_take(&rop->rop_refcnt);
198 }
199 
200 void
201 rcb_unref(void *null, void *v)
202 {
203 	struct rtpcb *rop = v;
204 
205 	refcnt_rele_wake(&rop->rop_refcnt);
206 }
207 
208 int
209 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
210     struct mbuf *control, struct proc *p)
211 {
212 	struct rtpcb	*rop;
213 	int		 error = 0;
214 
215 	if (req == PRU_CONTROL)
216 		return (EOPNOTSUPP);
217 
218 	soassertlocked(so);
219 
220 	if (control && control->m_len) {
221 		error = EOPNOTSUPP;
222 		goto release;
223 	}
224 
225 	rop = sotortpcb(so);
226 	if (rop == NULL) {
227 		error = EINVAL;
228 		goto release;
229 	}
230 
231 	switch (req) {
232 	/* no connect, bind, accept. Socket is connected from the start */
233 	case PRU_CONNECT:
234 	case PRU_BIND:
235 	case PRU_CONNECT2:
236 	case PRU_LISTEN:
237 	case PRU_ACCEPT:
238 		error = EOPNOTSUPP;
239 		break;
240 
241 	case PRU_DISCONNECT:
242 	case PRU_ABORT:
243 		soisdisconnected(so);
244 		break;
245 	case PRU_SHUTDOWN:
246 		socantsendmore(so);
247 		break;
248 	case PRU_SENSE:
249 		/* stat: don't bother with a blocksize. */
250 		break;
251 
252 	/* minimal support, just implement a fake peer address */
253 	case PRU_SOCKADDR:
254 		error = EINVAL;
255 		break;
256 	case PRU_PEERADDR:
257 		bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
258 		nam->m_len = route_src.sa_len;
259 		break;
260 
261 	case PRU_RCVD:
262 		/*
263 		 * If we are in a FLUSH state, check if the buffer is
264 		 * empty so that we can clear the flag.
265 		 */
266 		if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
267 		    ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
268 		    rop->rop_socket->so_rcv.sb_hiwat)))
269 			rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
270 		break;
271 
272 	case PRU_RCVOOB:
273 	case PRU_SENDOOB:
274 		error = EOPNOTSUPP;
275 		break;
276 	case PRU_SEND:
277 		if (nam) {
278 			error = EISCONN;
279 			break;
280 		}
281 		error = (*so->so_proto->pr_output)(m, so, NULL, NULL);
282 		m = NULL;
283 		break;
284 	default:
285 		panic("route_usrreq");
286 	}
287 
288  release:
289 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
290 		m_freem(control);
291 		m_freem(m);
292 	}
293 	return (error);
294 }
295 
296 int
297 route_attach(struct socket *so, int proto)
298 {
299 	struct rtpcb	*rop;
300 	int		 error;
301 
302 	/*
303 	 * use the rawcb but allocate a rtpcb, this
304 	 * code does not care about the additional fields
305 	 * and works directly on the raw socket.
306 	 */
307 	rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO);
308 	so->so_pcb = rop;
309 	/* Init the timeout structure */
310 	timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so);
311 	refcnt_init(&rop->rop_refcnt);
312 
313 	if (curproc == NULL)
314 		error = EACCES;
315 	else
316 		error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
317 	if (error) {
318 		pool_put(&rtpcb_pool, rop);
319 		return (error);
320 	}
321 
322 	rop->rop_socket = so;
323 	rop->rop_proto = proto;
324 
325 	rop->rop_rtableid = curproc->p_p->ps_rtableid;
326 
327 	soisconnected(so);
328 	so->so_options |= SO_USELOOPBACK;
329 
330 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
331 	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
332 	    rop_list);
333 	rtptable.rtp_count++;
334 	rw_exit(&rtptable.rtp_lk);
335 
336 	return (0);
337 }
338 
339 int
340 route_detach(struct socket *so)
341 {
342 	struct rtpcb	*rop;
343 
344 	soassertlocked(so);
345 
346 	rop = sotortpcb(so);
347 	if (rop == NULL)
348 		return (EINVAL);
349 
350 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
351 
352 	timeout_del(&rop->rop_timeout);
353 	rtptable.rtp_count--;
354 
355 	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
356 	    rop_list);
357 	rw_exit(&rtptable.rtp_lk);
358 
359 	/* wait for all references to drop */
360 	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
361 
362 	so->so_pcb = NULL;
363 	KASSERT((so->so_state & SS_NOFDREF) == 0);
364 	pool_put(&rtpcb_pool, rop);
365 
366 	return (0);
367 }
368 
369 int
370 route_ctloutput(int op, struct socket *so, int level, int optname,
371     struct mbuf *m)
372 {
373 	struct rtpcb *rop = sotortpcb(so);
374 	int error = 0;
375 	unsigned int tid, prio;
376 
377 	if (level != AF_ROUTE)
378 		return (EINVAL);
379 
380 	switch (op) {
381 	case PRCO_SETOPT:
382 		switch (optname) {
383 		case ROUTE_MSGFILTER:
384 			if (m == NULL || m->m_len != sizeof(unsigned int))
385 				error = EINVAL;
386 			else
387 				rop->rop_msgfilter = *mtod(m, unsigned int *);
388 			break;
389 		case ROUTE_TABLEFILTER:
390 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
391 				error = EINVAL;
392 				break;
393 			}
394 			tid = *mtod(m, unsigned int *);
395 			if (tid != RTABLE_ANY && !rtable_exists(tid))
396 				error = ENOENT;
397 			else
398 				rop->rop_rtableid = tid;
399 			break;
400 		case ROUTE_PRIOFILTER:
401 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
402 				error = EINVAL;
403 				break;
404 			}
405 			prio = *mtod(m, unsigned int *);
406 			if (prio > RTP_MAX)
407 				error = EINVAL;
408 			else
409 				rop->rop_priority = prio;
410 			break;
411 		case ROUTE_FLAGFILTER:
412 			if (m == NULL || m->m_len != sizeof(unsigned int))
413 				error = EINVAL;
414 			else
415 				rop->rop_flagfilter = *mtod(m, unsigned int *);
416 			break;
417 		default:
418 			error = ENOPROTOOPT;
419 			break;
420 		}
421 		break;
422 	case PRCO_GETOPT:
423 		switch (optname) {
424 		case ROUTE_MSGFILTER:
425 			m->m_len = sizeof(unsigned int);
426 			*mtod(m, unsigned int *) = rop->rop_msgfilter;
427 			break;
428 		case ROUTE_TABLEFILTER:
429 			m->m_len = sizeof(unsigned int);
430 			*mtod(m, unsigned int *) = rop->rop_rtableid;
431 			break;
432 		case ROUTE_PRIOFILTER:
433 			m->m_len = sizeof(unsigned int);
434 			*mtod(m, unsigned int *) = rop->rop_priority;
435 			break;
436 		case ROUTE_FLAGFILTER:
437 			m->m_len = sizeof(unsigned int);
438 			*mtod(m, unsigned int *) = rop->rop_flagfilter;
439 			break;
440 		default:
441 			error = ENOPROTOOPT;
442 			break;
443 		}
444 	}
445 	return (error);
446 }
447 
/*
 * Timeout handler armed by rtm_senddesync(): retries delivery of the
 * RTM_DESYNC message with the socket locked.  `xso' is the socket that
 * was registered with timeout_set() in route_attach().
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*sock = (struct socket *)xso;
	int		 lockstate;

	lockstate = solock(sock);
	rtm_senddesync(sock);
	sounlock(sock, lockstate);
}
458 
459 void
460 rtm_senddesync(struct socket *so)
461 {
462 	struct rtpcb	*rop = sotortpcb(so);
463 	struct mbuf	*desync_mbuf;
464 
465 	soassertlocked(so);
466 
467 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
468 	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
469 		return;
470 
471 	/*
472 	 * If we fail to alloc memory or if sbappendaddr()
473 	 * fails, re-add timeout and try again.
474 	 */
475 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
476 	if (desync_mbuf != NULL) {
477 		if (sbappendaddr(so, &so->so_rcv, &route_src,
478 		    desync_mbuf, NULL) != 0) {
479 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
480 			sorwakeup(rop->rop_socket);
481 			return;
482 		}
483 		m_freem(desync_mbuf);
484 	}
485 	/* Re-add timeout to try sending msg again */
486 	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
487 }
488 
/*
 * Deliver the routing message in `m0' to every interested routing socket.
 *
 * `so0' is the socket the message originated from; it only receives the
 * message if it has SO_USELOOPBACK set.  `sa_family' is the address
 * family the message relates to, or AF_UNSPEC.
 *
 * Each attached socket is checked against its address family, message
 * type filter, flag filter, priority filter and routing table filter.
 * Delivery is deferred by one socket: a matching socket is remembered in
 * `last' (with a pcb reference held) and served via rtm_sendup() once the
 * next match is found, so that all but the final recipient get a copy
 * and the final one gets the original mbuf.  The mbuf is freed here when
 * it is too short or when no socket wants it.
 */
void
route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
{
	struct socket *so;
	struct rtpcb *rop;
	struct rt_msghdr *rtm;
	struct mbuf *m = m0;
	struct socket *last = NULL;
	struct srp_ref sr;
	int s;

	/* ensure that we can access the rtm_type via mtod() */
	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
		m_freem(m);
		return;
	}

	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
		/*
		 * If route socket is bound to an address family only send
		 * messages that match the address family. Address family
		 * agnostic messages are always sent.
		 */
		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
		    rop->rop_proto != sa_family)
			continue;


		so = rop->rop_socket;
		s = solock(so);

		/*
		 * Check to see if we don't want our own messages and
		 * if we can receive anything.
		 */
		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
		    !(so->so_state & SS_ISCONNECTED) ||
		    (so->so_state & SS_CANTRCVMORE)) {
			/*
			 * Shared "skip this socket" exit; the filters below
			 * jump here so the socket is always unlocked.
			 */
next:
			sounlock(so, s);
			continue;
		}

		/* filter messages that the process does not want */
		rtm = mtod(m, struct rt_msghdr *);
		/* but RTM_DESYNC can't be filtered */
		if (rtm->rtm_type != RTM_DESYNC) {
			if (rop->rop_msgfilter != 0 &&
			    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
				goto next;
			if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
				goto next;
		}
		switch (rtm->rtm_type) {
		case RTM_IFANNOUNCE:
		case RTM_DESYNC:
			/* no tableid */
			break;
		case RTM_RESOLVE:
		case RTM_NEWADDR:
		case RTM_DELADDR:
		case RTM_IFINFO:
		case RTM_80211INFO:
		case RTM_BFD:
			/* check against rdomain id */
			if (rop->rop_rtableid != RTABLE_ANY &&
			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
				goto next;
			break;
		default:
			/* a priority filter of 0 accepts every priority */
			if (rop->rop_priority != 0 &&
			    rop->rop_priority < rtm->rtm_priority)
				goto next;
			/* check against rtable id */
			if (rop->rop_rtableid != RTABLE_ANY &&
			    rop->rop_rtableid != rtm->rtm_tableid)
				goto next;
			break;
		}

		/*
		 * Check to see if the flush flag is set. If so, don't queue
		 * any more messages until the flag is cleared.
		 */
		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
			goto next;
		sounlock(so, s);

		/*
		 * This socket wants the message.  Serve the previously
		 * remembered socket with a copy (more == 1) and drop the
		 * reference that kept its pcb around.
		 */
		if (last) {
			s = solock(last);
			rtm_sendup(last, m, 1);
			sounlock(last, s);
			refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
		}
		/* keep a reference for last */
		refcnt_take(&rop->rop_refcnt);
		last = rop->rop_socket;
	}
	SRPL_LEAVE(&sr);

	/*
	 * The final recipient is handed the original mbuf (more == 0);
	 * without any recipient the message is dropped.
	 */
	if (last) {
		s = solock(last);
		rtm_sendup(last, m, 0);
		sounlock(last, s);
		refcnt_rele_wake(&sotortpcb(last)->rop_refcnt);
	} else
		m_freem(m);
}
597 
598 int
599 rtm_sendup(struct socket *so, struct mbuf *m0, int more)
600 {
601 	struct rtpcb *rop = sotortpcb(so);
602 	struct mbuf *m;
603 
604 	soassertlocked(so);
605 
606 	if (more) {
607 		m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
608 		if (m == NULL)
609 			return (ENOMEM);
610 	} else
611 		m = m0;
612 
613 	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
614 	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
615 		/* Flag socket as desync'ed and flush required */
616 		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
617 		rtm_senddesync(so);
618 		m_freem(m);
619 		return (ENOBUFS);
620 	}
621 
622 	sorwakeup(so);
623 	return (0);
624 }
625 
626 struct rt_msghdr *
627 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
628 {
629 	struct rt_msghdr	*rtm;
630 	struct rt_addrinfo	 info;
631 	struct sockaddr_rtlabel	 sa_rl;
632 	struct sockaddr_in6	 sa_mask;
633 #ifdef BFD
634 	struct sockaddr_bfd	 sa_bfd;
635 #endif
636 	struct ifnet		*ifp = NULL;
637 	int			 len;
638 
639 	bzero(&info, sizeof(info));
640 	info.rti_info[RTAX_DST] = rt_key(rt);
641 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
642 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
643 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
644 #ifdef BFD
645 	if (rt->rt_flags & RTF_BFD)
646 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
647 #endif
648 #ifdef MPLS
649 	if (rt->rt_flags & RTF_MPLS) {
650 		struct sockaddr_mpls	 sa_mpls;
651 
652 		bzero(&sa_mpls, sizeof(sa_mpls));
653 		sa_mpls.smpls_family = AF_MPLS;
654 		sa_mpls.smpls_len = sizeof(sa_mpls);
655 		sa_mpls.smpls_label = ((struct rt_mpls *)
656 		    rt->rt_llinfo)->mpls_label;
657 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
658 		info.rti_mpls = ((struct rt_mpls *)
659 		    rt->rt_llinfo)->mpls_operation;
660 	}
661 #endif
662 	ifp = if_get(rt->rt_ifidx);
663 	if (ifp != NULL) {
664 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
665 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
666 		if (ifp->if_flags & IFF_POINTOPOINT)
667 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
668 	}
669 	if_put(ifp);
670 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
671 
672 	/* build new route message */
673 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
674 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
675 
676 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
677 	rtm->rtm_type = type;
678 	rtm->rtm_index = rt->rt_ifidx;
679 	rtm->rtm_tableid = tableid;
680 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
681 	rtm->rtm_flags = rt->rt_flags;
682 	rtm->rtm_pid = curproc->p_p->ps_pid;
683 	rtm->rtm_seq = seq;
684 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
685 	rtm->rtm_addrs = info.rti_addrs;
686 #ifdef MPLS
687 	rtm->rtm_mpls = info.rti_mpls;
688 #endif
689 	return rtm;
690 }
691 
692 int
693 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
694     struct mbuf *control)
695 {
696 	struct rt_msghdr	*rtm = NULL;
697 	struct rtentry		*rt = NULL;
698 	struct rt_addrinfo	 info;
699 	int			 len, seq, error = 0;
700 	u_int			 tableid;
701 	u_int8_t		 prio;
702 	u_char			 vers, type;
703 
704 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
705 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
706 		return (ENOBUFS);
707 	if ((m->m_flags & M_PKTHDR) == 0)
708 		panic("route_output");
709 	len = m->m_pkthdr.len;
710 	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 ||
711 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
712 		error = EINVAL;
713 		goto fail;
714 	}
715 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
716 	switch (vers) {
717 	case RTM_VERSION:
718 		if (len < sizeof(struct rt_msghdr)) {
719 			error = EINVAL;
720 			goto fail;
721 		}
722 		if (len > RTM_MAXSIZE) {
723 			error = EMSGSIZE;
724 			goto fail;
725 		}
726 		rtm = malloc(len, M_RTABLE, M_WAITOK);
727 		m_copydata(m, 0, len, (caddr_t)rtm);
728 		break;
729 	default:
730 		error = EPROTONOSUPPORT;
731 		goto fail;
732 	}
733 
734 	/* Verify that the caller is sending an appropriate message early */
735 	switch (rtm->rtm_type) {
736 	case RTM_ADD:
737 	case RTM_DELETE:
738 	case RTM_GET:
739 	case RTM_CHANGE:
740 	case RTM_PROPOSAL:
741 		break;
742 	default:
743 		error = EOPNOTSUPP;
744 		goto fail;
745 	}
746 	/*
747 	 * Verify that the header length is valid.
748 	 * All messages from userland start with a struct rt_msghdr.
749 	 */
750 	if (rtm->rtm_hdrlen == 0)	/* old client */
751 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
752 	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
753 	    len < rtm->rtm_hdrlen) {
754 		error = EINVAL;
755 		goto fail;
756 	}
757 
758 	rtm->rtm_pid = curproc->p_p->ps_pid;
759 
760 	/*
761 	 * Verify that the caller has the appropriate privilege; RTM_GET
762 	 * is the only operation the non-superuser is allowed.
763 	 */
764 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
765 		error = EACCES;
766 		goto fail;
767 	}
768 	tableid = rtm->rtm_tableid;
769 	if (!rtable_exists(tableid)) {
770 		if (rtm->rtm_type == RTM_ADD) {
771 			if ((error = rtable_add(tableid)) != 0)
772 				goto fail;
773 		} else {
774 			error = EINVAL;
775 			goto fail;
776 		}
777 	}
778 
779 
780 	/* Do not let userland play with kernel-only flags. */
781 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
782 		error = EINVAL;
783 		goto fail;
784 	}
785 
786 	/* make sure that kernel-only bits are not set */
787 	rtm->rtm_priority &= RTP_MASK;
788 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
789 	rtm->rtm_fmask &= RTF_FMASK;
790 
791 	if (rtm->rtm_priority != 0) {
792 		if (rtm->rtm_priority > RTP_MAX ||
793 		    rtm->rtm_priority == RTP_LOCAL) {
794 			error = EINVAL;
795 			goto fail;
796 		}
797 		prio = rtm->rtm_priority;
798 	} else if (rtm->rtm_type != RTM_ADD)
799 		prio = RTP_ANY;
800 	else if (rtm->rtm_flags & RTF_STATIC)
801 		prio = 0;
802 	else
803 		prio = RTP_DEFAULT;
804 
805 	bzero(&info, sizeof(info));
806 	info.rti_addrs = rtm->rtm_addrs;
807 	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
808 	    len + (caddr_t)rtm, &info)) != 0)
809 		goto fail;
810 	info.rti_flags = rtm->rtm_flags;
811 	if (rtm->rtm_type != RTM_PROPOSAL &&
812 	   (info.rti_info[RTAX_DST] == NULL ||
813 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
814 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
815 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
816 	    info.rti_info[RTAX_GENMASK] != NULL)) {
817 		error = EINVAL;
818 		goto fail;
819 	}
820 #ifdef MPLS
821 	info.rti_mpls = rtm->rtm_mpls;
822 #endif
823 
824 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
825 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
826 	    (info.rti_flags & RTF_CLONING) == 0) {
827 		info.rti_flags |= RTF_LLINFO;
828 	}
829 
830 	/*
831 	 * Validate RTM_PROPOSAL and pass it along or error out.
832 	 */
833 	if (rtm->rtm_type == RTM_PROPOSAL) {
834 		if (rtm_validate_proposal(&info) == -1) {
835 			error = EINVAL;
836 			goto fail;
837 		}
838 		/*
839 		 * If this is a solicitation proposal forward request to
840 		 * all interfaces. Most handlers will ignore it but at least
841 		 * umb(4) will send a response to this event.
842 		 */
843 		if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
844 			struct ifnet *ifp;
845 			NET_LOCK();
846 			TAILQ_FOREACH(ifp, &ifnet, if_list) {
847 				ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
848 			}
849 			NET_UNLOCK();
850 		}
851 	} else {
852 		error = rtm_output(rtm, &rt, &info, prio, tableid);
853 		if (!error) {
854 			type = rtm->rtm_type;
855 			seq = rtm->rtm_seq;
856 			free(rtm, M_RTABLE, len);
857 			rtm = rtm_report(rt, type, seq, tableid);
858 			len = rtm->rtm_msglen;
859 		}
860 	}
861 
862 	rtfree(rt);
863 	if (error) {
864 		rtm->rtm_errno = error;
865 	} else {
866 		rtm->rtm_flags |= RTF_DONE;
867 	}
868 
869 	/*
870 	 * Check to see if we don't want our own messages.
871 	 */
872 	if (!(so->so_options & SO_USELOOPBACK)) {
873 		if (rtptable.rtp_count <= 1) {
874 			/* no other listener and no loopback of messages */
875 fail:
876 			free(rtm, M_RTABLE, len);
877 			m_freem(m);
878 			return (error);
879 		}
880 	}
881 	if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
882 		m_freem(m);
883 		m = NULL;
884 	} else if (m->m_pkthdr.len > len)
885 		m_adj(m, len - m->m_pkthdr.len);
886 	free(rtm, M_RTABLE, len);
887 	if (m)
888 		route_input(m, so, info.rti_info[RTAX_DST] ?
889 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
890 
891 	return (error);
892 }
893 
894 int
895 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
896     struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
897 {
898 	struct rtentry		*rt = *prt;
899 	struct ifnet		*ifp = NULL;
900 	int			 plen, newgate = 0, error = 0;
901 
902 	switch (rtm->rtm_type) {
903 	case RTM_ADD:
904 		if (info->rti_info[RTAX_GATEWAY] == NULL) {
905 			error = EINVAL;
906 			break;
907 		}
908 
909 		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
910 		if ((error = route_arp_conflict(rt, info))) {
911 			rtfree(rt);
912 			rt = NULL;
913 			break;
914 		}
915 
916 		/*
917 		 * We cannot go through a delete/create/insert cycle for
918 		 * cached route because this can lead to races in the
919 		 * receive path.  Instead we update the L2 cache.
920 		 */
921 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
922 			goto change;
923 
924 		rtfree(rt);
925 		rt = NULL;
926 
927 		NET_LOCK();
928 		if ((error = rtm_getifa(info, tableid)) != 0) {
929 			NET_UNLOCK();
930 			break;
931 		}
932 		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
933 		NET_UNLOCK();
934 		if (error == 0)
935 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
936 			    &rt->rt_rmx);
937 		break;
938 	case RTM_DELETE:
939 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
940 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
941 		    prio);
942 		if (rt == NULL) {
943 			error = ESRCH;
944 			break;
945 		}
946 
947 		/*
948 		 * If we got multipath routes, we require users to specify
949 		 * a matching gateway.
950 		 */
951 		if (ISSET(rt->rt_flags, RTF_MPATH) &&
952 		    info->rti_info[RTAX_GATEWAY] == NULL) {
953 			error = ESRCH;
954 			break;
955 		}
956 
957 		/* Detaching an interface requires the KERNEL_LOCK(). */
958 		ifp = if_get(rt->rt_ifidx);
959 		KASSERT(ifp != NULL);
960 
961 		/*
962 		 * Invalidate the cache of automagically created and
963 		 * referenced L2 entries to make sure that ``rt_gwroute''
964 		 * pointer stays valid for other CPUs.
965 		 */
966 		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
967 			NET_LOCK();
968 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
969 			/* Reset the MTU of the gateway route. */
970 			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
971 			    route_cleargateway, rt);
972 			NET_UNLOCK();
973 			if_put(ifp);
974 			break;
975 		}
976 
977 		/*
978 		 * Make sure that local routes are only modified by the
979 		 * kernel.
980 		 */
981 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
982 			if_put(ifp);
983 			error = EINVAL;
984 			break;
985 		}
986 
987 		rtfree(rt);
988 		rt = NULL;
989 
990 		NET_LOCK();
991 		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
992 		NET_UNLOCK();
993 		if_put(ifp);
994 		break;
995 	case RTM_CHANGE:
996 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
997 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
998 		    prio);
999 		/*
1000 		 * If we got multipath routes, we require users to specify
1001 		 * a matching gateway.
1002 		 */
1003 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
1004 		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
1005 			rtfree(rt);
1006 			rt = NULL;
1007 		}
1008 		/*
1009 		 * If RTAX_GATEWAY is the argument we're trying to
1010 		 * change, try to find a compatible route.
1011 		 */
1012 		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
1013 			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1014 			    info->rti_info[RTAX_NETMASK], NULL, prio);
1015 			/* Ensure we don't pick a multipath one. */
1016 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
1017 				rtfree(rt);
1018 				rt = NULL;
1019 			}
1020 		}
1021 
1022 		if (rt == NULL) {
1023 			error = ESRCH;
1024 			break;
1025 		}
1026 
1027 		/*
1028 		 * Make sure that local routes are only modified by the
1029 		 * kernel.
1030 		 */
1031 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1032 			error = EINVAL;
1033 			break;
1034 		}
1035 
1036 		/*
1037 		 * RTM_CHANGE needs a perfect match.
1038 		 */
1039 		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
1040 		    info->rti_info[RTAX_NETMASK]);
1041 		if (rt_plen(rt) != plen) {
1042 			error = ESRCH;
1043 			break;
1044 		}
1045 
1046 		if (info->rti_info[RTAX_GATEWAY] != NULL)
1047 			if (rt->rt_gateway == NULL ||
1048 			    bcmp(rt->rt_gateway,
1049 			    info->rti_info[RTAX_GATEWAY],
1050 			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
1051 				newgate = 1;
1052 			}
1053 		/*
1054 		 * Check reachable gateway before changing the route.
1055 		 * New gateway could require new ifaddr, ifp;
1056 		 * flags may also be different; ifp may be specified
1057 		 * by ll sockaddr when protocol address is ambiguous.
1058 		 */
1059 		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1060 		    info->rti_info[RTAX_IFA] != NULL) {
1061 			struct ifaddr	*ifa = NULL;
1062 
1063 			NET_LOCK();
1064 			if ((error = rtm_getifa(info, tableid)) != 0) {
1065 				NET_UNLOCK();
1066 				break;
1067 			}
1068 			ifa = info->rti_ifa;
1069 			if (rt->rt_ifa != ifa) {
1070 				ifp = if_get(rt->rt_ifidx);
1071 				KASSERT(ifp != NULL);
1072 				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1073 				ifafree(rt->rt_ifa);
1074 				if_put(ifp);
1075 
1076 				ifa->ifa_refcnt++;
1077 				rt->rt_ifa = ifa;
1078 				rt->rt_ifidx = ifa->ifa_ifp->if_index;
1079 				/* recheck link state after ifp change */
1080 				rt_if_linkstate_change(rt, ifa->ifa_ifp,
1081 				    tableid);
1082 			}
1083 			NET_UNLOCK();
1084 		}
1085 change:
1086 		if (info->rti_info[RTAX_GATEWAY] != NULL) {
1087 			/* When updating the gateway, make sure it is valid. */
1088 			if (!newgate && rt->rt_gateway->sa_family !=
1089 			    info->rti_info[RTAX_GATEWAY]->sa_family) {
1090 				error = EINVAL;
1091 				break;
1092 			}
1093 
1094 			NET_LOCK();
1095 			error = rt_setgate(rt,
1096 			    info->rti_info[RTAX_GATEWAY], tableid);
1097 			NET_UNLOCK();
1098 			if (error)
1099 				break;
1100 		}
1101 #ifdef MPLS
1102 		if (rtm->rtm_flags & RTF_MPLS) {
1103 			NET_LOCK();
1104 			error = rt_mpls_set(rt,
1105 			    info->rti_info[RTAX_SRC], info->rti_mpls);
1106 			NET_UNLOCK();
1107 			if (error)
1108 				break;
1109 		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
1110 			NET_LOCK();
1111 			/* if gateway changed remove MPLS information */
1112 			rt_mpls_clear(rt);
1113 			NET_UNLOCK();
1114 		}
1115 #endif
1116 
1117 #ifdef BFD
1118 		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1119 			if ((error = bfdset(rt)))
1120 				break;
1121 		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1122 		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
1123 			bfdclear(rt);
1124 		}
1125 #endif
1126 
1127 		NET_LOCK();
1128 		/* Hack to allow some flags to be toggled */
1129 		if (rtm->rtm_fmask) {
1130 			/* MPLS flag it is set by rt_mpls_set() */
1131 			rtm->rtm_fmask &= ~RTF_MPLS;
1132 			rtm->rtm_flags &= ~RTF_MPLS;
1133 			rt->rt_flags =
1134 			    (rt->rt_flags & ~rtm->rtm_fmask) |
1135 			    (rtm->rtm_flags & rtm->rtm_fmask);
1136 		}
1137 		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
1138 
1139 		ifp = if_get(rt->rt_ifidx);
1140 		KASSERT(ifp != NULL);
1141 		ifp->if_rtrequest(ifp, RTM_ADD, rt);
1142 		if_put(ifp);
1143 
1144 		if (info->rti_info[RTAX_LABEL] != NULL) {
1145 			char *rtlabel = ((struct sockaddr_rtlabel *)
1146 			    info->rti_info[RTAX_LABEL])->sr_label;
1147 			rtlabel_unref(rt->rt_labelid);
1148 			rt->rt_labelid = rtlabel_name2id(rtlabel);
1149 		}
1150 		if_group_routechange(info->rti_info[RTAX_DST],
1151 		    info->rti_info[RTAX_NETMASK]);
1152 		rt->rt_locks &= ~(rtm->rtm_inits);
1153 		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1154 		NET_UNLOCK();
1155 		break;
1156 	case RTM_GET:
1157 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1158 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1159 		    prio);
1160 		if (rt == NULL)
1161 			error = ESRCH;
1162 		break;
1163 	}
1164 
1165 	*prt = rt;
1166 	return (error);
1167 }
1168 
1169 struct ifaddr *
1170 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1171     unsigned int rtableid)
1172 {
1173 	struct ifaddr	*ifa;
1174 
1175 	if ((flags & RTF_GATEWAY) == 0) {
1176 		/*
1177 		 * If we are adding a route to an interface,
1178 		 * and the interface is a pt to pt link
1179 		 * we should search for the destination
1180 		 * as our clue to the interface.  Otherwise
1181 		 * we can use the local address.
1182 		 */
1183 		ifa = NULL;
1184 		if (flags & RTF_HOST)
1185 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1186 		if (ifa == NULL)
1187 			ifa = ifa_ifwithaddr(gateway, rtableid);
1188 	} else {
1189 		/*
1190 		 * If we are adding a route to a remote net
1191 		 * or host, the gateway may still be on the
1192 		 * other end of a pt to pt link.
1193 		 */
1194 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1195 	}
1196 	if (ifa == NULL) {
1197 		if (gateway->sa_family == AF_LINK) {
1198 			struct sockaddr_dl *sdl = satosdl(gateway);
1199 			struct ifnet *ifp = if_get(sdl->sdl_index);
1200 
1201 			if (ifp != NULL)
1202 				ifa = ifaof_ifpforaddr(dst, ifp);
1203 			if_put(ifp);
1204 		} else {
1205 			struct rtentry *rt;
1206 
1207 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1208 			if (rt != NULL)
1209 				ifa = rt->rt_ifa;
1210 			rtfree(rt);
1211 		}
1212 	}
1213 	if (ifa == NULL)
1214 		return (NULL);
1215 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1216 		struct ifaddr	*oifa = ifa;
1217 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1218 		if (ifa == NULL)
1219 			ifa = oifa;
1220 	}
1221 	return (ifa);
1222 }
1223 
1224 int
1225 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1226 {
1227 	struct ifnet	*ifp = NULL;
1228 
1229 	/*
1230 	 * The "returned" `ifa' is guaranteed to be alive only if
1231 	 * the NET_LOCK() is held.
1232 	 */
1233 	NET_ASSERT_LOCKED();
1234 
1235 	/*
1236 	 * ifp may be specified by sockaddr_dl when protocol address
1237 	 * is ambiguous
1238 	 */
1239 	if (info->rti_info[RTAX_IFP] != NULL) {
1240 		struct sockaddr_dl *sdl;
1241 
1242 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1243 		ifp = if_get(sdl->sdl_index);
1244 	}
1245 
1246 #ifdef IPSEC
1247 	/*
1248 	 * If the destination is a PF_KEY address, we'll look
1249 	 * for the existence of a encap interface number or address
1250 	 * in the options list of the gateway. By default, we'll return
1251 	 * enc0.
1252 	 */
1253 	if (info->rti_info[RTAX_DST] &&
1254 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1255 		info->rti_ifa = enc_getifa(rtid, 0);
1256 #endif
1257 
1258 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1259 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1260 
1261 	if (info->rti_ifa == NULL) {
1262 		struct sockaddr	*sa;
1263 
1264 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1265 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1266 				sa = info->rti_info[RTAX_DST];
1267 
1268 		if (sa != NULL && ifp != NULL)
1269 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1270 		else if (info->rti_info[RTAX_DST] != NULL &&
1271 		    info->rti_info[RTAX_GATEWAY] != NULL)
1272 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1273 			    info->rti_info[RTAX_DST],
1274 			    info->rti_info[RTAX_GATEWAY],
1275 			    rtid);
1276 		else if (sa != NULL)
1277 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1278 			    sa, sa, rtid);
1279 	}
1280 
1281 	if_put(ifp);
1282 
1283 	if (info->rti_ifa == NULL)
1284 		return (ENETUNREACH);
1285 
1286 	return (0);
1287 }
1288 
1289 int
1290 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1291 {
1292 	struct rtentry *nhrt = arg;
1293 
1294 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1295 	    !ISSET(rt->rt_locks, RTV_MTU))
1296 		rt->rt_mtu = 0;
1297 
1298 	return (0);
1299 }
1300 
1301 /*
1302  * Check if the user request to insert an ARP entry does not conflict
1303  * with existing ones.
1304  *
1305  * Only two entries are allowed for a given IP address: a private one
1306  * (priv) and a public one (pub).
1307  */
1308 int
1309 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1310 {
1311 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1312 
1313 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1314 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1315 		return (0);
1316 
1317 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1318 		return (0);
1319 
1320 	/* If the entry is cached, it can be updated. */
1321 	if (ISSET(rt->rt_flags, RTF_CACHED))
1322 		return (0);
1323 
1324 	/*
1325 	 * Same destination, not cached and both "priv" or "pub" conflict.
1326 	 * If a second entry exists, it always conflict.
1327 	 */
1328 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1329 	    ISSET(rt->rt_flags, RTF_MPATH))
1330 		return (EEXIST);
1331 
1332 	/* No conflict but an entry exist so we need to force mpath. */
1333 	info->rti_flags |= RTF_MPATH;
1334 	return (0);
1335 }
1336 
1337 void
1338 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1339     struct rt_kmetrics *out)
1340 {
1341 	int64_t expire;
1342 
1343 	if (which & RTV_MTU)
1344 		out->rmx_mtu = in->rmx_mtu;
1345 	if (which & RTV_EXPIRE) {
1346 		expire = in->rmx_expire;
1347 		if (expire != 0) {
1348 			expire -= gettime();
1349 			expire += getuptime();
1350 		}
1351 
1352 		out->rmx_expire = expire;
1353 	}
1354 }
1355 
1356 void
1357 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1358 {
1359 	int64_t expire;
1360 
1361 	expire = in->rmx_expire;
1362 	if (expire != 0) {
1363 		expire -= getuptime();
1364 		expire += gettime();
1365 	}
1366 
1367 	bzero(out, sizeof(*out));
1368 	out->rmx_locks = in->rmx_locks;
1369 	out->rmx_mtu = in->rmx_mtu;
1370 	out->rmx_expire = expire;
1371 	out->rmx_pksent = in->rmx_pksent;
1372 }
1373 
/*
 * ROUNDUP(a): round the length `a' up to the next multiple of
 * sizeof(long); a length that is zero or negative yields sizeof(long).
 * ADVANCE(x, n): move the pointer `x' past the sockaddr `n', using the
 * rounded-up value of its sa_len.
 */
#define ROUNDUP(a)							\
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n)	(x += ROUNDUP((n)->sa_len))
1377 
1378 int
1379 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1380 {
1381 	struct sockaddr	*sa;
1382 	int		 i;
1383 
1384 	/*
1385 	 * Parse address bits, split address storage in chunks, and
1386 	 * set info pointers.  Use sa_len for traversing the memory
1387 	 * and check that we stay within in the limit.
1388 	 */
1389 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1390 	for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1391 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1392 			continue;
1393 		if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1394 			return (EINVAL);
1395 		sa = (struct sockaddr *)cp;
1396 		if (cp + sa->sa_len > cplim)
1397 			return (EINVAL);
1398 		rtinfo->rti_info[i] = sa;
1399 		ADVANCE(cp, sa);
1400 	}
1401 	/*
1402 	 * Check that the address family is suitable for the route address
1403 	 * type.  Check that each address has a size that fits its family
1404 	 * and its length is within the size.  Strings within addresses must
1405 	 * be NUL terminated.
1406 	 */
1407 	for (i = 0; i < RTAX_MAX; i++) {
1408 		size_t len, maxlen, size;
1409 
1410 		sa = rtinfo->rti_info[i];
1411 		if (sa == NULL)
1412 			continue;
1413 		maxlen = size = 0;
1414 		switch (i) {
1415 		case RTAX_DST:
1416 		case RTAX_GATEWAY:
1417 		case RTAX_SRC:
1418 			switch (sa->sa_family) {
1419 			case AF_INET:
1420 				size = sizeof(struct sockaddr_in);
1421 				break;
1422 			case AF_LINK:
1423 				size = sizeof(struct sockaddr_dl);
1424 				break;
1425 #ifdef INET6
1426 			case AF_INET6:
1427 				size = sizeof(struct sockaddr_in6);
1428 				break;
1429 #endif
1430 #ifdef MPLS
1431 			case AF_MPLS:
1432 				size = sizeof(struct sockaddr_mpls);
1433 				break;
1434 #endif
1435 			}
1436 			break;
1437 		case RTAX_IFP:
1438 			if (sa->sa_family != AF_LINK)
1439 				return (EAFNOSUPPORT);
1440 			/*
1441 			 * XXX Should be sizeof(struct sockaddr_dl), but
1442 			 * route(8) has a bug and provides less memory.
1443 			 * arp(8) has another bug and uses sizeof pointer.
1444 			 */
1445 			size = 4;
1446 			break;
1447 		case RTAX_IFA:
1448 			switch (sa->sa_family) {
1449 			case AF_INET:
1450 				size = sizeof(struct sockaddr_in);
1451 				break;
1452 #ifdef INET6
1453 			case AF_INET6:
1454 				size = sizeof(struct sockaddr_in6);
1455 				break;
1456 #endif
1457 			default:
1458 				return (EAFNOSUPPORT);
1459 			}
1460 			break;
1461 		case RTAX_LABEL:
1462 			sa->sa_family = AF_UNSPEC;
1463 			maxlen = RTLABEL_LEN;
1464 			size = sizeof(struct sockaddr_rtlabel);
1465 			break;
1466 #ifdef BFD
1467 		case RTAX_BFD:
1468 			sa->sa_family = AF_UNSPEC;
1469 			size = sizeof(struct sockaddr_bfd);
1470 			break;
1471 #endif
1472 		case RTAX_DNS:
1473 			/* more validation in rtm_validate_proposal */
1474 			if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1475 				return (EINVAL);
1476 			if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1477 			    sr_dns))
1478 				return (EINVAL);
1479 			switch (sa->sa_family) {
1480 			case AF_INET:
1481 #ifdef INET6
1482 			case AF_INET6:
1483 #endif
1484 				break;
1485 			default:
1486 				return (EAFNOSUPPORT);
1487 			}
1488 			break;
1489 		case RTAX_STATIC:
1490 			sa->sa_family = AF_UNSPEC;
1491 			maxlen = RTSTATIC_LEN;
1492 			size = sizeof(struct sockaddr_rtstatic);
1493 			break;
1494 		case RTAX_SEARCH:
1495 			sa->sa_family = AF_UNSPEC;
1496 			maxlen = RTSEARCH_LEN;
1497 			size = sizeof(struct sockaddr_rtsearch);
1498 			break;
1499 		}
1500 		if (size) {
1501 			/* memory for the full struct must be provided */
1502 			if (sa->sa_len < size)
1503 				return (EINVAL);
1504 		}
1505 		if (maxlen) {
1506 			/* this should not happen */
1507 			if (2 + maxlen > size)
1508 				return (EINVAL);
1509 			/* strings must be NUL terminated within the struct */
1510 			len = strnlen(sa->sa_data, maxlen);
1511 			if (len >= maxlen || 2 + len >= sa->sa_len)
1512 				return (EINVAL);
1513 			break;
1514 		}
1515 	}
1516 	return (0);
1517 }
1518 
1519 struct mbuf *
1520 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1521 {
1522 	struct rt_msghdr	*rtm;
1523 	struct mbuf		*m;
1524 	int			 i;
1525 	struct sockaddr		*sa;
1526 	int			 len, dlen, hlen;
1527 
1528 	switch (type) {
1529 	case RTM_DELADDR:
1530 	case RTM_NEWADDR:
1531 		len = sizeof(struct ifa_msghdr);
1532 		break;
1533 	case RTM_IFINFO:
1534 		len = sizeof(struct if_msghdr);
1535 		break;
1536 	case RTM_IFANNOUNCE:
1537 		len = sizeof(struct if_announcemsghdr);
1538 		break;
1539 #ifdef BFD
1540 	case RTM_BFD:
1541 		len = sizeof(struct bfd_msghdr);
1542 		break;
1543 #endif
1544 	case RTM_80211INFO:
1545 		len = sizeof(struct if_ieee80211_msghdr);
1546 		break;
1547 	default:
1548 		len = sizeof(struct rt_msghdr);
1549 		break;
1550 	}
1551 	if (len > MCLBYTES)
1552 		panic("rtm_msg1");
1553 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1554 	if (m && len > MHLEN) {
1555 		MCLGET(m, M_DONTWAIT);
1556 		if ((m->m_flags & M_EXT) == 0) {
1557 			m_free(m);
1558 			m = NULL;
1559 		}
1560 	}
1561 	if (m == NULL)
1562 		return (m);
1563 	m->m_pkthdr.len = m->m_len = hlen = len;
1564 	m->m_pkthdr.ph_ifidx = 0;
1565 	rtm = mtod(m, struct rt_msghdr *);
1566 	bzero(rtm, len);
1567 	for (i = 0; i < RTAX_MAX; i++) {
1568 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1569 			continue;
1570 		rtinfo->rti_addrs |= (1 << i);
1571 		dlen = ROUNDUP(sa->sa_len);
1572 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1573 			m_freem(m);
1574 			return (NULL);
1575 		}
1576 		len += dlen;
1577 	}
1578 	rtm->rtm_msglen = len;
1579 	rtm->rtm_hdrlen = hlen;
1580 	rtm->rtm_version = RTM_VERSION;
1581 	rtm->rtm_type = type;
1582 	return (m);
1583 }
1584 
1585 int
1586 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1587     struct walkarg *w)
1588 {
1589 	int		i;
1590 	int		len, dlen, hlen, second_time = 0;
1591 	caddr_t		cp0;
1592 
1593 	rtinfo->rti_addrs = 0;
1594 again:
1595 	switch (type) {
1596 	case RTM_DELADDR:
1597 	case RTM_NEWADDR:
1598 		len = sizeof(struct ifa_msghdr);
1599 		break;
1600 	case RTM_IFINFO:
1601 		len = sizeof(struct if_msghdr);
1602 		break;
1603 	default:
1604 		len = sizeof(struct rt_msghdr);
1605 		break;
1606 	}
1607 	hlen = len;
1608 	if ((cp0 = cp) != NULL)
1609 		cp += len;
1610 	for (i = 0; i < RTAX_MAX; i++) {
1611 		struct sockaddr *sa;
1612 
1613 		if ((sa = rtinfo->rti_info[i]) == NULL)
1614 			continue;
1615 		rtinfo->rti_addrs |= (1 << i);
1616 		dlen = ROUNDUP(sa->sa_len);
1617 		if (cp) {
1618 			bcopy(sa, cp, (size_t)dlen);
1619 			cp += dlen;
1620 		}
1621 		len += dlen;
1622 	}
1623 	/* align message length to the next natural boundary */
1624 	len = ALIGN(len);
1625 	if (cp == 0 && w != NULL && !second_time) {
1626 		w->w_needed += len;
1627 		if (w->w_needed <= 0 && w->w_where) {
1628 			if (w->w_tmemsize < len) {
1629 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1630 				w->w_tmem = malloc(len, M_RTABLE,
1631 				    M_NOWAIT | M_ZERO);
1632 				if (w->w_tmem)
1633 					w->w_tmemsize = len;
1634 			}
1635 			if (w->w_tmem) {
1636 				cp = w->w_tmem;
1637 				second_time = 1;
1638 				goto again;
1639 			} else
1640 				w->w_where = 0;
1641 		}
1642 	}
1643 	if (cp && w)		/* clear the message header */
1644 		bzero(cp0, hlen);
1645 
1646 	if (cp) {
1647 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1648 
1649 		rtm->rtm_version = RTM_VERSION;
1650 		rtm->rtm_type = type;
1651 		rtm->rtm_msglen = len;
1652 		rtm->rtm_hdrlen = hlen;
1653 	}
1654 	return (len);
1655 }
1656 
1657 void
1658 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1659 {
1660 	struct rt_addrinfo	 info;
1661 	struct ifnet		*ifp;
1662 	struct sockaddr_rtlabel	 sa_rl;
1663 	struct sockaddr_in6	 sa_mask;
1664 
1665 	memset(&info, 0, sizeof(info));
1666 	info.rti_info[RTAX_DST] = rt_key(rt);
1667 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1668 	if (!ISSET(rt->rt_flags, RTF_HOST))
1669 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1670 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1671 	ifp = if_get(rt->rt_ifidx);
1672 	if (ifp != NULL) {
1673 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1674 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1675 	}
1676 
1677 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1678 	    rtableid);
1679 	if_put(ifp);
1680 }
1681 
1682 /*
1683  * This routine is called to generate a message from the routing
1684  * socket indicating that a redirect has occurred, a routing lookup
1685  * has failed, or that a protocol has detected timeouts to a particular
1686  * destination.
1687  */
1688 void
1689 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1690     u_int ifidx, int error, u_int tableid)
1691 {
1692 	struct rt_msghdr	*rtm;
1693 	struct mbuf		*m;
1694 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1695 
1696 	if (rtptable.rtp_count == 0)
1697 		return;
1698 	m = rtm_msg1(type, rtinfo);
1699 	if (m == NULL)
1700 		return;
1701 	rtm = mtod(m, struct rt_msghdr *);
1702 	rtm->rtm_flags = RTF_DONE | flags;
1703 	rtm->rtm_priority = prio;
1704 	rtm->rtm_errno = error;
1705 	rtm->rtm_tableid = tableid;
1706 	rtm->rtm_addrs = rtinfo->rti_addrs;
1707 	rtm->rtm_index = ifidx;
1708 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1709 }
1710 
1711 /*
1712  * This routine is called to generate a message from the routing
1713  * socket indicating that the status of a network interface has changed.
1714  */
1715 void
1716 rtm_ifchg(struct ifnet *ifp)
1717 {
1718 	struct if_msghdr	*ifm;
1719 	struct mbuf		*m;
1720 
1721 	if (rtptable.rtp_count == 0)
1722 		return;
1723 	m = rtm_msg1(RTM_IFINFO, NULL);
1724 	if (m == NULL)
1725 		return;
1726 	ifm = mtod(m, struct if_msghdr *);
1727 	ifm->ifm_index = ifp->if_index;
1728 	ifm->ifm_tableid = ifp->if_rdomain;
1729 	ifm->ifm_flags = ifp->if_flags;
1730 	ifm->ifm_xflags = ifp->if_xflags;
1731 	if_getdata(ifp, &ifm->ifm_data);
1732 	ifm->ifm_addrs = 0;
1733 	route_input(m, NULL, AF_UNSPEC);
1734 }
1735 
1736 /*
1737  * This is called to generate messages from the routing socket
1738  * indicating a network interface has had addresses associated with it.
1739  * if we ever reverse the logic and replace messages TO the routing
1740  * socket indicate a request to configure interfaces, then it will
1741  * be unnecessary as the routing socket will automatically generate
1742  * copies of it.
1743  */
1744 void
1745 rtm_addr(int cmd, struct ifaddr *ifa)
1746 {
1747 	struct ifnet		*ifp = ifa->ifa_ifp;
1748 	struct mbuf		*m;
1749 	struct rt_addrinfo	 info;
1750 	struct ifa_msghdr	*ifam;
1751 
1752 	if (rtptable.rtp_count == 0)
1753 		return;
1754 
1755 	memset(&info, 0, sizeof(info));
1756 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1757 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1758 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1759 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1760 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1761 		return;
1762 	ifam = mtod(m, struct ifa_msghdr *);
1763 	ifam->ifam_index = ifp->if_index;
1764 	ifam->ifam_metric = ifa->ifa_metric;
1765 	ifam->ifam_flags = ifa->ifa_flags;
1766 	ifam->ifam_addrs = info.rti_addrs;
1767 	ifam->ifam_tableid = ifp->if_rdomain;
1768 
1769 	route_input(m, NULL,
1770 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1771 }
1772 
1773 /*
1774  * This is called to generate routing socket messages indicating
1775  * network interface arrival and departure.
1776  */
1777 void
1778 rtm_ifannounce(struct ifnet *ifp, int what)
1779 {
1780 	struct if_announcemsghdr	*ifan;
1781 	struct mbuf			*m;
1782 
1783 	if (rtptable.rtp_count == 0)
1784 		return;
1785 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1786 	if (m == NULL)
1787 		return;
1788 	ifan = mtod(m, struct if_announcemsghdr *);
1789 	ifan->ifan_index = ifp->if_index;
1790 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1791 	ifan->ifan_what = what;
1792 	route_input(m, NULL, AF_UNSPEC);
1793 }
1794 
#ifdef BFD
/*
 * Generate an RTM_BFD routing socket message for the BFD session `bfd'.
 * The message carries the destination and interface address of the
 * session's route, followed in the header by the sockaddr_bfd produced
 * by bfd2sa().
 *
 * Nothing is sent when rtptable.rtp_count is 0 or when the message
 * cannot be allocated.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rt_addrinfo	 ai;
	struct sockaddr_bfd	 state;
	struct bfd_msghdr	*hdr;
	struct mbuf		*m;

	if (rtptable.rtp_count == 0)
		return;

	memset(&ai, 0, sizeof(ai));
	ai.rti_info[RTAX_DST] = rt_key(bfd->bc_rt);
	ai.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &ai)) == NULL)
		return;

	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = ai.rti_addrs;

	/* Snapshot the session state, then copy it into the header. */
	bfd2sa(bfd->bc_rt, &state);
	memcpy(&hdr->bm_sa, &state, sizeof(state));

	route_input(m, NULL, ai.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1826 
1827 /*
1828  * This is used to generate routing socket messages indicating
1829  * the state of an ieee80211 interface.
1830  */
1831 void
1832 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1833 {
1834 	struct if_ieee80211_msghdr	*ifim;
1835 	struct mbuf			*m;
1836 
1837 	if (rtptable.rtp_count == 0)
1838 		return;
1839 	m = rtm_msg1(RTM_80211INFO, NULL);
1840 	if (m == NULL)
1841 		return;
1842 	ifim = mtod(m, struct if_ieee80211_msghdr *);
1843 	ifim->ifim_index = ifp->if_index;
1844 	ifim->ifim_tableid = ifp->if_rdomain;
1845 
1846 	memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1847 	route_input(m, NULL, AF_UNSPEC);
1848 }
1849 
1850 /*
1851  * This is used to generate routing socket messages indicating
1852  * the address selection proposal from an interface.
1853  */
1854 void
1855 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1856     uint8_t prio)
1857 {
1858 	struct rt_msghdr	*rtm;
1859 	struct mbuf		*m;
1860 
1861 	m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1862 	if (m == NULL)
1863 		return;
1864 	rtm = mtod(m, struct rt_msghdr *);
1865 	rtm->rtm_flags = RTF_DONE | flags;
1866 	rtm->rtm_priority = prio;
1867 	rtm->rtm_tableid = ifp->if_rdomain;
1868 	rtm->rtm_index = ifp->if_index;
1869 	rtm->rtm_addrs = rtinfo->rti_addrs;
1870 
1871 	route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1872 }
1873 
1874 /*
1875  * This is used in dumping the kernel table via sysctl().
1876  */
1877 int
1878 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1879 {
1880 	struct walkarg		*w = v;
1881 	int			 error = 0, size;
1882 	struct rt_addrinfo	 info;
1883 	struct ifnet		*ifp;
1884 #ifdef BFD
1885 	struct sockaddr_bfd	 sa_bfd;
1886 #endif
1887 	struct sockaddr_rtlabel	 sa_rl;
1888 	struct sockaddr_in6	 sa_mask;
1889 
1890 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1891 		return 0;
1892 	if (w->w_op == NET_RT_DUMP && w->w_arg) {
1893 		u_int8_t prio = w->w_arg & RTP_MASK;
1894 		if (w->w_arg < 0) {
1895 			prio = (-w->w_arg) & RTP_MASK;
1896 			/* Show all routes that are not this priority */
1897 			if (prio == (rt->rt_priority & RTP_MASK))
1898 				return 0;
1899 		} else {
1900 			if (prio != (rt->rt_priority & RTP_MASK) &&
1901 			    prio != RTP_ANY)
1902 				return 0;
1903 		}
1904 	}
1905 	bzero(&info, sizeof(info));
1906 	info.rti_info[RTAX_DST] = rt_key(rt);
1907 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1908 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1909 	ifp = if_get(rt->rt_ifidx);
1910 	if (ifp != NULL) {
1911 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1912 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1913 		if (ifp->if_flags & IFF_POINTOPOINT)
1914 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1915 	}
1916 	if_put(ifp);
1917 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1918 #ifdef BFD
1919 	if (rt->rt_flags & RTF_BFD)
1920 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
1921 #endif
1922 #ifdef MPLS
1923 	if (rt->rt_flags & RTF_MPLS) {
1924 		struct sockaddr_mpls	 sa_mpls;
1925 
1926 		bzero(&sa_mpls, sizeof(sa_mpls));
1927 		sa_mpls.smpls_family = AF_MPLS;
1928 		sa_mpls.smpls_len = sizeof(sa_mpls);
1929 		sa_mpls.smpls_label = ((struct rt_mpls *)
1930 		    rt->rt_llinfo)->mpls_label;
1931 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1932 		info.rti_mpls = ((struct rt_mpls *)
1933 		    rt->rt_llinfo)->mpls_operation;
1934 	}
1935 #endif
1936 
1937 	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1938 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1939 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1940 
1941 		rtm->rtm_pid = curproc->p_p->ps_pid;
1942 		rtm->rtm_flags = rt->rt_flags;
1943 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
1944 		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1945 		/* Do not account the routing table's reference. */
1946 		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
1947 		rtm->rtm_index = rt->rt_ifidx;
1948 		rtm->rtm_addrs = info.rti_addrs;
1949 		rtm->rtm_tableid = id;
1950 #ifdef MPLS
1951 		rtm->rtm_mpls = info.rti_mpls;
1952 #endif
1953 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1954 			w->w_where = NULL;
1955 		else
1956 			w->w_where += size;
1957 	}
1958 	return (error);
1959 }
1960 
1961 int
1962 sysctl_iflist(int af, struct walkarg *w)
1963 {
1964 	struct ifnet		*ifp;
1965 	struct ifaddr		*ifa;
1966 	struct rt_addrinfo	 info;
1967 	int			 len, error = 0;
1968 
1969 	bzero(&info, sizeof(info));
1970 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1971 		if (w->w_arg && w->w_arg != ifp->if_index)
1972 			continue;
1973 		/* Copy the link-layer address first */
1974 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1975 		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
1976 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1977 			struct if_msghdr *ifm;
1978 
1979 			ifm = (struct if_msghdr *)w->w_tmem;
1980 			ifm->ifm_index = ifp->if_index;
1981 			ifm->ifm_tableid = ifp->if_rdomain;
1982 			ifm->ifm_flags = ifp->if_flags;
1983 			if_getdata(ifp, &ifm->ifm_data);
1984 			ifm->ifm_addrs = info.rti_addrs;
1985 			error = copyout(ifm, w->w_where, len);
1986 			if (error)
1987 				return (error);
1988 			w->w_where += len;
1989 		}
1990 		info.rti_info[RTAX_IFP] = NULL;
1991 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1992 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
1993 			if (af && af != ifa->ifa_addr->sa_family)
1994 				continue;
1995 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1996 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1997 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1998 			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
1999 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
2000 				struct ifa_msghdr *ifam;
2001 
2002 				ifam = (struct ifa_msghdr *)w->w_tmem;
2003 				ifam->ifam_index = ifa->ifa_ifp->if_index;
2004 				ifam->ifam_flags = ifa->ifa_flags;
2005 				ifam->ifam_metric = ifa->ifa_metric;
2006 				ifam->ifam_addrs = info.rti_addrs;
2007 				error = copyout(w->w_tmem, w->w_where, len);
2008 				if (error)
2009 					return (error);
2010 				w->w_where += len;
2011 			}
2012 		}
2013 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2014 		    info.rti_info[RTAX_BRD] = NULL;
2015 	}
2016 	return (0);
2017 }
2018 
2019 int
2020 sysctl_ifnames(struct walkarg *w)
2021 {
2022 	struct if_nameindex_msg ifn;
2023 	struct ifnet *ifp;
2024 	int error = 0;
2025 
2026 	/* XXX ignore tableid for now */
2027 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2028 		if (w->w_arg && w->w_arg != ifp->if_index)
2029 			continue;
2030 		w->w_needed += sizeof(ifn);
2031 		if (w->w_where && w->w_needed <= 0) {
2032 
2033 			memset(&ifn, 0, sizeof(ifn));
2034 			ifn.if_index = ifp->if_index;
2035 			strlcpy(ifn.if_name, ifp->if_xname,
2036 			    sizeof(ifn.if_name));
2037 			error = copyout(&ifn, w->w_where, sizeof(ifn));
2038 			if (error)
2039 				return (error);
2040 			w->w_where += sizeof(ifn);
2041 		}
2042 	}
2043 
2044 	return (0);
2045 }
2046 
2047 int
2048 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2049     size_t newlen)
2050 {
2051 	int			 i, error = EINVAL;
2052 	u_char			 af;
2053 	struct walkarg		 w;
2054 	struct rt_tableinfo	 tableinfo;
2055 	u_int			 tableid = 0;
2056 
2057 	if (new)
2058 		return (EPERM);
2059 	if (namelen < 3 || namelen > 4)
2060 		return (EINVAL);
2061 	af = name[0];
2062 	bzero(&w, sizeof(w));
2063 	w.w_where = where;
2064 	w.w_given = *given;
2065 	w.w_needed = 0 - w.w_given;
2066 	w.w_op = name[1];
2067 	w.w_arg = name[2];
2068 
2069 	if (namelen == 4) {
2070 		tableid = name[3];
2071 		if (!rtable_exists(tableid))
2072 			return (ENOENT);
2073 	} else
2074 		tableid = curproc->p_p->ps_rtableid;
2075 
2076 	switch (w.w_op) {
2077 	case NET_RT_DUMP:
2078 	case NET_RT_FLAGS:
2079 		NET_LOCK();
2080 		for (i = 1; i <= AF_MAX; i++) {
2081 			if (af != 0 && af != i)
2082 				continue;
2083 
2084 			error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2085 			    &w);
2086 			if (error == EAFNOSUPPORT)
2087 				error = 0;
2088 			if (error)
2089 				break;
2090 		}
2091 		NET_UNLOCK();
2092 		break;
2093 
2094 	case NET_RT_IFLIST:
2095 		NET_LOCK();
2096 		error = sysctl_iflist(af, &w);
2097 		NET_UNLOCK();
2098 		break;
2099 
2100 	case NET_RT_STATS:
2101 		return (sysctl_rtable_rtstat(where, given, new));
2102 	case NET_RT_TABLE:
2103 		tableid = w.w_arg;
2104 		if (!rtable_exists(tableid))
2105 			return (ENOENT);
2106 		memset(&tableinfo, 0, sizeof tableinfo);
2107 		tableinfo.rti_tableid = tableid;
2108 		tableinfo.rti_domainid = rtable_l2(tableid);
2109 		error = sysctl_rdstruct(where, given, new,
2110 		    &tableinfo, sizeof(tableinfo));
2111 		return (error);
2112 	case NET_RT_IFNAMES:
2113 		NET_LOCK();
2114 		error = sysctl_ifnames(&w);
2115 		NET_UNLOCK();
2116 		break;
2117 	}
2118 	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2119 	w.w_needed += w.w_given;
2120 	if (where) {
2121 		*given = w.w_where - (caddr_t)where;
2122 		if (*given < w.w_needed)
2123 			return (ENOMEM);
2124 	} else
2125 		*given = (11 * w.w_needed) / 10;
2126 
2127 	return (error);
2128 }
2129 
2130 int
2131 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2132 {
2133 	extern struct cpumem *rtcounters;
2134 	uint64_t counters[rts_ncounters];
2135 	struct rtstat rtstat;
2136 	uint32_t *words = (uint32_t *)&rtstat;
2137 	int i;
2138 
2139 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2140 	memset(&rtstat, 0, sizeof rtstat);
2141 	counters_read(rtcounters, counters, nitems(counters));
2142 
2143 	for (i = 0; i < nitems(counters); i++)
2144 		words[i] = (uint32_t)counters[i];
2145 
2146 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2147 }
2148 
2149 int
2150 rtm_validate_proposal(struct rt_addrinfo *info)
2151 {
2152 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2153 	    RTA_SEARCH)) {
2154 		return -1;
2155 	}
2156 
2157 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2158 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2159 		if (sa == NULL)
2160 			return -1;
2161 		switch (sa->sa_family) {
2162 		case AF_INET:
2163 			if (sa->sa_len != sizeof(struct sockaddr_in))
2164 				return -1;
2165 			break;
2166 		case AF_INET6:
2167 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2168 				return -1;
2169 			break;
2170 		default:
2171 			return -1;
2172 		}
2173 	}
2174 
2175 	if (ISSET(info->rti_addrs, RTA_IFA)) {
2176 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
2177 		if (sa == NULL)
2178 			return -1;
2179 		switch (sa->sa_family) {
2180 		case AF_INET:
2181 			if (sa->sa_len != sizeof(struct sockaddr_in))
2182 				return -1;
2183 			break;
2184 		case AF_INET6:
2185 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2186 				return -1;
2187 			break;
2188 		default:
2189 			return -1;
2190 		}
2191 	}
2192 
2193 	if (ISSET(info->rti_addrs, RTA_DNS)) {
2194 		struct sockaddr_rtdns *rtdns =
2195 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2196 		if (rtdns == NULL)
2197 			return -1;
2198 		if (rtdns->sr_len > sizeof(*rtdns))
2199 			return -1;
2200 		if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2201 			return -1;
2202 		switch (rtdns->sr_family) {
2203 		case AF_INET:
2204 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2205 			    sr_dns)) % sizeof(struct in_addr) != 0)
2206 				return -1;
2207 			break;
2208 #ifdef INET6
2209 		case AF_INET6:
2210 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2211 			    sr_dns)) % sizeof(struct in6_addr) != 0)
2212 				return -1;
2213 			break;
2214 #endif
2215 		default:
2216 			return -1;
2217 		}
2218 	}
2219 
2220 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
2221 		struct sockaddr_rtstatic *rtstatic =
2222 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2223 		if (rtstatic == NULL)
2224 			return -1;
2225 		if (rtstatic->sr_len > sizeof(*rtstatic))
2226 			return -1;
2227 		if (rtstatic->sr_len <=
2228 		    offsetof(struct sockaddr_rtstatic, sr_static))
2229 			return -1;
2230 	}
2231 
2232 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2233 		struct sockaddr_rtsearch *rtsearch =
2234 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2235 		if (rtsearch == NULL)
2236 			return -1;
2237 		if (rtsearch->sr_len > sizeof(*rtsearch))
2238 			return -1;
2239 		if (rtsearch->sr_len <=
2240 		    offsetof(struct sockaddr_rtsearch, sr_search))
2241 			return -1;
2242 	}
2243 
2244 	return 0;
2245 }
2246 
2247 /*
2248  * Definitions of protocols supported in the ROUTE domain.
2249  */
2250 
2251 struct domain routedomain;
2252 
2253 struct protosw routesw[] = {
2254 {
2255   .pr_type	= SOCK_RAW,
2256   .pr_domain	= &routedomain,
2257   .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2258   .pr_output	= route_output,
2259   .pr_ctloutput	= route_ctloutput,
2260   .pr_usrreq	= route_usrreq,
2261   .pr_attach	= route_attach,
2262   .pr_detach	= route_detach,
2263   .pr_init	= route_prinit,
2264   .pr_sysctl	= sysctl_rtable
2265 }
2266 };
2267 
2268 struct domain routedomain = {
2269   .dom_family = PF_ROUTE,
2270   .dom_name = "route",
2271   .dom_init = route_init,
2272   .dom_protosw = routesw,
2273   .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2274 };
2275