xref: /openbsd-src/sys/net/rtsock.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: rtsock.c,v 1.205 2016/09/17 07:35:05 phessler Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/protosw.h>
73 
74 #include <net/if.h>
75 #include <net/if_dl.h>
76 #include <net/if_var.h>
77 #include <net/route.h>
78 #include <net/raw_cb.h>
79 
80 #include <netinet/in.h>
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif
85 #ifdef BFD
86 #include <net/bfd.h>
87 #endif
88 
89 #include <sys/stdarg.h>
90 #include <sys/kernel.h>
91 #include <sys/timeout.h>
92 
/*
 * Fixed addresses and protocol descriptor used for routing messages:
 * route_output() passes all three to route_input(), and route_src is also
 * the foreign address given to every routing socket on attach and the
 * source address used by rt_senddesync().
 * NOTE(review): the positional initialisers presumably set sa_len = 2 and
 * sa_family = PF_ROUTE (resp. sp_family = PF_ROUTE) -- confirm against the
 * struct sockaddr / struct sockproto declarations.
 */
93 struct sockaddr		route_dst = { 2, PF_ROUTE, };
94 struct sockaddr		route_src = { 2, PF_ROUTE, };
95 struct sockproto	route_proto = { PF_ROUTE, };
96 
97 struct walkarg {
98 	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
99 	caddr_t	w_where, w_tmem;
100 };
101 
/* Routing-socket handlers and route_output() helpers. */
102 int	route_ctloutput(int, struct socket *, int, int, struct mbuf **);
103 void	route_input(struct mbuf *m0, ...);
104 int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
105 int	route_cleargateway(struct rtentry *, void *, unsigned int);
106 
/* Routing message construction and sockaddr parsing. */
107 struct mbuf	*rt_msg1(int, struct rt_addrinfo *);
108 int		 rt_msg2(int, int, struct rt_addrinfo *, caddr_t,
109 		     struct walkarg *);
110 void		 rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
111 
/* sysctl helpers operating on a struct walkarg. */
112 int		 sysctl_iflist(int, struct walkarg *);
113 int		 sysctl_ifnames(struct walkarg *);
114 
/*
 * Per-socket control block of a routing socket.  The embedded rawcb must
 * stay the first member: code in this file casts a struct rawcb * straight
 * to a struct routecb *.
 */
115 struct routecb {
116 	struct rawcb	rcb;
	/* runs rt_senddesync(); re-armed there until RTM_DESYNC is queued */
117 	struct timeout	timeout;
	/* ROUTE_MSGFILTER: bit (1 << rtm_type) per wanted type, 0 = no filter */
118 	unsigned int	msgfilter;
	/* ROUTECB_FLAG_* bits */
119 	unsigned int	flags;
	/* ROUTE_TABLEFILTER: table to receive messages for, or RTABLE_ANY */
120 	u_int		rtableid;
121 };
/* Interpret a socket's so_pcb as the routecb installed on PRU_ATTACH. */
122 #define	sotoroutecb(so)	((struct routecb *)(so)->so_pcb)
123 
/*
 * Counters of attached routing sockets, updated by route_usrreq() on
 * PRU_ATTACH and PRU_DETACH.
 */
struct route_cb {
	int		ip_count;	/* sockets attached with AF_INET */
	int		ip6_count;	/* sockets attached with AF_INET6 */
	int		mpls_count;	/* sockets attached with AF_MPLS */
	int		any_count;	/* all attached routing sockets */
} route_cb;
132 
133 /*
134  * These flags and timeout are used for indicating to userland (via a
135  * RTM_DESYNC msg) when the route socket has overflowed and messages
136  * have been lost.
137  */
138 #define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
139 #define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
140 					   queueing more packets */
141 
/*
 * Delay before rt_senddesync() retries queueing the RTM_DESYNC message.
 * NOTE(review): hz is presumably the number of clock ticks per second,
 * making this one fifth of a second -- confirm.
 */
142 #define ROUTE_DESYNC_RESEND_TIMEOUT	(hz / 5)	/* In hz */
143 
/* Timeout handler; also called directly by route_input() on overflow. */
144 void	rt_senddesync(void *);
145 
146 int
147 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
148     struct mbuf *control, struct proc *p)
149 {
150 	struct rawcb	*rp;
151 	struct routecb	*rop;
152 	int		 s, af;
153 	int		 error = 0;
154 
155 	s = splsoftnet();
156 	rp = sotorawcb(so);
157 
158 	switch (req) {
159 	case PRU_ATTACH:
160 		/*
161 		 * use the rawcb but allocate a routecb, this
162 		 * code does not care about the additional fields
163 		 * and works directly on the raw socket.
164 		 */
165 		rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO);
166 		rp = &rop->rcb;
167 		so->so_pcb = rp;
168 		/* Init the timeout structure */
169 		timeout_set(&((struct routecb *)rp)->timeout, rt_senddesync, rp);
170 		/*
171 		 * Don't call raw_usrreq() in the attach case, because
172 		 * we want to allow non-privileged processes to listen
173 		 * on and send "safe" commands to the routing socket.
174 		 */
175 		if (curproc == 0)
176 			error = EACCES;
177 		else
178 			error = raw_attach(so, (int)(long)nam);
179 		if (error) {
180 			free(rop, M_PCB, sizeof(struct routecb));
181 			splx(s);
182 			return (error);
183 		}
184 		rop->rtableid = curproc->p_p->ps_rtableid;
185 		af = rp->rcb_proto.sp_protocol;
186 		if (af == AF_INET)
187 			route_cb.ip_count++;
188 		else if (af == AF_INET6)
189 			route_cb.ip6_count++;
190 #ifdef MPLS
191 		else if (af == AF_MPLS)
192 			route_cb.mpls_count++;
193 #endif
194 		rp->rcb_faddr = &route_src;
195 		route_cb.any_count++;
196 		soisconnected(so);
197 		so->so_options |= SO_USELOOPBACK;
198 		break;
199 
200 	case PRU_RCVD:
201 		rop = (struct routecb *)rp;
202 
203 		/*
204 		 * If we are in a FLUSH state, check if the buffer is
205 		 * empty so that we can clear the flag.
206 		 */
207 		if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) &&
208 		    ((sbspace(&rp->rcb_socket->so_rcv) ==
209 		    rp->rcb_socket->so_rcv.sb_hiwat)))
210 			rop->flags &= ~ROUTECB_FLAG_FLUSH;
211 		break;
212 
213 	case PRU_DETACH:
214 		if (rp) {
215 			timeout_del(&((struct routecb *)rp)->timeout);
216 			af = rp->rcb_proto.sp_protocol;
217 			if (af == AF_INET)
218 				route_cb.ip_count--;
219 			else if (af == AF_INET6)
220 				route_cb.ip6_count--;
221 #ifdef MPLS
222 			else if (af == AF_MPLS)
223 				route_cb.mpls_count--;
224 #endif
225 			route_cb.any_count--;
226 		}
227 		/* FALLTHROUGH */
228 	default:
229 		error = raw_usrreq(so, req, m, nam, control, p);
230 	}
231 
232 	splx(s);
233 	return (error);
234 }
235 
236 int
237 route_ctloutput(int op, struct socket *so, int level, int optname,
238     struct mbuf **mp)
239 {
240 	struct routecb *rop = sotoroutecb(so);
241 	struct mbuf *m = *mp;
242 	int error = 0;
243 	unsigned int tid;
244 
245 	if (level != AF_ROUTE) {
246 		error = EINVAL;
247 		if (op == PRCO_SETOPT && *mp)
248 			m_free(*mp);
249 		return (error);
250 	}
251 
252 	switch (op) {
253 	case PRCO_SETOPT:
254 		switch (optname) {
255 		case ROUTE_MSGFILTER:
256 			if (m == NULL || m->m_len != sizeof(unsigned int))
257 				error = EINVAL;
258 			else
259 				rop->msgfilter = *mtod(m, unsigned int *);
260 			break;
261 		case ROUTE_TABLEFILTER:
262 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
263 				error = EINVAL;
264 				break;
265 			}
266 			tid = *mtod(m, unsigned int *);
267 			if (tid != RTABLE_ANY && !rtable_exists(tid))
268 				error = ENOENT;
269 			else
270 				rop->rtableid = tid;
271 			break;
272 		default:
273 			error = ENOPROTOOPT;
274 			break;
275 		}
276 		if (m)
277 			m_free(m);
278 		break;
279 	case PRCO_GETOPT:
280 		switch (optname) {
281 		case ROUTE_MSGFILTER:
282 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
283 			m->m_len = sizeof(unsigned int);
284 			*mtod(m, unsigned int *) = rop->msgfilter;
285 			break;
286 		case ROUTE_TABLEFILTER:
287 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
288 			m->m_len = sizeof(unsigned int);
289 			*mtod(m, unsigned int *) = rop->rtableid;
290 			break;
291 		default:
292 			error = ENOPROTOOPT;
293 			break;
294 		}
295 	}
296 	return (error);
297 }
298 
299 void
300 rt_senddesync(void *data)
301 {
302 	struct rawcb	*rp;
303 	struct routecb	*rop;
304 	struct mbuf	*desync_mbuf;
305 
306 	rp = (struct rawcb *)data;
307 	rop = (struct routecb *)rp;
308 
309 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
310 	if ((rop->flags & ROUTECB_FLAG_DESYNC) != 0) {
311 		/*
312 		 * If we fail to alloc memory or if sbappendaddr()
313 		 * fails, re-add timeout and try again.
314 		 */
315 		desync_mbuf = rt_msg1(RTM_DESYNC, NULL);
316 		if ((desync_mbuf != NULL) &&
317 		    (sbappendaddr(&rp->rcb_socket->so_rcv, &route_src,
318 		    desync_mbuf, (struct mbuf *)NULL) != 0)) {
319 			rop->flags &= ~ROUTECB_FLAG_DESYNC;
320 			sorwakeup(rp->rcb_socket);
321 		} else {
322 			m_freem(desync_mbuf);
323 			/* Re-add timeout to try sending msg again */
324 			timeout_add(&rop->timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
325 		}
326 	}
327 }
328 
329 void
330 route_input(struct mbuf *m0, ...)
331 {
332 	struct rawcb *rp;
333 	struct routecb *rop;
334 	struct rt_msghdr *rtm;
335 	struct mbuf *m = m0;
336 	int sockets = 0;
337 	struct socket *last = NULL;
338 	va_list ap;
339 	struct sockproto *proto;
340 	struct sockaddr *sosrc, *sodst;
341 
342 	va_start(ap, m0);
343 	proto = va_arg(ap, struct sockproto *);
344 	sosrc = va_arg(ap, struct sockaddr *);
345 	sodst = va_arg(ap, struct sockaddr *);
346 	va_end(ap);
347 
348 	/* ensure that we can access the rtm_type via mtod() */
349 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
350 		m_freem(m);
351 		return;
352 	}
353 
354 	LIST_FOREACH(rp, &rawcb, rcb_list) {
355 		if (rp->rcb_socket->so_state & SS_CANTRCVMORE)
356 			continue;
357 		if (rp->rcb_proto.sp_family != proto->sp_family)
358 			continue;
359 		if (rp->rcb_proto.sp_protocol && proto->sp_protocol &&
360 		    rp->rcb_proto.sp_protocol != proto->sp_protocol)
361 			continue;
362 		/*
363 		 * We assume the lower level routines have
364 		 * placed the address in a canonical format
365 		 * suitable for a structure comparison.
366 		 *
367 		 * Note that if the lengths are not the same
368 		 * the comparison will fail at the first byte.
369 		 */
370 #define	equal(a1, a2) \
371   (bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0)
372 		if (rp->rcb_laddr && !equal(rp->rcb_laddr, sodst))
373 			continue;
374 		if (rp->rcb_faddr && !equal(rp->rcb_faddr, sosrc))
375 			continue;
376 
377 		/* filter messages that the process does not want */
378 		rop = (struct routecb *)rp;
379 		rtm = mtod(m, struct rt_msghdr *);
380 		/* but RTM_DESYNC can't be filtered */
381 		if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 &&
382 		    !(rop->msgfilter & (1 << rtm->rtm_type)))
383 			continue;
384 		switch (rtm->rtm_type) {
385 		case RTM_IFANNOUNCE:
386 		case RTM_DESYNC:
387 			/* no tableid */
388 			break;
389 		case RTM_RESOLVE:
390 		case RTM_NEWADDR:
391 		case RTM_DELADDR:
392 		case RTM_IFINFO:
393 			/* check against rdomain id */
394 			if (rop->rtableid != RTABLE_ANY &&
395 			    rtable_l2(rop->rtableid) != rtm->rtm_tableid)
396 				continue;
397 			break;
398 		default:
399 			/* check against rtable id */
400 			if (rop->rtableid != RTABLE_ANY &&
401 			    rop->rtableid != rtm->rtm_tableid)
402 				continue;
403 			break;
404 		}
405 
406 		/*
407 		 * Check to see if the flush flag is set. If so, don't queue
408 		 * any more messages until the flag is cleared.
409 		 */
410 		if ((rop->flags & ROUTECB_FLAG_FLUSH) != 0)
411 			continue;
412 
413 		if (last) {
414 			struct mbuf *n;
415 			if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
416 				if (sbspace(&last->so_rcv) < (2 * MSIZE) ||
417 				    sbappendaddr(&last->so_rcv, sosrc,
418 				    n, (struct mbuf *)NULL) == 0) {
419 					/*
420 					 * Flag socket as desync'ed and
421 					 * flush required
422 					 */
423 					sotoroutecb(last)->flags |=
424 					    ROUTECB_FLAG_DESYNC |
425 					    ROUTECB_FLAG_FLUSH;
426 					rt_senddesync((void *) sotorawcb(last));
427 					m_freem(n);
428 				} else {
429 					sorwakeup(last);
430 					sockets++;
431 				}
432 			}
433 		}
434 		last = rp->rcb_socket;
435 	}
436 	if (last) {
437 		if (sbspace(&last->so_rcv) < (2 * MSIZE) ||
438 		    sbappendaddr(&last->so_rcv, sosrc,
439 		    m, (struct mbuf *)NULL) == 0) {
440 			/* Flag socket as desync'ed and flush required */
441 			sotoroutecb(last)->flags |=
442 			    ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
443 			rt_senddesync((void *) sotorawcb(last));
444 			m_freem(m);
445 		} else {
446 			sorwakeup(last);
447 			sockets++;
448 		}
449 	} else
450 		m_freem(m);
451 }
452 
/*
 * Process a routing message written to a routing socket.
 *
 * The single variadic argument is the sending socket.  The message in ``m''
 * is validated, copied into a private linear buffer (rtm) and dispatched on
 * rtm_type: RTM_ADD, RTM_DELETE, RTM_GET, RTM_CHANGE and RTM_LOCK are the
 * only types accepted, and everything but RTM_GET requires suser().  On the
 * ``flush'' path the (possibly rewritten) message, carrying either
 * rtm_errno or RTF_DONE, is copied back into the mbuf and handed to
 * route_input() for delivery to the listeners; on the ``fail'' path the
 * message is simply dropped.
 *
 * Returns 0 or an errno value.  ``m'' is consumed on every path that gets
 * past the initial m_pullup().
 */
453 int
454 route_output(struct mbuf *m, ...)
455 {
456 	struct rt_msghdr	*rtm = NULL;
457 	struct rtentry		*rt = NULL;
458 	struct rtentry		*saved_nrt = NULL;
459 	struct rt_addrinfo	 info;
460 	int			 plen, len, newgate = 0, error = 0;
461 	struct ifnet		*ifp = NULL;
462 	struct ifaddr		*ifa = NULL;
463 	struct socket		*so;
464 	struct rawcb		*rp = NULL;
465 	struct sockaddr_rtlabel	 sa_rl;
466 	struct sockaddr_in6	 sa_mask;
467 #ifdef MPLS
468 	struct sockaddr_mpls	 sa_mpls, *psa_mpls;
469 #endif
470 	va_list			 ap;
471 	u_int			 tableid;
472 	u_int8_t		 prio;
473 	u_char			 vers;
474 
475 	va_start(ap, m);
476 	so = va_arg(ap, struct socket *);
477 	va_end(ap);
478 
479 	info.rti_info[RTAX_DST] = NULL;	/* for error handling (goto flush) */
480 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
481 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
482 		return (ENOBUFS);
483 	if ((m->m_flags & M_PKTHDR) == 0)
484 		panic("route_output");
	/* The packet length must agree with the length claimed in the header. */
485 	len = m->m_pkthdr.len;
486 	if (len < offsetof(struct rt_msghdr, rtm_type) + 1 ||
487 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
488 		error = EINVAL;
489 		goto fail;
490 	}
491 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
492 	switch (vers) {
493 	case RTM_VERSION:
494 		if (len < sizeof(struct rt_msghdr)) {
495 			error = EINVAL;
496 			goto fail;
497 		}
498 		if (len > RTM_MAXSIZE) {
499 			error = EMSGSIZE;
500 			goto fail;
501 		}
		/* Work on a private, linear copy of the message. */
502 		rtm = malloc(len, M_RTABLE, M_NOWAIT);
503 		if (rtm == NULL) {
504 			error = ENOBUFS;
505 			goto fail;
506 		}
507 		m_copydata(m, 0, len, (caddr_t)rtm);
508 		break;
509 	default:
510 		error = EPROTONOSUPPORT;
511 		goto fail;
512 	}
513 	rtm->rtm_pid = curproc->p_p->ps_pid;
514 	if (rtm->rtm_hdrlen == 0)	/* old client */
515 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
516 	if (len < rtm->rtm_hdrlen) {
517 		error = EINVAL;
518 		goto fail;
519 	}
520 
521 	/* Verify that the caller is sending an appropriate message early */
522 	switch (rtm->rtm_type) {
523 	case RTM_ADD:
524 	case RTM_DELETE:
525 	case RTM_GET:
526 	case RTM_CHANGE:
527 	case RTM_LOCK:
528 		break;
529 	default:
530 		error = EOPNOTSUPP;
531 		goto fail;
532 	}
533 
534 	/*
535 	 * Verify that the caller has the appropriate privilege; RTM_GET
536 	 * is the only operation the non-superuser is allowed.
537 	 */
538 	if (rtm->rtm_type != RTM_GET && suser(curproc, 0) != 0) {
539 		error = EACCES;
540 		goto fail;
541 	}
	/*
	 * Only RTM_ADD may bring a missing routing table into existence.
	 * Note that ``info'' has not been zeroed yet on these flush paths;
	 * only rti_info[RTAX_DST] was cleared above, which is the one field
	 * the flush code reads.
	 */
542 	tableid = rtm->rtm_tableid;
543 	if (!rtable_exists(tableid)) {
544 		if (rtm->rtm_type == RTM_ADD) {
545 			if ((error = rtable_add(tableid)) != 0)
546 				goto flush;
547 		} else {
548 			error = EINVAL;
549 			goto flush;
550 		}
551 	}
552 
553 
554 	/* Do not let userland play with kernel-only flags. */
555 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
556 		error = EINVAL;
557 		goto fail;
558 	}
559 
560 	/* make sure that kernel-only bits are not set */
561 	rtm->rtm_priority &= RTP_MASK;
562 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
563 	rtm->rtm_fmask &= RTF_FMASK;
564 
	/*
	 * Pick the priority: an explicit one is validated and used as is;
	 * otherwise lookups match any priority and new routes get 0
	 * (RTF_STATIC) or RTP_DEFAULT.
	 */
565 	if (rtm->rtm_priority != 0) {
566 		if (rtm->rtm_priority > RTP_MAX ||
567 		    rtm->rtm_priority == RTP_LOCAL) {
568 			error = EINVAL;
569 			goto fail;
570 		}
571 		prio = rtm->rtm_priority;
572 	} else if (rtm->rtm_type != RTM_ADD)
573 		prio = RTP_ANY;
574 	else if (rtm->rtm_flags & RTF_STATIC)
575 		prio = 0;
576 	else
577 		prio = RTP_DEFAULT;
578 
	/* Point info.rti_info[] at the sockaddrs that follow the header. */
579 	bzero(&info, sizeof(info));
580 	info.rti_addrs = rtm->rtm_addrs;
581 	rt_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info);
582 	info.rti_flags = rtm->rtm_flags;
583 	if (info.rti_info[RTAX_DST] == NULL ||
584 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
585 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
586 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
587 	    info.rti_info[RTAX_GENMASK] != NULL) {
588 		error = EINVAL;
589 		goto flush;
590 	}
591 #ifdef MPLS
592 	info.rti_mpls = rtm->rtm_mpls;
593 #endif
594 
	/* A link-layer gateway on a non-cloning route marks an L2 entry. */
595 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
596 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
597 	    (info.rti_flags & RTF_CLONING) == 0) {
598 		info.rti_flags |= RTF_LLINFO;
599 	}
600 
601 	switch (rtm->rtm_type) {
602 	case RTM_ADD:
603 		if (info.rti_info[RTAX_GATEWAY] == NULL) {
604 			error = EINVAL;
605 			goto flush;
606 		}
607 
608 		rt = rtable_match(tableid, info.rti_info[RTAX_DST], NULL);
609 		if ((error = route_arp_conflict(rt, &info))) {
610 			rtfree(rt);
611 			rt = NULL;
612 			goto flush;
613 		}
614 
615 		/*
616 		 * We cannot go through a delete/create/insert cycle for
617 		 * cached route because this can lead to races in the
618 		 * receive path.  Instead we update the L2 cache.
619 		 */
620 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED))
621 			goto change;
622 
623 		rtfree(rt);
624 		rt = NULL;
625 
626 		error = rtrequest(RTM_ADD, &info, prio, &saved_nrt, tableid);
627 		if (error == 0) {
628 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
629 			    &saved_nrt->rt_rmx);
630 			/* write back the priority the kernel used */
631 			rtm->rtm_priority = saved_nrt->rt_priority & RTP_MASK;
632 			rtm->rtm_index = saved_nrt->rt_ifidx;
633 			rtm->rtm_flags = saved_nrt->rt_flags;
634 			rtfree(saved_nrt);
635 		}
636 		break;
637 	case RTM_DELETE:
638 		if (!rtable_exists(tableid)) {
639 			error = EAFNOSUPPORT;
640 			goto flush;
641 		}
642 
643 		rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
644 		    info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
645 		    prio);
646 
647 		/*
648 		 * Invalidate the cache of automagically created and
649 		 * referenced L2 entries to make sure that ``rt_gwroute''
650 		 * pointer stays valid for other CPUs.
651 		 */
652 		if ((rt != NULL) && (ISSET(rt->rt_flags, RTF_CACHED))) {
653 			ifp = if_get(rt->rt_ifidx);
654 			KASSERT(ifp != NULL);
655 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
656 			if_put(ifp);
657 			/* Reset the MTU of the gateway route. */
658 			rtable_walk(tableid, rt_key(rt)->sa_family,
659 			    route_cleargateway, rt);
660 			goto report;
661 		}
662 
663 		/*
664 		 * Make sure that local routes are only modified by the
665 		 * kernel.
666 		 */
667 		if ((rt != NULL) &&
668 		    ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
669 			error = EINVAL;
670 			goto report;
671 		}
672 
673 		rtfree(rt);
674 		rt = NULL;
675 
676 		error = rtrequest(RTM_DELETE, &info, prio, &rt, tableid);
677 		if (error == 0)
678 			goto report;
679 		break;
680 	case RTM_GET:
681 		if (!rtable_exists(tableid)) {
682 			error = EAFNOSUPPORT;
683 			goto flush;
684 		}
685 		rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
686 		    info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
687 		    prio);
688 		if (rt == NULL) {
689 			error = ESRCH;
690 			goto flush;
691 		}
692 
		/*
		 * Rebuild the message from the route entry ``rt''.  Also
		 * entered from the RTM_DELETE paths above.
		 *
		 * NOTE(review): only some rti_info[] slots are overwritten
		 * below; the others still point into the rtm buffer as set
		 * up by rt_xaddrs().  If rtm is reallocated further down,
		 * those slots would reference freed memory when rt_msg2()
		 * (which presumably reads every non-NULL slot) fills the new
		 * buffer -- confirm and consider clearing them here.
		 */
693 report:
694 		info.rti_info[RTAX_DST] = rt_key(rt);
695 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
696 		info.rti_info[RTAX_NETMASK] =
697 		    rt_plen2mask(rt, &sa_mask);
698 		info.rti_info[RTAX_LABEL] =
699 		    rtlabel_id2sa(rt->rt_labelid, &sa_rl);
700 #ifdef MPLS
701 		if (rt->rt_flags & RTF_MPLS) {
702 			bzero(&sa_mpls, sizeof(sa_mpls));
703 			sa_mpls.smpls_family = AF_MPLS;
704 			sa_mpls.smpls_len = sizeof(sa_mpls);
705 			sa_mpls.smpls_label = ((struct rt_mpls *)
706 			    rt->rt_llinfo)->mpls_label;
707 			info.rti_info[RTAX_SRC] =
708 			    (struct sockaddr *)&sa_mpls;
709 			info.rti_mpls = ((struct rt_mpls *)
710 			    rt->rt_llinfo)->mpls_operation;
711 			rtm->rtm_mpls = info.rti_mpls;
712 		}
713 #endif
		/* Interface addresses are reported only when asked for. */
714 		info.rti_info[RTAX_IFP] = NULL;
715 		info.rti_info[RTAX_IFA] = NULL;
716 		ifp = if_get(rt->rt_ifidx);
717 		if (ifp != NULL && rtm->rtm_addrs & (RTA_IFP|RTA_IFA)) {
718 			info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
719 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
720 			if (ifp->if_flags & IFF_POINTOPOINT)
721 				info.rti_info[RTAX_BRD] =
722 				    rt->rt_ifa->ifa_dstaddr;
723 			else
724 				info.rti_info[RTAX_BRD] = NULL;
725 		}
726 		if_put(ifp);
		/*
		 * First rt_msg2() call (no buffer) yields the size needed;
		 * grow rtm if the reply does not fit, then fill it in.
		 */
727 		len = rt_msg2(rtm->rtm_type, RTM_VERSION, &info, NULL,
728 		    NULL);
729 		if (len > rtm->rtm_msglen) {
730 			struct rt_msghdr	*new_rtm;
731 			new_rtm = malloc(len, M_RTABLE, M_NOWAIT);
732 			if (new_rtm == NULL) {
733 				error = ENOBUFS;
734 				goto flush;
735 			}
736 			memcpy(new_rtm, rtm, rtm->rtm_msglen);
737 			free(rtm, M_RTABLE, 0);
738 			rtm = new_rtm;
739 		}
740 		rt_msg2(rtm->rtm_type, RTM_VERSION, &info, (caddr_t)rtm,
741 		    NULL);
742 		rtm->rtm_flags = rt->rt_flags;
743 		rtm->rtm_use = 0;
744 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
745 		rtm->rtm_index = rt->rt_ifidx;
746 		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
747 		rtm->rtm_addrs = info.rti_addrs;
748 		break;
749 	case RTM_CHANGE:
750 	case RTM_LOCK:
751 		if (!rtable_exists(tableid)) {
752 			error = EAFNOSUPPORT;
753 			goto flush;
754 		}
755 
756 		rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
757 		    info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY],
758 		    prio);
759 #ifndef SMALL_KERNEL
760 		/*
761 		 * If we got multipath routes, we require users to specify
762 		 * a matching gateway.
763 		 */
764 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
765 		    (info.rti_info[RTAX_GATEWAY] == NULL)) {
766 		    	rtfree(rt);
767 		    	rt = NULL;
768 		}
769 #endif
770 		/*
771 		 * If RTAX_GATEWAY is the argument we're trying to
772 		 * change, try to find a compatible route.
773 		 */
774 		if ((rt == NULL) && (info.rti_info[RTAX_GATEWAY] != NULL) &&
775 		    (rtm->rtm_type == RTM_CHANGE)) {
776 			rt = rtable_lookup(tableid, info.rti_info[RTAX_DST],
777 			    info.rti_info[RTAX_NETMASK], NULL, prio);
778 #ifndef SMALL_KERNEL
779 			/* Ensure we don't pick a multipath one. */
780 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
781 				rtfree(rt);
782 				rt = NULL;
783 			}
784 #endif
785 		}
786 
787 		if (rt == NULL) {
788 			error = ESRCH;
789 			goto flush;
790 		}
791 
792 		/*
793 		 * RTM_CHANGE/LOCK need a perfect match.
794 		 */
795 		plen = rtable_satoplen(info.rti_info[RTAX_DST]->sa_family,
796 		    info.rti_info[RTAX_NETMASK]);
797 		if (rt_plen(rt) != plen ) {
798 			error = ESRCH;
799 			goto flush;
800 		}
801 
802 		switch (rtm->rtm_type) {
803 		case RTM_CHANGE:
			/* newgate: a gateway was given and differs from the route's. */
804 			if (info.rti_info[RTAX_GATEWAY] != NULL)
805 				if (rt->rt_gateway == NULL ||
806 				    bcmp(rt->rt_gateway,
807 				    info.rti_info[RTAX_GATEWAY],
808 				    info.rti_info[RTAX_GATEWAY]->sa_len)) {
809 					newgate = 1;
810 				}
811 			/*
812 			 * Check reachable gateway before changing the route.
813 			 * New gateway could require new ifaddr, ifp;
814 			 * flags may also be different; ifp may be specified
815 			 * by ll sockaddr when protocol address is ambiguous.
816 			 */
817 			if (newgate || info.rti_info[RTAX_IFP] != NULL ||
818 			    info.rti_info[RTAX_IFA] != NULL) {
819 				if ((error = rt_getifa(&info, tableid)) != 0)
820 					goto flush;
821 				ifa = info.rti_ifa;
822 				if (rt->rt_ifa != ifa) {
					/*
					 * Move the route to the new ifaddr:
					 * tell the old interface, drop the
					 * old ifa reference and take one on
					 * the new ifa.
					 */
823 					ifp = if_get(rt->rt_ifidx);
824 					KASSERT(ifp != NULL);
825 					ifp->if_rtrequest(ifp, RTM_DELETE, rt);
826 					ifafree(rt->rt_ifa);
827 					if_put(ifp);
828 
829 					ifa->ifa_refcnt++;
830 					rt->rt_ifa = ifa;
831 					rt->rt_ifidx = ifa->ifa_ifp->if_index;
832 #ifndef SMALL_KERNEL
833 					/* recheck link state after ifp change*/
834 					rt_if_linkstate_change(rt, ifa->ifa_ifp,
835 					    tableid);
836 #endif
837 				}
838 			}
			/*
			 * Also entered from RTM_ADD above when the route
			 * matching the destination is RTF_CACHED.
			 */
839 change:
840 			if (info.rti_info[RTAX_GATEWAY] != NULL && (error =
841 			    rt_setgate(rt, info.rti_info[RTAX_GATEWAY],
842 			    tableid)))
843 				goto flush;
844 #ifdef MPLS
845 			if ((rtm->rtm_flags & RTF_MPLS) &&
846 			    info.rti_info[RTAX_SRC] != NULL) {
847 				struct rt_mpls *rt_mpls;
848 
849 				psa_mpls = (struct sockaddr_mpls *)
850 				    info.rti_info[RTAX_SRC];
851 
852 				if (rt->rt_llinfo == NULL) {
853 					rt->rt_llinfo =
854 					    malloc(sizeof(struct rt_mpls),
855 					    M_TEMP, M_NOWAIT|M_ZERO);
856 				}
857 				if (rt->rt_llinfo == NULL) {
858 					error = ENOMEM;
859 					goto flush;
860 				}
861 
862 				rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
863 
864 				if (psa_mpls != NULL) {
865 					rt_mpls->mpls_label =
866 					    psa_mpls->smpls_label;
867 				}
868 
869 				rt_mpls->mpls_operation = info.rti_mpls;
870 
871 				/* XXX: set experimental bits */
872 
873 				rt->rt_flags |= RTF_MPLS;
874 			} else if (newgate || ((rtm->rtm_fmask & RTF_MPLS) &&
875 			    !(rtm->rtm_flags & RTF_MPLS))) {
876 				/* if gateway changed remove MPLS information */
877 				if (rt->rt_llinfo != NULL &&
878 				    rt->rt_flags & RTF_MPLS) {
879 					free(rt->rt_llinfo, M_TEMP, 0);
880 					rt->rt_llinfo = NULL;
881 					rt->rt_flags &= ~RTF_MPLS;
882 				}
883 			}
884 #endif
885 
886 #ifdef BFD
887 			if (ISSET(rtm->rtm_flags, RTF_BFD)) {
888 				if ((error = bfdset(rt)))
889 					goto flush;
890 			} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
891 			    ISSET(rtm->rtm_fmask, RTF_BFD)) {
892 				bfdclear(rt);
893 			}
894 #endif
895 
896 			/* Hack to allow some flags to be toggled */
897 			if (rtm->rtm_fmask)
898 				rt->rt_flags =
899 				    (rt->rt_flags & ~rtm->rtm_fmask) |
900 				    (rtm->rtm_flags & rtm->rtm_fmask);
901 
902 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
903 			    &rt->rt_rmx);
904 			rtm->rtm_index = rt->rt_ifidx;
905 			rtm->rtm_priority = rt->rt_priority & RTP_MASK;
906 			rtm->rtm_flags = rt->rt_flags;
907 
908 			ifp = if_get(rt->rt_ifidx);
909 			KASSERT(ifp != NULL);
910 			ifp->if_rtrequest(ifp, RTM_ADD, rt);
911 			if_put(ifp);
912 
			/* Replace the route label if a new one was supplied. */
913 			if (info.rti_info[RTAX_LABEL] != NULL) {
914 				char *rtlabel = ((struct sockaddr_rtlabel *)
915 				    info.rti_info[RTAX_LABEL])->sr_label;
916 				rtlabel_unref(rt->rt_labelid);
917 				rt->rt_labelid = rtlabel_name2id(rtlabel);
918 			}
919 			if_group_routechange(info.rti_info[RTAX_DST],
920 			    info.rti_info[RTAX_NETMASK]);
921 			/* FALLTHROUGH */
922 		case RTM_LOCK:
			/* Of the metrics named in rtm_inits, lock exactly those set in rmx_locks. */
923 			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
924 			rt->rt_rmx.rmx_locks |=
925 			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
926 			rtm->rtm_priority = rt->rt_priority & RTP_MASK;
927 			break;
928 		}
929 		break;
930 	}
931 
	/* Common exit: record the outcome in the message and loop it back. */
932 flush:
933 	if (rtm) {
934 		if (error)
935 			rtm->rtm_errno = error;
936 		else {
937 			rtm->rtm_flags |= RTF_DONE;
938 		}
939 	}
	/* Must be read before ``rt'' is released: RTAX_DST may point at rt_key(rt). */
940 	if (info.rti_info[RTAX_DST])
941 		route_proto.sp_protocol = info.rti_info[RTAX_DST]->sa_family;
942 	if (rt)
943 		rtfree(rt);
944 
945 	/*
946 	 * Check to see if we don't want our own messages.
947 	 */
948 	if (!(so->so_options & SO_USELOOPBACK)) {
949 		if (route_cb.any_count <= 1) {
			/*
			 * Early validation failures jump straight here:
			 * the message is dropped without being looped back.
			 */
950 fail:
951 			free(rtm, M_RTABLE, 0);
952 			m_freem(m);
953 			return (error);
954 		}
955 		/* There is another listener, so construct message */
956 		rp = sotorawcb(so);
957 	}
958 	if (rp)
959 		rp->rcb_proto.sp_family = 0; /* Avoid us */
	/*
	 * Write the result back into the mbuf.  If the reply is shorter
	 * than the request, the negative difference is passed to m_adj(),
	 * presumably trimming the tail of the chain -- confirm.
	 */
960 	if (rtm) {
961 		if (m_copyback(m, 0, rtm->rtm_msglen, rtm, M_NOWAIT)) {
962 			m_freem(m);
963 			m = NULL;
964 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
965 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
966 		free(rtm, M_RTABLE, 0);
967 	}
968 	if (m)
969 		route_input(m, &route_proto, &route_src, &route_dst);
	/* Undo the temporary family change made to skip our own socket. */
970 	if (rp)
971 		rp->rcb_proto.sp_family = PF_ROUTE;
972 
973 	return (error);
974 }
975 
976 int
977 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
978 {
979 	struct rtentry *nhrt = arg;
980 
981 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
982 	    !ISSET(rt->rt_locks, RTV_MTU))
983                 rt->rt_mtu = 0;
984 
985 	return (0);
986 }
987 
/*
 * Decide whether a user-supplied ARP entry may be added alongside ``rt'',
 * the route currently matching its destination.
 *
 * At most two entries are allowed for a given IP address: a private one
 * (priv) and a public one (pub).  Returns EEXIST if the request conflicts
 * with ``rt'' and 0 otherwise; when the new entry may coexist with ``rt'',
 * RTF_MPATH is added to info->rti_flags.  Kernels built without ART skip
 * the check and always return 0.
 */
int
route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
{
#ifdef ART
	int		 want_pub = (info->rti_flags & RTF_ANNOUNCE);

	/* Only link-layer entries for IPv4 destinations are checked. */
	if (!(info->rti_flags & RTF_LLINFO))
		return (0);
	if (info->rti_info[RTAX_DST]->sa_family != AF_INET)
		return (0);

	/* Nothing to clash with. */
	if (rt == NULL)
		return (0);

	/* Not an L2 entry, or a cached one which can simply be updated. */
	if (!ISSET(rt->rt_flags, RTF_LLINFO) ||
	    ISSET(rt->rt_flags, RTF_CACHED))
		return (0);

	/*
	 * Same destination and not cached: two "priv" or two "pub" entries
	 * conflict, and so does anything added once a second entry exists.
	 */
	if (ISSET(rt->rt_flags, RTF_MPATH) ||
	    ISSET(rt->rt_flags, RTF_ANNOUNCE) == want_pub)
		return (EEXIST);

	/* No conflict, but an entry exists, so multipath must be forced. */
	info->rti_flags |= RTF_MPATH;
#endif /* ART */
	return (0);
}
1025 
1026 void
1027 rt_setmetrics(u_long which, const struct rt_metrics *in,
1028     struct rt_kmetrics *out)
1029 {
1030 	int64_t expire;
1031 
1032 	if (which & RTV_MTU)
1033 		out->rmx_mtu = in->rmx_mtu;
1034 	if (which & RTV_EXPIRE) {
1035 		expire = in->rmx_expire;
1036 		if (expire != 0) {
1037 			expire -= time_second;
1038 			expire += time_uptime;
1039 		}
1040 
1041 		out->rmx_expire = expire;
1042 	}
1043 	/* RTV_PRIORITY handled before */
1044 }
1045 
1046 void
1047 rt_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1048 {
1049 	int64_t expire;
1050 
1051 	expire = in->rmx_expire;
1052 	if (expire != 0) {
1053 		expire -= time_uptime;
1054 		expire += time_second;
1055 	}
1056 
1057 	bzero(out, sizeof(*out));
1058 	out->rmx_locks = in->rmx_locks;
1059 	out->rmx_mtu = in->rmx_mtu;
1060 	out->rmx_expire = expire;
1061 	out->rmx_pksent = in->rmx_pksent;
1062 }
1063 
/*
 * ROUNDUP(a): round a positive length up to the next multiple of
 * sizeof(long); a length that is not positive yields sizeof(long).
 * ADVANCE(x, n): move pointer x past the sockaddr n, including the
 * padding computed by ROUNDUP().  Both evaluate their argument more
 * than once.
 */
#define ROUNDUP(a) \
	((a) > 0 ? \
	    (((a) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) : \
	    sizeof(long))
#define ADVANCE(x, n)	((x) += ROUNDUP((n)->sa_len))
1067 
1068 void
1069 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1070 {
1071 	struct sockaddr	*sa;
1072 	int		 i;
1073 
1074 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1075 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
1076 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1077 			continue;
1078 		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;
1079 		ADVANCE(cp, sa);
1080 	}
1081 }
1082 
1083 struct mbuf *
1084 rt_msg1(int type, struct rt_addrinfo *rtinfo)
1085 {
1086 	struct rt_msghdr	*rtm;
1087 	struct mbuf		*m;
1088 	int			 i;
1089 	struct sockaddr		*sa;
1090 	int			 len, dlen, hlen;
1091 
1092 	switch (type) {
1093 	case RTM_DELADDR:
1094 	case RTM_NEWADDR:
1095 		len = sizeof(struct ifa_msghdr);
1096 		break;
1097 	case RTM_IFINFO:
1098 		len = sizeof(struct if_msghdr);
1099 		break;
1100 	case RTM_IFANNOUNCE:
1101 		len = sizeof(struct if_announcemsghdr);
1102 		break;
1103 	default:
1104 		len = sizeof(struct rt_msghdr);
1105 		break;
1106 	}
1107 	if (len > MCLBYTES)
1108 		panic("rt_msg1");
1109 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1110 	if (m && len > MHLEN) {
1111 		MCLGET(m, M_DONTWAIT);
1112 		if ((m->m_flags & M_EXT) == 0) {
1113 			m_free(m);
1114 			m = NULL;
1115 		}
1116 	}
1117 	if (m == NULL)
1118 		return (m);
1119 	m->m_pkthdr.len = m->m_len = hlen = len;
1120 	m->m_pkthdr.ph_ifidx = 0;
1121 	rtm = mtod(m, struct rt_msghdr *);
1122 	bzero(rtm, len);
1123 	for (i = 0; i < RTAX_MAX; i++) {
1124 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1125 			continue;
1126 		rtinfo->rti_addrs |= (1 << i);
1127 		dlen = ROUNDUP(sa->sa_len);
1128 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1129 			m_freem(m);
1130 			return (NULL);
1131 		}
1132 		len += dlen;
1133 	}
1134 	rtm->rtm_msglen = len;
1135 	rtm->rtm_hdrlen = hlen;
1136 	rtm->rtm_version = RTM_VERSION;
1137 	rtm->rtm_type = type;
1138 	return (m);
1139 }
1140 
1141 int
1142 rt_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1143     struct walkarg *w)
1144 {
1145 	int		i;
1146 	int		len, dlen, hlen, second_time = 0;
1147 	caddr_t		cp0;
1148 
1149 	rtinfo->rti_addrs = 0;
1150 again:
1151 	switch (type) {
1152 	case RTM_DELADDR:
1153 	case RTM_NEWADDR:
1154 		len = sizeof(struct ifa_msghdr);
1155 		break;
1156 	case RTM_IFINFO:
1157 		len = sizeof(struct if_msghdr);
1158 		break;
1159 	default:
1160 		len = sizeof(struct rt_msghdr);
1161 		break;
1162 	}
1163 	hlen = len;
1164 	if ((cp0 = cp) != NULL)
1165 		cp += len;
1166 	for (i = 0; i < RTAX_MAX; i++) {
1167 		struct sockaddr *sa;
1168 
1169 		if ((sa = rtinfo->rti_info[i]) == NULL)
1170 			continue;
1171 		rtinfo->rti_addrs |= (1 << i);
1172 		dlen = ROUNDUP(sa->sa_len);
1173 		if (cp) {
1174 			bcopy(sa, cp, (size_t)dlen);
1175 			cp += dlen;
1176 		}
1177 		len += dlen;
1178 	}
1179 	/* align message length to the next natural boundary */
1180 	len = ALIGN(len);
1181 	if (cp == 0 && w != NULL && !second_time) {
1182 		struct walkarg *rw = w;
1183 
1184 		rw->w_needed += len;
1185 		if (rw->w_needed <= 0 && rw->w_where) {
1186 			if (rw->w_tmemsize < len) {
1187 				free(rw->w_tmem, M_RTABLE, 0);
1188 				rw->w_tmem = malloc(len, M_RTABLE, M_NOWAIT);
1189 				if (rw->w_tmem)
1190 					rw->w_tmemsize = len;
1191 			}
1192 			if (rw->w_tmem) {
1193 				cp = rw->w_tmem;
1194 				second_time = 1;
1195 				goto again;
1196 			} else
1197 				rw->w_where = 0;
1198 		}
1199 	}
1200 	if (cp && w)		/* clear the message header */
1201 		bzero(cp0, hlen);
1202 
1203 	if (cp) {
1204 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1205 
1206 		rtm->rtm_version = RTM_VERSION;
1207 		rtm->rtm_type = type;
1208 		rtm->rtm_msglen = len;
1209 		rtm->rtm_hdrlen = hlen;
1210 	}
1211 	return (len);
1212 }
1213 
1214 /*
1215  * This routine is called to generate a message from the routing
1216  * socket indicating that a redirect has occurred, a routing lookup
1217  * has failed, or that a protocol has detected timeouts to a particular
1218  * destination.
1219  */
1220 void
1221 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1222     u_int ifidx, int error, u_int tableid)
1223 {
1224 	struct rt_msghdr	*rtm;
1225 	struct mbuf		*m;
1226 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1227 
1228 	if (route_cb.any_count == 0)
1229 		return;
1230 	m = rt_msg1(type, rtinfo);
1231 	if (m == NULL)
1232 		return;
1233 	rtm = mtod(m, struct rt_msghdr *);
1234 	rtm->rtm_flags = RTF_DONE | flags;
1235 	rtm->rtm_priority = prio;
1236 	rtm->rtm_errno = error;
1237 	rtm->rtm_tableid = tableid;
1238 	rtm->rtm_addrs = rtinfo->rti_addrs;
1239 	rtm->rtm_index = ifidx;
1240 	if (sa == NULL)
1241 		route_proto.sp_protocol = 0;
1242 	else
1243 		route_proto.sp_protocol = sa->sa_family;
1244 	route_input(m, &route_proto, &route_src, &route_dst);
1245 }
1246 
1247 /*
1248  * This routine is called to generate a message from the routing
1249  * socket indicating that the status of a network interface has changed.
1250  */
1251 void
1252 rt_ifmsg(struct ifnet *ifp)
1253 {
1254 	struct if_msghdr	*ifm;
1255 	struct mbuf		*m;
1256 
1257 	if (route_cb.any_count == 0)
1258 		return;
1259 	m = rt_msg1(RTM_IFINFO, NULL);
1260 	if (m == NULL)
1261 		return;
1262 	ifm = mtod(m, struct if_msghdr *);
1263 	ifm->ifm_index = ifp->if_index;
1264 	ifm->ifm_tableid = ifp->if_rdomain;
1265 	ifm->ifm_flags = ifp->if_flags;
1266 	ifm->ifm_xflags = ifp->if_xflags;
1267 	ifm->ifm_data = ifp->if_data;
1268 	ifm->ifm_addrs = 0;
1269 	route_proto.sp_protocol = 0;
1270 	route_input(m, &route_proto, &route_src, &route_dst);
1271 }
1272 
1273 /*
1274  * This is called to generate messages from the routing socket
1275  * indicating a network interface has had addresses associated with it.
1276  * if we ever reverse the logic and replace messages TO the routing
1277  * socket indicate a request to configure interfaces, then it will
1278  * be unnecessary as the routing socket will automatically generate
1279  * copies of it.
1280  */
1281 void
1282 rt_sendaddrmsg(struct rtentry *rt, int cmd, struct ifaddr *ifa)
1283 {
1284 	struct ifnet		*ifp = ifa->ifa_ifp;
1285 	struct mbuf		*m = NULL;
1286 	struct rt_addrinfo	 info;
1287 	struct ifa_msghdr	*ifam;
1288 
1289 	if (route_cb.any_count == 0)
1290 		return;
1291 
1292 	memset(&info, 0, sizeof(info));
1293 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1294 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1295 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1296 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1297 	if ((m = rt_msg1(cmd, &info)) == NULL)
1298 		return;
1299 	ifam = mtod(m, struct ifa_msghdr *);
1300 	ifam->ifam_index = ifp->if_index;
1301 	ifam->ifam_metric = ifa->ifa_metric;
1302 	ifam->ifam_flags = ifa->ifa_flags;
1303 	ifam->ifam_addrs = info.rti_addrs;
1304 	ifam->ifam_tableid = ifp->if_rdomain;
1305 
1306 	if (ifa->ifa_addr == NULL)
1307 		route_proto.sp_protocol = 0;
1308 	else
1309 		route_proto.sp_protocol = ifa->ifa_addr->sa_family;
1310 	route_input(m, &route_proto, &route_src, &route_dst);
1311 }
1312 
1313 /*
1314  * This is called to generate routing socket messages indicating
1315  * network interface arrival and departure.
1316  */
1317 void
1318 rt_ifannouncemsg(struct ifnet *ifp, int what)
1319 {
1320 	struct if_announcemsghdr	*ifan;
1321 	struct mbuf			*m;
1322 
1323 	if (route_cb.any_count == 0)
1324 		return;
1325 	m = rt_msg1(RTM_IFANNOUNCE, NULL);
1326 	if (m == NULL)
1327 		return;
1328 	ifan = mtod(m, struct if_announcemsghdr *);
1329 	ifan->ifan_index = ifp->if_index;
1330 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1331 	ifan->ifan_what = what;
1332 	route_proto.sp_protocol = 0;
1333 	route_input(m, &route_proto, &route_src, &route_dst);
1334 }
1335 
1336 /*
1337  * This is used in dumping the kernel table via sysctl().
1338  */
1339 int
1340 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1341 {
1342 	struct walkarg		*w = v;
1343 	int			 error = 0, size;
1344 	struct rt_addrinfo	 info;
1345 	struct ifnet		*ifp;
1346 #ifdef MPLS
1347 	struct sockaddr_mpls	 sa_mpls;
1348 #endif
1349 	struct sockaddr_rtlabel	 sa_rl;
1350 	struct sockaddr_in6	 sa_mask;
1351 
1352 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1353 		return 0;
1354 	if (w->w_op == NET_RT_DUMP && w->w_arg) {
1355 		u_int8_t prio = w->w_arg & RTP_MASK;
1356 		if (w->w_arg < 0) {
1357 			prio = (-w->w_arg) & RTP_MASK;
1358 			/* Show all routes that are not this priority */
1359 			if (prio == (rt->rt_priority & RTP_MASK))
1360 				return 0;
1361 		} else {
1362 			if (prio != (rt->rt_priority & RTP_MASK) &&
1363 			    prio != RTP_ANY)
1364 				return 0;
1365 		}
1366 	}
1367 	bzero(&info, sizeof(info));
1368 	info.rti_info[RTAX_DST] = rt_key(rt);
1369 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1370 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1371 	ifp = if_get(rt->rt_ifidx);
1372 	if (ifp != NULL) {
1373 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1374 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1375 		if (ifp->if_flags & IFF_POINTOPOINT)
1376 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1377 	}
1378 	if_put(ifp);
1379 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1380 #ifdef MPLS
1381 	if (rt->rt_flags & RTF_MPLS) {
1382 		bzero(&sa_mpls, sizeof(sa_mpls));
1383 		sa_mpls.smpls_family = AF_MPLS;
1384 		sa_mpls.smpls_len = sizeof(sa_mpls);
1385 		sa_mpls.smpls_label = ((struct rt_mpls *)
1386 		    rt->rt_llinfo)->mpls_label;
1387 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1388 		info.rti_mpls = ((struct rt_mpls *)
1389 		    rt->rt_llinfo)->mpls_operation;
1390 	}
1391 #endif
1392 
1393 	size = rt_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1394 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1395 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1396 
1397 		rtm->rtm_pid = curproc->p_p->ps_pid;
1398 		rtm->rtm_flags = rt->rt_flags;
1399 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
1400 		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1401 		/* Do not account the routing table's reference. */
1402 		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
1403 		rtm->rtm_index = rt->rt_ifidx;
1404 		rtm->rtm_addrs = info.rti_addrs;
1405 		rtm->rtm_tableid = id;
1406 #ifdef MPLS
1407 		rtm->rtm_mpls = info.rti_mpls;
1408 #endif
1409 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1410 			w->w_where = NULL;
1411 		else
1412 			w->w_where += size;
1413 	}
1414 	return (error);
1415 }
1416 
1417 int
1418 sysctl_iflist(int af, struct walkarg *w)
1419 {
1420 	struct ifnet		*ifp;
1421 	struct ifaddr		*ifa;
1422 	struct rt_addrinfo	 info;
1423 	int			 len, error = 0;
1424 
1425 	bzero(&info, sizeof(info));
1426 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1427 		if (w->w_arg && w->w_arg != ifp->if_index)
1428 			continue;
1429 		/* Copy the link-layer address first */
1430 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1431 		len = rt_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
1432 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1433 			struct if_msghdr *ifm;
1434 
1435 			ifm = (struct if_msghdr *)w->w_tmem;
1436 			ifm->ifm_index = ifp->if_index;
1437 			ifm->ifm_tableid = ifp->if_rdomain;
1438 			ifm->ifm_flags = ifp->if_flags;
1439 			ifm->ifm_data = ifp->if_data;
1440 			ifm->ifm_addrs = info.rti_addrs;
1441 			error = copyout(ifm, w->w_where, len);
1442 			if (error)
1443 				return (error);
1444 			w->w_where += len;
1445 		}
1446 		info.rti_info[RTAX_IFP] = NULL;
1447 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1448 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
1449 			if (af && af != ifa->ifa_addr->sa_family)
1450 				continue;
1451 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1452 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1453 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1454 			len = rt_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
1455 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1456 				struct ifa_msghdr *ifam;
1457 
1458 				ifam = (struct ifa_msghdr *)w->w_tmem;
1459 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1460 				ifam->ifam_flags = ifa->ifa_flags;
1461 				ifam->ifam_metric = ifa->ifa_metric;
1462 				ifam->ifam_addrs = info.rti_addrs;
1463 				error = copyout(w->w_tmem, w->w_where, len);
1464 				if (error)
1465 					return (error);
1466 				w->w_where += len;
1467 			}
1468 		}
1469 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1470 		    info.rti_info[RTAX_BRD] = NULL;
1471 	}
1472 	return (0);
1473 }
1474 
1475 int
1476 sysctl_ifnames(struct walkarg *w)
1477 {
1478 	struct if_nameindex_msg ifn;
1479 	struct ifnet *ifp;
1480 	int error = 0;
1481 
1482 	/* XXX ignore tableid for now */
1483 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1484 		if (w->w_arg && w->w_arg != ifp->if_index)
1485 			continue;
1486 		w->w_needed += sizeof(ifn);
1487 		if (w->w_where && w->w_needed <= 0) {
1488 
1489 			memset(&ifn, 0, sizeof(ifn));
1490 			ifn.if_index = ifp->if_index;
1491 			strlcpy(ifn.if_name, ifp->if_xname,
1492 			    sizeof(ifn.if_name));
1493 			error = copyout(&ifn, w->w_where, sizeof(ifn));
1494 			if (error)
1495 				return (error);
1496 			w->w_where += sizeof(ifn);
1497 		}
1498 	}
1499 
1500 	return (0);
1501 }
1502 
1503 int
1504 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
1505     size_t newlen)
1506 {
1507 	int			 i, s, error = EINVAL;
1508 	u_char  		 af;
1509 	struct walkarg		 w;
1510 	struct rt_tableinfo	 tableinfo;
1511 	u_int			 tableid = 0;
1512 
1513 	if (new)
1514 		return (EPERM);
1515 	if (namelen < 3 || namelen > 4)
1516 		return (EINVAL);
1517 	af = name[0];
1518 	bzero(&w, sizeof(w));
1519 	w.w_where = where;
1520 	w.w_given = *given;
1521 	w.w_needed = 0 - w.w_given;
1522 	w.w_op = name[1];
1523 	w.w_arg = name[2];
1524 
1525 	if (namelen == 4) {
1526 		tableid = name[3];
1527 		if (!rtable_exists(tableid))
1528 			return (ENOENT);
1529 	} else
1530 		tableid = curproc->p_p->ps_rtableid;
1531 
1532 	s = splsoftnet();
1533 	switch (w.w_op) {
1534 	case NET_RT_DUMP:
1535 	case NET_RT_FLAGS:
1536 		for (i = 1; i <= AF_MAX; i++) {
1537 			if (af != 0 && af != i)
1538 				continue;
1539 
1540 			error = rtable_walk(tableid, i, sysctl_dumpentry, &w);
1541 			if (error == EAFNOSUPPORT)
1542 				error = 0;
1543 			if (error)
1544 				break;
1545 		}
1546 		break;
1547 
1548 	case NET_RT_IFLIST:
1549 		error = sysctl_iflist(af, &w);
1550 		break;
1551 
1552 	case NET_RT_STATS:
1553 		error = sysctl_rdstruct(where, given, new,
1554 		    &rtstat, sizeof(rtstat));
1555 		splx(s);
1556 		return (error);
1557 	case NET_RT_TABLE:
1558 		tableid = w.w_arg;
1559 		if (!rtable_exists(tableid)) {
1560 			splx(s);
1561 			return (ENOENT);
1562 		}
1563 		tableinfo.rti_tableid = tableid;
1564 		tableinfo.rti_domainid = rtable_l2(tableid);
1565 		error = sysctl_rdstruct(where, given, new,
1566 		    &tableinfo, sizeof(tableinfo));
1567 		splx(s);
1568 		return (error);
1569 	case NET_RT_IFNAMES:
1570 		error = sysctl_ifnames(&w);
1571 		break;
1572 	}
1573 	splx(s);
1574 	free(w.w_tmem, M_RTABLE, 0);
1575 	w.w_needed += w.w_given;
1576 	if (where) {
1577 		*given = w.w_where - (caddr_t)where;
1578 		if (*given < w.w_needed)
1579 			return (ENOMEM);
1580 	} else
1581 		*given = (11 * w.w_needed) / 10;
1582 
1583 	return (error);
1584 }
1585 
1586 /*
1587  * Definitions of protocols supported in the ROUTE domain.
1588  */
1589 
/*
 * routedomain is referenced by routesw[] before it is defined below,
 * hence the forward declaration.
 */
extern	struct domain routedomain;		/* or at least forward */

/*
 * Protocol switch of the routing domain: a single SOCK_RAW entry that
 * belongs to routedomain and is flagged PR_ATOMIC|PR_ADDR|PR_WANTRCVD.
 * The initialisers are positional; the handlers wired in are
 * route_input, route_output, route_ctloutput, route_usrreq, raw_init
 * and sysctl_rtable, the remaining slots are 0.
 * NOTE(review): which struct protosw member each position maps to is
 * defined by the declaration of struct protosw, which is not visible
 * here -- check it before reordering anything.
 */
struct protosw routesw[] = {
{ SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
  route_input,	route_output,	0,		route_ctloutput,
  route_usrreq,
  raw_init,	0,		0,		0,
  sysctl_rtable,
}
};

/*
 * The routing domain itself: PF_ROUTE, named "route", with route_init
 * in the third slot.  The last two initialisers delimit the protocol
 * switch above: its first entry and the position just past its last
 * entry (presumably nitems() yields the element count -- confirm).
 */
struct domain routedomain =
    { PF_ROUTE, "route", route_init, 0, 0,
      routesw, &routesw[nitems(routesw)] };
1604