xref: /netbsd-src/sys/net/rtsock.c (revision 1ffa7b76c40339c17a0fb2a09fac93f287cfc046)
1 /*	$NetBSD: rtsock.c,v 1.59 2003/05/02 03:15:25 itojun Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1988, 1991, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
65  */
66 
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: rtsock.c,v 1.59 2003/05/02 03:15:25 itojun Exp $");
69 
70 #include "opt_inet.h"
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/proc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/socketvar.h>
78 #include <sys/domain.h>
79 #include <sys/protosw.h>
80 #include <sys/sysctl.h>
81 
82 #include <net/if.h>
83 #include <net/route.h>
84 #include <net/raw_cb.h>
85 
86 #include <machine/stdarg.h>
87 
extern	struct domain routedomain;		/* or at least forward */

/*
 * Fixed addresses and protocol descriptor handed to raw_input() for every
 * routing message generated in this file.  The initializers fill only the
 * first two members of each sockaddr (2 and PF_ROUTE); the second member
 * of route_proto (sp_protocol) is overwritten per message with the address
 * family the message concerns, or 0.
 */
struct	sockaddr route_dst = { 2, PF_ROUTE, };
struct	sockaddr route_src = { 2, PF_ROUTE, };
struct	sockproto route_proto = { PF_ROUTE, };
93 
/*
 * State shared between sysctl_rtable() and the per-entry dump helpers
 * (sysctl_dumpentry(), sysctl_iflist(), rt_msg2()).
 */
struct walkarg {
	int	w_op;		/* requested operation (name[1], NET_RT_*) */
	int	w_arg;		/* operation argument (name[2]): flag mask
				 * for NET_RT_FLAGS, interface index filter
				 * for the interface lists */
	int	w_given;	/* size of the user buffer (*given) */
	int	w_needed;	/* starts at -w_given and grows by each
				 * message length; <= 0 means the output
				 * so far still fits in the user buffer */
	caddr_t	w_where;	/* next write position in the user buffer;
				 * NULL when sizing only, and reset to NULL
				 * by sysctl_dumpentry() if copyout fails */
	int	w_tmemsize;	/* current size of w_tmem */
	int	w_tmemneeded;	/* size wanted when a no-wait allocation of
				 * w_tmem failed (see rt_msg2()) */
	caddr_t	w_tmem;		/* kernel scratch buffer holding one message
				 * before it is copied out */
};
104 
/* Forward declarations of the file-local helpers defined below. */
static struct mbuf *rt_msg1 __P((int, struct rt_addrinfo *, caddr_t, int));
static int rt_msg2 __P((int, struct rt_addrinfo *, caddr_t, struct walkarg *,
    int *));
static int rt_xaddrs __P((caddr_t, caddr_t, struct rt_addrinfo *));
static int sysctl_dumpentry __P((struct radix_node *, void *));
static int sysctl_iflist __P((int, struct walkarg *, int));
static int sysctl_rtable __P((int *, u_int, void *, size_t *, void *, size_t));
static __inline void rt_adjustcount __P((int, int));
113 
/*
 * Sleazy use of local variables throughout file, warning!!!!
 *
 * Each shorthand below expands to one slot of a variable literally named
 * `info' (a struct rt_addrinfo), so it is only usable inside a function
 * that declares such a local.
 */
#define dst	info.rti_info[RTAX_DST]
#define gate	info.rti_info[RTAX_GATEWAY]
#define netmask	info.rti_info[RTAX_NETMASK]
#define genmask	info.rti_info[RTAX_GENMASK]
#define ifpaddr	info.rti_info[RTAX_IFP]
#define ifaaddr	info.rti_info[RTAX_IFA]
#define brdaddr	info.rti_info[RTAX_BRD]
122 
123 static __inline void
124 rt_adjustcount(af, cnt)
125 	int af, cnt;
126 {
127 	route_cb.any_count += cnt;
128 	switch (af) {
129 	case AF_INET:
130 		route_cb.ip_count += cnt;
131 		return;
132 #ifdef INET6
133 	case AF_INET6:
134 		route_cb.ip6_count += cnt;
135 		return;
136 #endif
137 	case AF_IPX:
138 		route_cb.ipx_count += cnt;
139 		return;
140 	case AF_NS:
141 		route_cb.ns_count += cnt;
142 		return;
143 	case AF_ISO:
144 		route_cb.iso_count += cnt;
145 		return;
146 	}
147 }
148 
149 /*ARGSUSED*/
150 int
151 route_usrreq(so, req, m, nam, control, p)
152 	struct socket *so;
153 	int req;
154 	struct mbuf *m, *nam, *control;
155 	struct proc *p;
156 {
157 	int error = 0;
158 	struct rawcb *rp = sotorawcb(so);
159 	int s;
160 
161 	if (req == PRU_ATTACH) {
162 		MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
163 		if ((so->so_pcb = rp) != NULL)
164 			memset(so->so_pcb, 0, sizeof(*rp));
165 
166 	}
167 	if (req == PRU_DETACH && rp)
168 		rt_adjustcount(rp->rcb_proto.sp_protocol, -1);
169 	s = splsoftnet();
170 
171 	/*
172 	 * Don't call raw_usrreq() in the attach case, because
173 	 * we want to allow non-privileged processes to listen on
174 	 * and send "safe" commands to the routing socket.
175 	 */
176 	if (req == PRU_ATTACH) {
177 		if (p == 0)
178 			error = EACCES;
179 		else
180 			error = raw_attach(so, (int)(long)nam);
181 	} else
182 		error = raw_usrreq(so, req, m, nam, control, p);
183 
184 	rp = sotorawcb(so);
185 	if (req == PRU_ATTACH && rp) {
186 		if (error) {
187 			free((caddr_t)rp, M_PCB);
188 			splx(s);
189 			return (error);
190 		}
191 		rt_adjustcount(rp->rcb_proto.sp_protocol, 1);
192 		rp->rcb_laddr = &route_src;
193 		rp->rcb_faddr = &route_dst;
194 		soisconnected(so);
195 		so->so_options |= SO_USELOOPBACK;
196 	}
197 	splx(s);
198 	return (error);
199 }
200 
/*
 * Process one message written to a routing socket.
 *
 * The message in mbuf chain `m' is validated (length, version), copied
 * into a contiguous buffer, and its trailing sockaddrs are parsed into
 * `info'.  The request (RTM_ADD, RTM_DELETE, RTM_GET, RTM_CHANGE or
 * RTM_LOCK) is then carried out, the header is stamped with either
 * rtm_errno or RTF_DONE, and the resulting message is handed to
 * raw_input() for delivery to routing-socket listeners.
 *
 * The single variadic argument is the sending socket (struct socket *).
 * Returns 0 or an errno value.
 */
/*ARGSUSED*/
int
#if __STDC__
route_output(struct mbuf *m, ...)
#else
route_output(m, va_alist)
	struct mbuf *m;
	va_dcl
#endif
{
	struct rt_msghdr *rtm = 0;
	struct radix_node *rn = 0;
	struct rtentry *rt = 0;
	struct rtentry *saved_nrt = 0;
	struct radix_node_head *rnh;
	struct rt_addrinfo info;
	int len, error = 0;
	struct ifnet *ifp = 0;
	struct ifaddr *ifa = 0;
	struct socket *so;
	va_list ap;
	sa_family_t family;

	va_start(ap, m);
	so = va_arg(ap, struct socket *);
	va_end(ap);

/* Record the error and jump to the common reply/cleanup code. */
#define senderr(e) do { error = e; goto flush;} while (/*CONSTCOND*/ 0)
	if (m == 0 || ((m->m_len < sizeof(int32_t)) &&
	   (m = m_pullup(m, sizeof(int32_t))) == 0))
		return (ENOBUFS);
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("route_output");
	len = m->m_pkthdr.len;
	/*
	 * `info' is not initialized until the memset below, so the early
	 * error exits clear dst by hand: the flush code reads it.
	 */
	if (len < sizeof(*rtm) ||
	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
		dst = 0;
		senderr(EINVAL);
	}
	R_Malloc(rtm, struct rt_msghdr *, len);
	if (rtm == 0) {
		dst = 0;
		senderr(ENOBUFS);
	}
	m_copydata(m, 0, len, (caddr_t)rtm);
	if (rtm->rtm_version != RTM_VERSION) {
		dst = 0;
		senderr(EPROTONOSUPPORT);
	}
	rtm->rtm_pid = curproc->p_pid;
	memset(&info, 0, sizeof(info));
	info.rti_addrs = rtm->rtm_addrs;
	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info))
		senderr(EINVAL);
	info.rti_flags = rtm->rtm_flags;
	if (dst == 0 || (dst->sa_family >= AF_MAX))
		senderr(EINVAL);
	if (gate != 0 && (gate->sa_family >= AF_MAX))
		senderr(EINVAL);
	if (genmask) {
		struct radix_node *t;
		/*
		 * Replace the caller's genmask with the copy that
		 * rn_addmask() returns.
		 * NOTE(review): the comparison below subtracts 1 from the
		 * result of Bcmp() and offsets the pointers through a
		 * (caddr_t *) cast; it looks as though "- 1" was meant for
		 * the length and the offset was meant to be one byte --
		 * confirm the intent before relying on this test.
		 */
		t = rn_addmask((caddr_t)genmask, 0, 1);
		if (t && genmask->sa_len >= ((struct sockaddr *)t->rn_key)->sa_len &&
		    Bcmp((caddr_t *)genmask + 1, (caddr_t *)t->rn_key + 1,
		    ((struct sockaddr *)t->rn_key)->sa_len) - 1)
			genmask = (struct sockaddr *)(t->rn_key);
		else
			senderr(ENOBUFS);
	}

	/*
	 * Verify that the caller has the appropriate privilege; RTM_GET
	 * is the only operation the non-superuser is allowed.
	 */
	if (rtm->rtm_type != RTM_GET &&
	    suser(curproc->p_ucred, &curproc->p_acflag) != 0)
		senderr(EACCES);

	switch (rtm->rtm_type) {

	case RTM_ADD:
		if (gate == 0)
			senderr(EINVAL);
		error = rtrequest1(rtm->rtm_type, &info, &saved_nrt);
		if (error == 0 && saved_nrt) {
			rt_setmetrics(rtm->rtm_inits,
			    &rtm->rtm_rmx, &saved_nrt->rt_rmx);
			saved_nrt->rt_refcnt--;
			saved_nrt->rt_genmask = genmask;
		}
		break;

	case RTM_DELETE:
		error = rtrequest1(rtm->rtm_type, &info, &saved_nrt);
		if (error == 0) {
			/*
			 * Take a reference and reuse the RTM_GET reply code
			 * to describe the route that was removed; the
			 * reference is dropped by rtfree() at flush.
			 */
			(rt = saved_nrt)->rt_refcnt++;
			goto report;
		}
		break;

	case RTM_GET:
	case RTM_CHANGE:
	case RTM_LOCK:
		if ((rnh = rt_tables[dst->sa_family]) == 0) {
			senderr(EAFNOSUPPORT);
		}
		rn = rnh->rnh_lookup(dst, netmask, rnh);
		if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0) {
			senderr(ESRCH);
		}
		rt = (struct rtentry *)rn;
		rt->rt_refcnt++;

		switch (rtm->rtm_type) {
		case RTM_GET:
		report:
			/* Describe the route itself, not the request. */
			dst = rt_key(rt);
			gate = rt->rt_gateway;
			netmask = rt_mask(rt);
			genmask = rt->rt_genmask;
			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
				if ((ifp = rt->rt_ifp) != NULL) {
					ifpaddr = TAILQ_FIRST(&ifp->if_addrlist)->ifa_addr;
					ifaaddr = rt->rt_ifa->ifa_addr;
					if (ifp->if_flags & IFF_POINTOPOINT)
						brdaddr = rt->rt_ifa->ifa_dstaddr;
					else
						brdaddr = 0;
					rtm->rtm_index = ifp->if_index;
				} else {
					ifpaddr = 0;
					ifaaddr = 0;
				}
			}
			/* First call only computes the reply length. */
			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)0,
			    (struct walkarg *)0, &len);
			if (len > rtm->rtm_msglen) {
				/* Reply is larger than the request buffer. */
				struct rt_msghdr *new_rtm;
				R_Malloc(new_rtm, struct rt_msghdr *, len);
				if (new_rtm == 0)
					senderr(ENOBUFS);
				Bcopy(rtm, new_rtm, rtm->rtm_msglen);
				Free(rtm); rtm = new_rtm;
			}
			/* Second call writes the reply into rtm. */
			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm,
			    (struct walkarg *)0, 0);
			rtm->rtm_flags = rt->rt_flags;
			rtm->rtm_rmx = rt->rt_rmx;
			rtm->rtm_addrs = info.rti_addrs;
			break;

		case RTM_CHANGE:
			/*
			 * new gateway could require new ifaddr, ifp;
			 * flags may also be different; ifp may be specified
			 * by ll sockaddr when protocol address is ambiguous
			 */
			if ((error = rt_getifa(&info)) != 0)
				senderr(error);
			if (gate && rt_setgate(rt, rt_key(rt), gate))
				senderr(EDQUOT);
			/* new gateway could require new ifaddr, ifp;
			   flags may also be different; ifp may be specified
			   by ll sockaddr when protocol address is ambiguous */
			if (ifpaddr && (ifa = ifa_ifwithnet(ifpaddr)) &&
			    (ifp = ifa->ifa_ifp) && (ifaaddr || gate))
				ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate,
				    ifp);
			else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) ||
			    (gate && (ifa = ifa_ifwithroute(rt->rt_flags,
			    rt_key(rt), gate))))
				ifp = ifa->ifa_ifp;
			if (ifa) {
				struct ifaddr *oifa = rt->rt_ifa;
				if (oifa != ifa) {
				    /* Move the route to the new ifaddr. */
				    if (oifa && oifa->ifa_rtrequest)
					oifa->ifa_rtrequest(RTM_DELETE, rt,
					    &info);
				    IFAFREE(rt->rt_ifa);
				    rt->rt_ifa = ifa;
				    IFAREF(rt->rt_ifa);
				    rt->rt_ifp = ifp;
				}
			}
			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
			    &rt->rt_rmx);
			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
				rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
			if (genmask)
				rt->rt_genmask = genmask;
			/*
			 * Fall into
			 */
		case RTM_LOCK:
			/*
			 * For every metric named in rtm_inits, copy its lock
			 * bit from the request.
			 */
			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
			rt->rt_rmx.rmx_locks |=
			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
			break;
		}
		break;

	default:
		senderr(EOPNOTSUPP);
	}

flush:
	if (rtm) {
		if (error)
			rtm->rtm_errno = error;
		else
			rtm->rtm_flags |= RTF_DONE;
	}
	/*
	 * Save the family before releasing rt: on the report path dst
	 * points at the route's own key.
	 */
	family = dst ? dst->sa_family : 0;
	if (rt)
		rtfree(rt);
    {
	struct rawcb *rp = 0;
	/*
	 * Check to see if we don't want our own messages.
	 */
	if ((so->so_options & SO_USELOOPBACK) == 0) {
		if (route_cb.any_count <= 1) {
			if (rtm)
				Free(rtm);
			m_freem(m);
			return (error);
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	if (rtm) {
		/* Write the reply back over the request and fix its length. */
		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
		if (m->m_pkthdr.len < rtm->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
		Free(rtm);
	}
	/*
	 * The sender's sp_family is cleared around raw_input() and then
	 * restored -- presumably so the delivery code does not match the
	 * sending socket.
	 */
	if (rp)
		rp->rcb_proto.sp_family = 0; /* Avoid us */
	if (family)
		route_proto.sp_protocol = family;
	if (m)
		raw_input(m, &route_proto, &route_src, &route_dst);
	if (rp)
		rp->rcb_proto.sp_family = PF_ROUTE;
    }
	return (error);
}
451 
452 void
453 rt_setmetrics(which, in, out)
454 	u_long which;
455 	struct rt_metrics *in, *out;
456 {
457 #define metric(f, e) if (which & (f)) out->e = in->e;
458 	metric(RTV_RPIPE, rmx_recvpipe);
459 	metric(RTV_SPIPE, rmx_sendpipe);
460 	metric(RTV_SSTHRESH, rmx_ssthresh);
461 	metric(RTV_RTT, rmx_rtt);
462 	metric(RTV_RTTVAR, rmx_rttvar);
463 	metric(RTV_HOPCOUNT, rmx_hopcount);
464 	metric(RTV_MTU, rmx_mtu);
465 	metric(RTV_EXPIRE, rmx_expire);
466 #undef metric
467 }
468 
/*
 * ROUNDUP(a): round a positive length up to the next multiple of
 * sizeof(long); a length of zero or less yields sizeof(long).
 * ADVANCE(x, n): step pointer x past sockaddr n, using the rounded-up
 * value of n->sa_len.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
472 
473 static int
474 rt_xaddrs(cp, cplim, rtinfo)
475 	caddr_t cp, cplim;
476 	struct rt_addrinfo *rtinfo;
477 {
478 	struct sockaddr *sa = NULL;	/* Quell compiler warning */
479 	int i;
480 
481 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
482 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
483 			continue;
484 		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;
485 		ADVANCE(cp, sa);
486 	}
487 
488 	/* Check for extra addresses specified.  */
489 	if ((rtinfo->rti_addrs & (~0 << i)) != 0)
490 		return (1);
491 	/* Check for bad data length.  */
492 	if (cp != cplim) {
493 		if (i == RTAX_NETMASK + 1 &&
494 		    cp - ROUNDUP(sa->sa_len) + sa->sa_len == cplim)
495 			/*
496 			 * The last sockaddr was netmask.
497 			 * We accept this for now for the sake of old
498 			 * binaries or third party softwares.
499 			 */
500 			;
501 		else
502 			return (1);
503 	}
504 	return (0);
505 }
506 
507 static struct mbuf *
508 rt_msg1(type, rtinfo, data, datalen)
509 	int type;
510 	struct rt_addrinfo *rtinfo;
511 	caddr_t data;
512 	int datalen;
513 {
514 	struct rt_msghdr *rtm;
515 	struct mbuf *m;
516 	int i;
517 	struct sockaddr *sa;
518 	int len, dlen;
519 
520 	m = m_gethdr(M_DONTWAIT, MT_DATA);
521 	if (m == 0)
522 		return (m);
523 	MCLAIM(m, &routedomain.dom_mowner);
524 	switch (type) {
525 
526 	case RTM_DELADDR:
527 	case RTM_NEWADDR:
528 		len = sizeof(struct ifa_msghdr);
529 		break;
530 
531 #ifdef COMPAT_14
532 	case RTM_OIFINFO:
533 		len = sizeof(struct if_msghdr14);
534 		break;
535 #endif
536 
537 	case RTM_IFINFO:
538 		len = sizeof(struct if_msghdr);
539 		break;
540 
541 	case RTM_IFANNOUNCE:
542 		len = sizeof(struct if_announcemsghdr);
543 		break;
544 
545 	default:
546 		len = sizeof(struct rt_msghdr);
547 	}
548 	if (len > MHLEN + MLEN)
549 		panic("rt_msg1: message too long");
550 	else if (len > MHLEN) {
551 		m->m_next = m_get(M_DONTWAIT, MT_DATA);
552 		if (m->m_next == NULL) {
553 			m_freem(m);
554 			return (NULL);
555 		}
556 		MCLAIM(m->m_next, m->m_owner);
557 		m->m_pkthdr.len = len;
558 		m->m_len = MHLEN;
559 		m->m_next->m_len = len - MHLEN;
560 	} else {
561 		m->m_pkthdr.len = m->m_len = len;
562 	}
563 	m->m_pkthdr.rcvif = 0;
564 	m_copyback(m, 0, datalen, data);
565 	rtm = mtod(m, struct rt_msghdr *);
566 	for (i = 0; i < RTAX_MAX; i++) {
567 		if ((sa = rtinfo->rti_info[i]) == NULL)
568 			continue;
569 		rtinfo->rti_addrs |= (1 << i);
570 		dlen = ROUNDUP(sa->sa_len);
571 		m_copyback(m, len, dlen, (caddr_t)sa);
572 		len += dlen;
573 	}
574 	if (m->m_pkthdr.len != len) {
575 		m_freem(m);
576 		return (NULL);
577 	}
578 	rtm->rtm_msglen = len;
579 	rtm->rtm_version = RTM_VERSION;
580 	rtm->rtm_type = type;
581 	return (m);
582 }
583 
584 /*
585  * rt_msg2
586  *
587  *	 fills 'cp' or 'w'.w_tmem with the routing socket message and
588  *		returns the length of the message in 'lenp'.
589  *
590  * if walkarg is 0, cp is expected to be 0 or a buffer large enough to hold
591  *	the message
592  * otherwise walkarg's w_needed is updated and if the user buffer is
593  *	specified and w_needed indicates space exists the information is copied
594  *	into the temp space (w_tmem). w_tmem is [re]allocated if necessary,
595  *	if the allocation fails ENOBUFS is returned.
596  */
597 static int
598 rt_msg2(type, rtinfo, cp, w, lenp)
599 	int type;
600 	struct rt_addrinfo *rtinfo;
601 	caddr_t cp;
602 	struct walkarg *w;
603 	int *lenp;
604 {
605 	int i;
606 	int len, dlen, second_time = 0;
607 	caddr_t cp0;
608 
609 	rtinfo->rti_addrs = 0;
610 again:
611 	switch (type) {
612 
613 	case RTM_DELADDR:
614 	case RTM_NEWADDR:
615 		len = sizeof(struct ifa_msghdr);
616 		break;
617 #ifdef COMPAT_14
618 	case RTM_OIFINFO:
619 		len = sizeof(struct if_msghdr14);
620 		break;
621 #endif
622 
623 	case RTM_IFINFO:
624 		len = sizeof(struct if_msghdr);
625 		break;
626 
627 	default:
628 		len = sizeof(struct rt_msghdr);
629 	}
630 	if ((cp0 = cp) != NULL)
631 		cp += len;
632 	for (i = 0; i < RTAX_MAX; i++) {
633 		struct sockaddr *sa;
634 
635 		if ((sa = rtinfo->rti_info[i]) == 0)
636 			continue;
637 		rtinfo->rti_addrs |= (1 << i);
638 		dlen = ROUNDUP(sa->sa_len);
639 		if (cp) {
640 			bcopy(sa, cp, (unsigned)dlen);
641 			cp += dlen;
642 		}
643 		len += dlen;
644 	}
645 	if (cp == 0 && w != NULL && !second_time) {
646 		struct walkarg *rw = w;
647 
648 		rw->w_needed += len;
649 		if (rw->w_needed <= 0 && rw->w_where) {
650 			if (rw->w_tmemsize < len) {
651 				if (rw->w_tmem)
652 					free(rw->w_tmem, M_RTABLE);
653 				rw->w_tmem = (caddr_t) malloc(len, M_RTABLE,
654 				    M_NOWAIT);
655 				if (rw->w_tmem)
656 					rw->w_tmemsize = len;
657 			}
658 			if (rw->w_tmem) {
659 				cp = rw->w_tmem;
660 				second_time = 1;
661 				goto again;
662 			} else {
663 				rw->w_tmemneeded = len;
664 				return (ENOBUFS);
665 			}
666 		}
667 	}
668 	if (cp) {
669 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
670 
671 		rtm->rtm_version = RTM_VERSION;
672 		rtm->rtm_type = type;
673 		rtm->rtm_msglen = len;
674 	}
675 	if (lenp)
676 		*lenp = len;
677 	return (0);
678 }
679 
680 /*
681  * This routine is called to generate a message from the routing
682  * socket indicating that a redirect has occurred, a routing lookup
683  * has failed, or that a protocol has detected timeouts to a particular
684  * destination.
685  */
686 void
687 rt_missmsg(type, rtinfo, flags, error)
688 	int type, flags, error;
689 	struct rt_addrinfo *rtinfo;
690 {
691 	struct rt_msghdr rtm;
692 	struct mbuf *m;
693 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
694 
695 	if (route_cb.any_count == 0)
696 		return;
697 	memset(&rtm, 0, sizeof(rtm));
698 	rtm.rtm_flags = RTF_DONE | flags;
699 	rtm.rtm_errno = error;
700 	m = rt_msg1(type, rtinfo, (caddr_t)&rtm, sizeof(rtm));
701 	if (m == 0)
702 		return;
703 	mtod(m, struct rt_msghdr *)->rtm_addrs = rtinfo->rti_addrs;
704 	route_proto.sp_protocol = sa ? sa->sa_family : 0;
705 	raw_input(m, &route_proto, &route_src, &route_dst);
706 }
707 
708 /*
709  * This routine is called to generate a message from the routing
710  * socket indicating that the status of a network interface has changed.
711  */
712 void
713 rt_ifmsg(ifp)
714 	struct ifnet *ifp;
715 {
716 	struct if_msghdr ifm;
717 #ifdef COMPAT_14
718 	struct if_msghdr14 oifm;
719 #endif
720 	struct mbuf *m;
721 	struct rt_addrinfo info;
722 
723 	if (route_cb.any_count == 0)
724 		return;
725 	memset(&info, 0, sizeof(info));
726 	memset(&ifm, 0, sizeof(ifm));
727 	ifm.ifm_index = ifp->if_index;
728 	ifm.ifm_flags = ifp->if_flags;
729 	ifm.ifm_data = ifp->if_data;
730 	ifm.ifm_addrs = 0;
731 	m = rt_msg1(RTM_IFINFO, &info, (caddr_t)&ifm, sizeof(ifm));
732 	if (m == 0)
733 		return;
734 	route_proto.sp_protocol = 0;
735 	raw_input(m, &route_proto, &route_src, &route_dst);
736 #ifdef COMPAT_14
737 	memset(&info, 0, sizeof(info));
738 	memset(&oifm, 0, sizeof(oifm));
739 	oifm.ifm_index = ifp->if_index;
740 	oifm.ifm_flags = ifp->if_flags;
741 	oifm.ifm_data.ifi_type = ifp->if_data.ifi_type;
742 	oifm.ifm_data.ifi_addrlen = ifp->if_data.ifi_addrlen;
743 	oifm.ifm_data.ifi_hdrlen = ifp->if_data.ifi_hdrlen;
744 	oifm.ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
745 	oifm.ifm_data.ifi_metric = ifp->if_data.ifi_metric;
746 	oifm.ifm_data.ifi_baudrate = ifp->if_data.ifi_baudrate;
747 	oifm.ifm_data.ifi_ipackets = ifp->if_data.ifi_ipackets;
748 	oifm.ifm_data.ifi_ierrors = ifp->if_data.ifi_ierrors;
749 	oifm.ifm_data.ifi_opackets = ifp->if_data.ifi_opackets;
750 	oifm.ifm_data.ifi_oerrors = ifp->if_data.ifi_oerrors;
751 	oifm.ifm_data.ifi_collisions = ifp->if_data.ifi_collisions;
752 	oifm.ifm_data.ifi_ibytes = ifp->if_data.ifi_ibytes;
753 	oifm.ifm_data.ifi_obytes = ifp->if_data.ifi_obytes;
754 	oifm.ifm_data.ifi_imcasts = ifp->if_data.ifi_imcasts;
755 	oifm.ifm_data.ifi_omcasts = ifp->if_data.ifi_omcasts;
756 	oifm.ifm_data.ifi_iqdrops = ifp->if_data.ifi_iqdrops;
757 	oifm.ifm_data.ifi_noproto = ifp->if_data.ifi_noproto;
758 	oifm.ifm_data.ifi_lastchange = ifp->if_data.ifi_lastchange;
759 	oifm.ifm_addrs = 0;
760 	m = rt_msg1(RTM_OIFINFO, &info, (caddr_t)&oifm, sizeof(oifm));
761 	if (m == 0)
762 		return;
763 	route_proto.sp_protocol = 0;
764 	raw_input(m, &route_proto, &route_src, &route_dst);
765 #endif
766 }
767 
768 /*
769  * This is called to generate messages from the routing socket
770  * indicating a network interface has had addresses associated with it.
771  * if we ever reverse the logic and replace messages TO the routing
772  * socket indicate a request to configure interfaces, then it will
773  * be unnecessary as the routing socket will automatically generate
774  * copies of it.
775  */
776 void
777 rt_newaddrmsg(cmd, ifa, error, rt)
778 	int cmd, error;
779 	struct ifaddr *ifa;
780 	struct rtentry *rt;
781 {
782 	struct rt_addrinfo info;
783 	struct sockaddr *sa = NULL;
784 	int pass;
785 	struct mbuf *m = NULL;
786 	struct ifnet *ifp = ifa->ifa_ifp;
787 
788 	if (route_cb.any_count == 0)
789 		return;
790 	for (pass = 1; pass < 3; pass++) {
791 		memset(&info, 0, sizeof(info));
792 		if ((cmd == RTM_ADD && pass == 1) ||
793 		    (cmd == RTM_DELETE && pass == 2)) {
794 			struct ifa_msghdr ifam;
795 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
796 
797 			ifaaddr = sa = ifa->ifa_addr;
798 			ifpaddr = TAILQ_FIRST(&ifp->if_addrlist)->ifa_addr;
799 			netmask = ifa->ifa_netmask;
800 			brdaddr = ifa->ifa_dstaddr;
801 			memset(&ifam, 0, sizeof(ifam));
802 			ifam.ifam_index = ifp->if_index;
803 			ifam.ifam_metric = ifa->ifa_metric;
804 			ifam.ifam_flags = ifa->ifa_flags;
805 			m = rt_msg1(ncmd, &info, (caddr_t)&ifam, sizeof(ifam));
806 			if (m == NULL)
807 				continue;
808 			mtod(m, struct ifa_msghdr *)->ifam_addrs =
809 			    info.rti_addrs;
810 		}
811 		if ((cmd == RTM_ADD && pass == 2) ||
812 		    (cmd == RTM_DELETE && pass == 1)) {
813 			struct rt_msghdr rtm;
814 
815 			if (rt == 0)
816 				continue;
817 			netmask = rt_mask(rt);
818 			dst = sa = rt_key(rt);
819 			gate = rt->rt_gateway;
820 			memset(&rtm, 0, sizeof(rtm));
821 			rtm.rtm_index = ifp->if_index;
822 			rtm.rtm_flags |= rt->rt_flags;
823 			rtm.rtm_errno = error;
824 			m = rt_msg1(cmd, &info, (caddr_t)&rtm, sizeof(rtm));
825 			if (m == NULL)
826 				continue;
827 			mtod(m, struct rt_msghdr *)->rtm_addrs = info.rti_addrs;
828 		}
829 		route_proto.sp_protocol = sa ? sa->sa_family : 0;
830 		raw_input(m, &route_proto, &route_src, &route_dst);
831 	}
832 }
833 
834 /*
835  * This is called to generate routing socket messages indicating
836  * network interface arrival and departure.
837  */
838 void
839 rt_ifannouncemsg(ifp, what)
840 	struct ifnet *ifp;
841 	int what;
842 {
843 	struct if_announcemsghdr ifan;
844 	struct mbuf *m;
845 	struct rt_addrinfo info;
846 
847 	if (route_cb.any_count == 0)
848 		return;
849 	memset(&info, 0, sizeof(info));
850 	memset(&ifan, 0, sizeof(ifan));
851 	ifan.ifan_index = ifp->if_index;
852 	strcpy(ifan.ifan_name, ifp->if_xname);
853 	ifan.ifan_what = what;
854 	m = rt_msg1(RTM_IFANNOUNCE, &info, (caddr_t)&ifan, sizeof(ifan));
855 	if (m == 0)
856 		return;
857 	route_proto.sp_protocol = 0;
858 	raw_input(m, &route_proto, &route_src, &route_dst);
859 }
860 
861 /*
862  * This is used in dumping the kernel table via sysctl().
863  */
864 static int
865 sysctl_dumpentry(rn, v)
866 	struct radix_node *rn;
867 	void *v;
868 {
869 	struct walkarg *w = v;
870 	struct rtentry *rt = (struct rtentry *)rn;
871 	int error = 0, size;
872 	struct rt_addrinfo info;
873 
874 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
875 		return 0;
876 	memset(&info, 0, sizeof(info));
877 	dst = rt_key(rt);
878 	gate = rt->rt_gateway;
879 	netmask = rt_mask(rt);
880 	genmask = rt->rt_genmask;
881 	if (rt->rt_ifp) {
882 		ifpaddr = TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
883 		ifaaddr = rt->rt_ifa->ifa_addr;
884 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
885 			brdaddr = rt->rt_ifa->ifa_dstaddr;
886 	}
887 	if ((error = rt_msg2(RTM_GET, &info, 0, w, &size)))
888 		return (error);
889 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
890 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
891 
892 		rtm->rtm_flags = rt->rt_flags;
893 		rtm->rtm_use = rt->rt_use;
894 		rtm->rtm_rmx = rt->rt_rmx;
895 		rtm->rtm_index = rt->rt_ifp->if_index;
896 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
897 		rtm->rtm_addrs = info.rti_addrs;
898 		if ((error = copyout(rtm, w->w_where, size)) != 0)
899 			w->w_where = NULL;
900 		else
901 			w->w_where += size;
902 	}
903 	return (error);
904 }
905 
906 static int
907 sysctl_iflist(af, w, type)
908 	int	af;
909 	struct	walkarg *w;
910 	int type;
911 {
912 	struct ifnet *ifp;
913 	struct ifaddr *ifa;
914 	struct	rt_addrinfo info;
915 	int	len, error = 0;
916 
917 	memset(&info, 0, sizeof(info));
918 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
919 		if (w->w_arg && w->w_arg != ifp->if_index)
920 			continue;
921 		ifa = TAILQ_FIRST(&ifp->if_addrlist);
922 		ifpaddr = ifa->ifa_addr;
923 		switch (type) {
924 		case NET_RT_IFLIST:
925 			error =
926 			    rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w, &len);
927 			break;
928 #ifdef COMPAT_14
929 		case NET_RT_OIFLIST:
930 			error =
931 			    rt_msg2(RTM_OIFINFO, &info, (caddr_t)0, w, &len);
932 			break;
933 #endif
934 		default:
935 			panic("sysctl_iflist(1)");
936 		}
937 		if (error)
938 			return (error);
939 		ifpaddr = 0;
940 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
941 			switch (type) {
942 			case NET_RT_IFLIST: {
943 				struct if_msghdr *ifm;
944 
945 				ifm = (struct if_msghdr *)w->w_tmem;
946 				ifm->ifm_index = ifp->if_index;
947 				ifm->ifm_flags = ifp->if_flags;
948 				ifm->ifm_data = ifp->if_data;
949 				ifm->ifm_addrs = info.rti_addrs;
950 				error = copyout(ifm, w->w_where, len);
951 				if (error)
952 					return (error);
953 				w->w_where += len;
954 				break;
955 			}
956 
957 #ifdef COMPAT_14
958 			case NET_RT_OIFLIST: {
959 				struct if_msghdr14 *ifm;
960 
961 				ifm = (struct if_msghdr14 *)w->w_tmem;
962 				ifm->ifm_index = ifp->if_index;
963 				ifm->ifm_flags = ifp->if_flags;
964 				ifm->ifm_data.ifi_type = ifp->if_data.ifi_type;
965 				ifm->ifm_data.ifi_addrlen =
966 				    ifp->if_data.ifi_addrlen;
967 				ifm->ifm_data.ifi_hdrlen =
968 				    ifp->if_data.ifi_hdrlen;
969 				ifm->ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
970 				ifm->ifm_data.ifi_metric =
971 				    ifp->if_data.ifi_metric;
972 				ifm->ifm_data.ifi_baudrate =
973 				    ifp->if_data.ifi_baudrate;
974 				ifm->ifm_data.ifi_ipackets =
975 				    ifp->if_data.ifi_ipackets;
976 				ifm->ifm_data.ifi_ierrors =
977 				    ifp->if_data.ifi_ierrors;
978 				ifm->ifm_data.ifi_opackets =
979 				    ifp->if_data.ifi_opackets;
980 				ifm->ifm_data.ifi_oerrors =
981 				    ifp->if_data.ifi_oerrors;
982 				ifm->ifm_data.ifi_collisions =
983 				    ifp->if_data.ifi_collisions;
984 				ifm->ifm_data.ifi_ibytes =
985 				    ifp->if_data.ifi_ibytes;
986 				ifm->ifm_data.ifi_obytes =
987 				    ifp->if_data.ifi_obytes;
988 				ifm->ifm_data.ifi_imcasts =
989 				    ifp->if_data.ifi_imcasts;
990 				ifm->ifm_data.ifi_omcasts =
991 				    ifp->if_data.ifi_omcasts;
992 				ifm->ifm_data.ifi_iqdrops =
993 				    ifp->if_data.ifi_iqdrops;
994 				ifm->ifm_data.ifi_noproto =
995 				    ifp->if_data.ifi_noproto;
996 				ifm->ifm_data.ifi_lastchange =
997 				    ifp->if_data.ifi_lastchange;
998 				ifm->ifm_addrs = info.rti_addrs;
999 				error = copyout(ifm, w->w_where, len);
1000 				if (error)
1001 					return (error);
1002 				w->w_where += len;
1003 				break;
1004 			}
1005 #endif
1006 			default:
1007 				panic("sysctl_iflist(2)");
1008 			}
1009 		}
1010 		while ((ifa = TAILQ_NEXT(ifa, ifa_list)) != NULL) {
1011 			if (af && af != ifa->ifa_addr->sa_family)
1012 				continue;
1013 			ifaaddr = ifa->ifa_addr;
1014 			netmask = ifa->ifa_netmask;
1015 			brdaddr = ifa->ifa_dstaddr;
1016 			if ((error = rt_msg2(RTM_NEWADDR, &info, 0, w, &len)))
1017 				return (error);
1018 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1019 				struct ifa_msghdr *ifam;
1020 
1021 				ifam = (struct ifa_msghdr *)w->w_tmem;
1022 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1023 				ifam->ifam_flags = ifa->ifa_flags;
1024 				ifam->ifam_metric = ifa->ifa_metric;
1025 				ifam->ifam_addrs = info.rti_addrs;
1026 				error = copyout(w->w_tmem, w->w_where, len);
1027 				if (error)
1028 					return (error);
1029 				w->w_where += len;
1030 			}
1031 		}
1032 		ifaaddr = netmask = brdaddr = 0;
1033 	}
1034 	return (0);
1035 }
1036 
1037 static int
1038 sysctl_rtable(name, namelen, where, given, new, newlen)
1039 	int	*name;
1040 	u_int	namelen;
1041 	void 	*where;
1042 	size_t	*given;
1043 	void	*new;
1044 	size_t	newlen;
1045 {
1046 	struct radix_node_head *rnh;
1047 	int	i, s, error = EINVAL;
1048 	u_char  af;
1049 	struct	walkarg w;
1050 
1051 	if (new)
1052 		return (EPERM);
1053 	if (namelen != 3)
1054 		return (EINVAL);
1055 	af = name[0];
1056 	w.w_tmemneeded = 0;
1057 	w.w_tmemsize = 0;
1058 	w.w_tmem = NULL;
1059 again:
1060 	/* we may return here if a later [re]alloc of the t_mem buffer fails */
1061 	if (w.w_tmemneeded) {
1062 		w.w_tmem = (caddr_t) malloc(w.w_tmemneeded, M_RTABLE, M_WAITOK);
1063 		w.w_tmemsize = w.w_tmemneeded;
1064 		w.w_tmemneeded = 0;
1065 	}
1066 	w.w_op = name[1];
1067 	w.w_arg = name[2];
1068 	w.w_given = *given;
1069 	w.w_needed = 0 - w.w_given;
1070 	w.w_where = where;
1071 
1072 	s = splsoftnet();
1073 	switch (w.w_op) {
1074 
1075 	case NET_RT_DUMP:
1076 	case NET_RT_FLAGS:
1077 		for (i = 1; i <= AF_MAX; i++)
1078 			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
1079 			    (error = (*rnh->rnh_walktree)(rnh,
1080 			    sysctl_dumpentry, &w)))
1081 				break;
1082 		break;
1083 
1084 #ifdef COMPAT_14
1085 	case NET_RT_OIFLIST:
1086 		error = sysctl_iflist(af, &w, w.w_op);
1087 		break;
1088 #endif
1089 
1090 	case NET_RT_IFLIST:
1091 		error = sysctl_iflist(af, &w, w.w_op);
1092 	}
1093 	splx(s);
1094 
1095 	/* check to see if we couldn't allocate memory with NOWAIT */
1096 	if (error == ENOBUFS && w.w_tmem == 0 && w.w_tmemneeded)
1097 		goto again;
1098 
1099 	if (w.w_tmem)
1100 		free(w.w_tmem, M_RTABLE);
1101 	w.w_needed += w.w_given;
1102 	if (where) {
1103 		*given = w.w_where - (caddr_t) where;
1104 		if (*given < w.w_needed)
1105 			return (ENOMEM);
1106 	} else {
1107 		*given = (11 * w.w_needed) / 10;
1108 	}
1109 	return (error);
1110 }
1111 
/*
 * Definitions of protocols supported in the ROUTE domain.
 *
 * The domain has a single SOCK_RAW protocol entry.  It uses the generic
 * raw_input, raw_ctlinput and raw_init routines together with this
 * file's route_output, route_usrreq and sysctl_rtable.
 */

struct protosw routesw[] = {
{ SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
  raw_input,	route_output,	raw_ctlinput,	0,
  route_usrreq,
  raw_init,	0,		0,		0,
  sysctl_rtable,
}
};

/* The last initializer points one past the final entry of routesw. */
struct domain routedomain =
    { PF_ROUTE, "route", route_init, 0, 0,
      routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
1128