xref: /netbsd-src/sys/net/rtsock.c (revision 481fca6e59249d8ffcf24fef7cfbe7b131bfb080)
1 /*	$NetBSD: rtsock.c,v 1.41 2000/06/28 02:53:49 mrg Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1988, 1991, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
65  */
66 
67 #include "opt_inet.h"
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/proc.h>
72 #include <sys/mbuf.h>
73 #include <sys/socket.h>
74 #include <sys/socketvar.h>
75 #include <sys/domain.h>
76 #include <sys/protosw.h>
77 
78 #include <uvm/uvm_extern.h>
79 
80 #include <sys/sysctl.h>
81 
82 #include <net/if.h>
83 #include <net/route.h>
84 #include <net/raw_cb.h>
85 
86 #include <machine/stdarg.h>
87 
88 struct	sockaddr route_dst = { 2, PF_ROUTE, };
89 struct	sockaddr route_src = { 2, PF_ROUTE, };
90 struct	sockproto route_proto = { PF_ROUTE, };
91 
92 struct walkarg {
93 	int	w_op;
94 	int	w_arg;
95 	int	w_given;
96 	int	w_needed;
97 	caddr_t	w_where;
98 	int	w_tmemsize;
99 	int	w_tmemneeded;
100 	caddr_t	w_tmem;
101 };
102 
103 static struct mbuf *rt_msg1 __P((int, struct rt_addrinfo *, caddr_t, int));
104 static int rt_msg2 __P((int, struct rt_addrinfo *, caddr_t, struct walkarg *,
105     int *));
106 static void rt_xaddrs __P((caddr_t, caddr_t, struct rt_addrinfo *));
107 static int sysctl_dumpentry __P((struct radix_node *, void *));
108 static int sysctl_iflist __P((int, struct walkarg *, int));
109 static int sysctl_rtable __P((int *, u_int, void *, size_t *, void *, size_t));
110 static __inline void rt_adjustcount __P((int, int));
111 
/*
 * Sleazy use of local variables throughout file, warning!!!!
 *
 * Each name below expands to a slot of a variable called `info', so it
 * can only be used inside a function that declares
 * `struct rt_addrinfo info'.
 */
#define dst		info.rti_info[RTAX_DST]
#define gate		info.rti_info[RTAX_GATEWAY]
#define netmask		info.rti_info[RTAX_NETMASK]
#define genmask		info.rti_info[RTAX_GENMASK]
#define ifpaddr		info.rti_info[RTAX_IFP]
#define ifaaddr		info.rti_info[RTAX_IFA]
#define brdaddr		info.rti_info[RTAX_BRD]
120 
121 static __inline void
122 rt_adjustcount(af, cnt)
123 	int af, cnt;
124 {
125 	route_cb.any_count += cnt;
126 	switch (af) {
127 	case AF_INET:
128 		route_cb.ip_count += cnt;
129 		return;
130 #ifdef INET6
131 	case AF_INET6:
132 		route_cb.ip6_count += cnt;
133 		return;
134 #endif
135 	case AF_IPX:
136 		route_cb.ipx_count += cnt;
137 		return;
138 	case AF_NS:
139 		route_cb.ns_count += cnt;
140 		return;
141 	case AF_ISO:
142 		route_cb.iso_count += cnt;
143 		return;
144 	}
145 }
146 
147 /*ARGSUSED*/
148 int
149 route_usrreq(so, req, m, nam, control, p)
150 	struct socket *so;
151 	int req;
152 	struct mbuf *m, *nam, *control;
153 	struct proc *p;
154 {
155 	int error = 0;
156 	struct rawcb *rp = sotorawcb(so);
157 	int s;
158 
159 	if (req == PRU_ATTACH) {
160 		MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
161 		if ((so->so_pcb = rp) != NULL)
162 			bzero(so->so_pcb, sizeof(*rp));
163 
164 	}
165 	if (req == PRU_DETACH && rp)
166 		rt_adjustcount(rp->rcb_proto.sp_protocol, -1);
167 	s = splsoftnet();
168 
169 	/*
170 	 * Don't call raw_usrreq() in the attach case, because
171 	 * we want to allow non-privileged processes to listen on
172 	 * and send "safe" commands to the routing socket.
173 	 */
174 	if (req == PRU_ATTACH) {
175 		if (p == 0)
176 			error = EACCES;
177 		else
178 			error = raw_attach(so, (int)(long)nam);
179 	} else
180 		error = raw_usrreq(so, req, m, nam, control, p);
181 
182 	rp = sotorawcb(so);
183 	if (req == PRU_ATTACH && rp) {
184 		if (error) {
185 			free((caddr_t)rp, M_PCB);
186 			splx(s);
187 			return (error);
188 		}
189 		rt_adjustcount(rp->rcb_proto.sp_protocol, 1);
190 		rp->rcb_laddr = &route_src;
191 		rp->rcb_faddr = &route_dst;
192 		soisconnected(so);
193 		so->so_options |= SO_USELOOPBACK;
194 	}
195 	splx(s);
196 	return (error);
197 }
198 
199 /*ARGSUSED*/
200 int
201 #if __STDC__
202 route_output(struct mbuf *m, ...)
203 #else
204 route_output(m, va_alist)
205 	struct mbuf *m;
206 	va_dcl
207 #endif
208 {
209 	struct rt_msghdr *rtm = 0;
210 	struct radix_node *rn = 0;
211 	struct rtentry *rt = 0;
212 	struct rtentry *saved_nrt = 0;
213 	struct radix_node_head *rnh;
214 	struct rt_addrinfo info;
215 	int len, error = 0;
216 	struct ifnet *ifp = 0;
217 	struct ifaddr *ifa = 0;
218 	struct socket *so;
219 	va_list ap;
220 
221 	va_start(ap, m);
222 	so = va_arg(ap, struct socket *);
223 	va_end(ap);
224 
225 	bzero(&info, sizeof(info));
226 #define senderr(e) do { error = e; goto flush;} while (0)
227 	if (m == 0 || ((m->m_len < sizeof(int32_t)) &&
228 	   (m = m_pullup(m, sizeof(int32_t))) == 0))
229 		return (ENOBUFS);
230 	if ((m->m_flags & M_PKTHDR) == 0)
231 		panic("route_output");
232 	len = m->m_pkthdr.len;
233 	if (len < sizeof(*rtm) ||
234 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
235 		dst = 0;
236 		senderr(EINVAL);
237 	}
238 	R_Malloc(rtm, struct rt_msghdr *, len);
239 	if (rtm == 0) {
240 		dst = 0;
241 		senderr(ENOBUFS);
242 	}
243 	m_copydata(m, 0, len, (caddr_t)rtm);
244 	if (rtm->rtm_version != RTM_VERSION) {
245 		dst = 0;
246 		senderr(EPROTONOSUPPORT);
247 	}
248 	rtm->rtm_pid = curproc->p_pid;
249 	info.rti_addrs = rtm->rtm_addrs;
250 	rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info);
251 	if (dst == 0 || (dst->sa_family >= AF_MAX))
252 		senderr(EINVAL);
253 	if (gate != 0 && (gate->sa_family >= AF_MAX))
254 		senderr(EINVAL);
255 	if (genmask) {
256 		struct radix_node *t;
257 		t = rn_addmask((caddr_t)genmask, 0, 1);
258 		if (t && Bcmp(genmask, t->rn_key, *(u_char *)genmask) == 0)
259 			genmask = (struct sockaddr *)(t->rn_key);
260 		else
261 			senderr(ENOBUFS);
262 	}
263 
264 	/*
265 	 * Verify that the caller has the appropriate privilege; RTM_GET
266 	 * is the only operation the non-superuser is allowed.
267 	 */
268 	if (rtm->rtm_type != RTM_GET &&
269 	    suser(curproc->p_ucred, &curproc->p_acflag) != 0)
270 		senderr(EACCES);
271 
272 	switch (rtm->rtm_type) {
273 
274 	case RTM_ADD:
275 		if (gate == 0)
276 			senderr(EINVAL);
277 		error = rtrequest(RTM_ADD, dst, gate, netmask,
278 		    rtm->rtm_flags, &saved_nrt);
279 		if (error == 0 && saved_nrt) {
280 			rt_setmetrics(rtm->rtm_inits,
281 			    &rtm->rtm_rmx, &saved_nrt->rt_rmx);
282 			saved_nrt->rt_refcnt--;
283 			saved_nrt->rt_genmask = genmask;
284 		}
285 		break;
286 
287 	case RTM_DELETE:
288 		error = rtrequest(RTM_DELETE, dst, gate, netmask,
289 		    rtm->rtm_flags, &saved_nrt);
290 		if (error == 0) {
291 			(rt = saved_nrt)->rt_refcnt++;
292 			goto report;
293 		}
294 		break;
295 
296 	case RTM_GET:
297 	case RTM_CHANGE:
298 	case RTM_LOCK:
299 		if ((rnh = rt_tables[dst->sa_family]) == 0) {
300 			senderr(EAFNOSUPPORT);
301 		}
302 		rn = rnh->rnh_lookup(dst, netmask, rnh);
303 		if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0) {
304 			senderr(ESRCH);
305 		}
306 		rt = (struct rtentry *)rn;
307 		rt->rt_refcnt++;
308 
309 		switch(rtm->rtm_type) {
310 
311 		case RTM_GET:
312 		report:
313 			dst = rt_key(rt);
314 			gate = rt->rt_gateway;
315 			netmask = rt_mask(rt);
316 			genmask = rt->rt_genmask;
317 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
318 				if ((ifp = rt->rt_ifp) != NULL) {
319 					ifpaddr = ifp->if_addrlist.tqh_first->ifa_addr;
320 					ifaaddr = rt->rt_ifa->ifa_addr;
321 					if (ifp->if_flags & IFF_POINTOPOINT)
322 						brdaddr = rt->rt_ifa->ifa_dstaddr;
323 					else
324 						brdaddr = 0;
325 					rtm->rtm_index = ifp->if_index;
326 				} else {
327 					ifpaddr = 0;
328 					ifaaddr = 0;
329 				}
330 			}
331 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)0,
332 			    (struct walkarg *)0, &len);
333 			if (len > rtm->rtm_msglen) {
334 				struct rt_msghdr *new_rtm;
335 				R_Malloc(new_rtm, struct rt_msghdr *, len);
336 				if (new_rtm == 0)
337 					senderr(ENOBUFS);
338 				Bcopy(rtm, new_rtm, rtm->rtm_msglen);
339 				Free(rtm); rtm = new_rtm;
340 			}
341 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm,
342 			    (struct walkarg *)0, 0);
343 			rtm->rtm_flags = rt->rt_flags;
344 			rtm->rtm_rmx = rt->rt_rmx;
345 			rtm->rtm_addrs = info.rti_addrs;
346 			break;
347 
348 		case RTM_CHANGE:
349 			if (gate && rt_setgate(rt, rt_key(rt), gate))
350 				senderr(EDQUOT);
351 			/* new gateway could require new ifaddr, ifp;
352 			   flags may also be different; ifp may be specified
353 			   by ll sockaddr when protocol address is ambiguous */
354 			if (ifpaddr && (ifa = ifa_ifwithnet(ifpaddr)) &&
355 			    (ifp = ifa->ifa_ifp) && (ifaaddr || gate))
356 				ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate,
357 				    ifp);
358 			else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) ||
359 			    (gate && (ifa = ifa_ifwithroute(rt->rt_flags,
360 			    rt_key(rt), gate))))
361 				ifp = ifa->ifa_ifp;
362 			if (ifa) {
363 				struct ifaddr *oifa = rt->rt_ifa;
364 				if (oifa != ifa) {
365 				    if (oifa && oifa->ifa_rtrequest)
366 					oifa->ifa_rtrequest(RTM_DELETE,
367 					rt, gate);
368 				    IFAFREE(rt->rt_ifa);
369 				    rt->rt_ifa = ifa;
370 				    IFAREF(rt->rt_ifa);
371 				    rt->rt_ifp = ifp;
372 				}
373 			}
374 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
375 			    &rt->rt_rmx);
376 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
377 				rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, gate);
378 			if (genmask)
379 				rt->rt_genmask = genmask;
380 			/*
381 			 * Fall into
382 			 */
383 		case RTM_LOCK:
384 			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
385 			rt->rt_rmx.rmx_locks |=
386 			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
387 			break;
388 		}
389 		break;
390 
391 	default:
392 		senderr(EOPNOTSUPP);
393 	}
394 
395 flush:
396 	if (rtm) {
397 		if (error)
398 			rtm->rtm_errno = error;
399 		else
400 			rtm->rtm_flags |= RTF_DONE;
401 	}
402 	if (rt)
403 		rtfree(rt);
404     {
405 	struct rawcb *rp = 0;
406 	/*
407 	 * Check to see if we don't want our own messages.
408 	 */
409 	if ((so->so_options & SO_USELOOPBACK) == 0) {
410 		if (route_cb.any_count <= 1) {
411 			if (rtm)
412 				Free(rtm);
413 			m_freem(m);
414 			return (error);
415 		}
416 		/* There is another listener, so construct message */
417 		rp = sotorawcb(so);
418 	}
419 	if (rtm) {
420 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
421 		Free(rtm);
422 	}
423 	if (rp)
424 		rp->rcb_proto.sp_family = 0; /* Avoid us */
425 	if (dst)
426 		route_proto.sp_protocol = dst->sa_family;
427 	raw_input(m, &route_proto, &route_src, &route_dst);
428 	if (rp)
429 		rp->rcb_proto.sp_family = PF_ROUTE;
430     }
431 	return (error);
432 }
433 
434 void
435 rt_setmetrics(which, in, out)
436 	u_long which;
437 	struct rt_metrics *in, *out;
438 {
439 #define metric(f, e) if (which & (f)) out->e = in->e;
440 	metric(RTV_RPIPE, rmx_recvpipe);
441 	metric(RTV_SPIPE, rmx_sendpipe);
442 	metric(RTV_SSTHRESH, rmx_ssthresh);
443 	metric(RTV_RTT, rmx_rtt);
444 	metric(RTV_RTTVAR, rmx_rttvar);
445 	metric(RTV_HOPCOUNT, rmx_hopcount);
446 	metric(RTV_MTU, rmx_mtu);
447 	metric(RTV_EXPIRE, rmx_expire);
448 #undef metric
449 }
450 
/*
 * ROUNDUP(a): round a positive length up to the next multiple of
 * sizeof(long); a length of zero (or less) still occupies sizeof(long).
 * ADVANCE(x, n): step pointer x past sockaddr n, padding included.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (((a) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) \
	    : sizeof(long))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
454 
455 static void
456 rt_xaddrs(cp, cplim, rtinfo)
457 	caddr_t cp, cplim;
458 	struct rt_addrinfo *rtinfo;
459 {
460 	struct sockaddr *sa;
461 	int i;
462 
463 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
464 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
465 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
466 			continue;
467 		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;
468 		ADVANCE(cp, sa);
469 	}
470 }
471 
472 static struct mbuf *
473 rt_msg1(type, rtinfo, data, datalen)
474 	int type;
475 	struct rt_addrinfo *rtinfo;
476 	caddr_t data;
477 	int datalen;
478 {
479 	struct rt_msghdr *rtm;
480 	struct mbuf *m;
481 	int i;
482 	struct sockaddr *sa;
483 	int len, dlen;
484 
485 	m = m_gethdr(M_DONTWAIT, MT_DATA);
486 	if (m == 0)
487 		return (m);
488 	switch (type) {
489 
490 	case RTM_DELADDR:
491 	case RTM_NEWADDR:
492 		len = sizeof(struct ifa_msghdr);
493 		break;
494 
495 #ifdef COMPAT_14
496 	case RTM_OIFINFO:
497 		len = sizeof(struct if_msghdr14);
498 		break;
499 #endif
500 
501 	case RTM_IFINFO:
502 		len = sizeof(struct if_msghdr);
503 		break;
504 
505 	case RTM_IFANNOUNCE:
506 		len = sizeof(struct if_announcemsghdr);
507 		break;
508 
509 	default:
510 		len = sizeof(struct rt_msghdr);
511 	}
512 	if (len > MHLEN + MLEN)
513 		panic("rt_msg1: message too long");
514 	else if (len > MHLEN) {
515 		m->m_next = m_get(M_DONTWAIT, MT_DATA);
516 		if (m->m_next == NULL) {
517 			m_freem(m);
518 			return (NULL);
519 		}
520 		m->m_pkthdr.len = len;
521 		m->m_len = MHLEN;
522 		m->m_next->m_len = len - MHLEN;
523 	} else {
524 		m->m_pkthdr.len = m->m_len = len;
525 	}
526 	m->m_pkthdr.rcvif = 0;
527 	m_copyback(m, 0, datalen, data);
528 	rtm = mtod(m, struct rt_msghdr *);
529 	for (i = 0; i < RTAX_MAX; i++) {
530 		if ((sa = rtinfo->rti_info[i]) == NULL)
531 			continue;
532 		rtinfo->rti_addrs |= (1 << i);
533 		dlen = ROUNDUP(sa->sa_len);
534 		m_copyback(m, len, dlen, (caddr_t)sa);
535 		len += dlen;
536 	}
537 	rtm->rtm_msglen = len;
538 	rtm->rtm_version = RTM_VERSION;
539 	rtm->rtm_type = type;
540 	return (m);
541 }
542 
543 /*
544  * rt_msg2
545  *
546  *	 fills 'cp' or 'w'.w_tmem with the routing socket message and
547  *		returns the length of the message in 'lenp'.
548  *
549  * if walkarg is 0, cp is expected to be 0 or a buffer large enough to hold
550  *	the message
551  * otherwise walkarg's w_needed is updated and if the user buffer is
552  *	specified and w_needed indicates space exists the information is copied
553  *	into the temp space (w_tmem). w_tmem is [re]allocated if necessary,
554  *	if the allocation fails ENOBUFS is returned.
555  */
556 static int
557 rt_msg2(type, rtinfo, cp, w, lenp)
558 	int type;
559 	struct rt_addrinfo *rtinfo;
560 	caddr_t cp;
561 	struct walkarg *w;
562 	int *lenp;
563 {
564 	int i;
565 	int len, dlen, second_time = 0;
566 	caddr_t cp0;
567 
568 	rtinfo->rti_addrs = 0;
569 again:
570 	switch (type) {
571 
572 	case RTM_DELADDR:
573 	case RTM_NEWADDR:
574 		len = sizeof(struct ifa_msghdr);
575 		break;
576 #ifdef COMPAT_14
577 	case RTM_OIFINFO:
578 		len = sizeof(struct if_msghdr14);
579 		break;
580 #endif
581 
582 	case RTM_IFINFO:
583 		len = sizeof(struct if_msghdr);
584 		break;
585 
586 	default:
587 		len = sizeof(struct rt_msghdr);
588 	}
589 	if ((cp0 = cp) != NULL)
590 		cp += len;
591 	for (i = 0; i < RTAX_MAX; i++) {
592 		struct sockaddr *sa;
593 
594 		if ((sa = rtinfo->rti_info[i]) == 0)
595 			continue;
596 		rtinfo->rti_addrs |= (1 << i);
597 		dlen = ROUNDUP(sa->sa_len);
598 		if (cp) {
599 			bcopy(sa, cp, (unsigned)dlen);
600 			cp += dlen;
601 		}
602 		len += dlen;
603 	}
604 	if (cp == 0 && w != NULL && !second_time) {
605 		struct walkarg *rw = w;
606 
607 		rw->w_needed += len;
608 		if (rw->w_needed <= 0 && rw->w_where) {
609 			if (rw->w_tmemsize < len) {
610 				if (rw->w_tmem)
611 					free(rw->w_tmem, M_RTABLE);
612 				rw->w_tmem = (caddr_t) malloc(len, M_RTABLE,
613 				    M_NOWAIT);
614 				if (rw->w_tmem)
615 					rw->w_tmemsize = len;
616 			}
617 			if (rw->w_tmem) {
618 				cp = rw->w_tmem;
619 				second_time = 1;
620 				goto again;
621 			} else {
622 				rw->w_tmemneeded = len;
623 				return (ENOBUFS);
624 			}
625 		}
626 	}
627 	if (cp) {
628 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
629 
630 		rtm->rtm_version = RTM_VERSION;
631 		rtm->rtm_type = type;
632 		rtm->rtm_msglen = len;
633 	}
634 	if (lenp)
635 		*lenp = len;
636 	return (0);
637 }
638 
639 /*
640  * This routine is called to generate a message from the routing
641  * socket indicating that a redirect has occured, a routing lookup
642  * has failed, or that a protocol has detected timeouts to a particular
643  * destination.
644  */
645 void
646 rt_missmsg(type, rtinfo, flags, error)
647 	int type, flags, error;
648 	struct rt_addrinfo *rtinfo;
649 {
650 	struct rt_msghdr rtm;
651 	struct mbuf *m;
652 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
653 
654 	if (route_cb.any_count == 0)
655 		return;
656 	bzero(&rtm, sizeof(rtm));
657 	rtm.rtm_flags = RTF_DONE | flags;
658 	rtm.rtm_errno = error;
659 	m = rt_msg1(type, rtinfo, (caddr_t)&rtm, sizeof(rtm));
660 	if (m == 0)
661 		return;
662 	mtod(m, struct rt_msghdr *)->rtm_addrs = rtinfo->rti_addrs;
663 	route_proto.sp_protocol = sa ? sa->sa_family : 0;
664 	raw_input(m, &route_proto, &route_src, &route_dst);
665 }
666 
667 /*
668  * This routine is called to generate a message from the routing
669  * socket indicating that the status of a network interface has changed.
670  */
671 void
672 rt_ifmsg(ifp)
673 	struct ifnet *ifp;
674 {
675 	struct if_msghdr ifm;
676 #ifdef COMPAT_14
677 	struct if_msghdr14 oifm;
678 #endif
679 	struct mbuf *m;
680 	struct rt_addrinfo info;
681 
682 	if (route_cb.any_count == 0)
683 		return;
684 	bzero(&info, sizeof(info));
685 	bzero(&ifm, sizeof(ifm));
686 	ifm.ifm_index = ifp->if_index;
687 	ifm.ifm_flags = ifp->if_flags;
688 	ifm.ifm_data = ifp->if_data;
689 	ifm.ifm_addrs = 0;
690 	m = rt_msg1(RTM_IFINFO, &info, (caddr_t)&ifm, sizeof(ifm));
691 	if (m == 0)
692 		return;
693 	route_proto.sp_protocol = 0;
694 	raw_input(m, &route_proto, &route_src, &route_dst);
695 #ifdef COMPAT_14
696 	bzero(&info, sizeof(info));
697 	bzero(&oifm, sizeof(ifm));
698 	oifm.ifm_index = ifp->if_index;
699 	oifm.ifm_flags = ifp->if_flags;
700 	oifm.ifm_data.ifi_type = ifp->if_data.ifi_type;
701 	oifm.ifm_data.ifi_addrlen = ifp->if_data.ifi_addrlen;
702 	oifm.ifm_data.ifi_hdrlen = ifp->if_data.ifi_hdrlen;
703 	oifm.ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
704 	oifm.ifm_data.ifi_metric = ifp->if_data.ifi_metric;
705 	oifm.ifm_data.ifi_baudrate = ifp->if_data.ifi_baudrate;
706 	oifm.ifm_data.ifi_ipackets = ifp->if_data.ifi_ipackets;
707 	oifm.ifm_data.ifi_ierrors = ifp->if_data.ifi_ierrors;
708 	oifm.ifm_data.ifi_opackets = ifp->if_data.ifi_opackets;
709 	oifm.ifm_data.ifi_oerrors = ifp->if_data.ifi_oerrors;
710 	oifm.ifm_data.ifi_collisions = ifp->if_data.ifi_collisions;
711 	oifm.ifm_data.ifi_ibytes = ifp->if_data.ifi_ibytes;
712 	oifm.ifm_data.ifi_obytes = ifp->if_data.ifi_obytes;
713 	oifm.ifm_data.ifi_imcasts = ifp->if_data.ifi_imcasts;
714 	oifm.ifm_data.ifi_omcasts = ifp->if_data.ifi_omcasts;
715 	oifm.ifm_data.ifi_iqdrops = ifp->if_data.ifi_iqdrops;
716 	oifm.ifm_data.ifi_noproto = ifp->if_data.ifi_noproto;
717 	oifm.ifm_data.ifi_lastchange = ifp->if_data.ifi_lastchange;
718 	oifm.ifm_addrs = 0;
719 	m = rt_msg1(RTM_OIFINFO, &info, (caddr_t)&oifm, sizeof(oifm));
720 	if (m == 0)
721 		return;
722 	route_proto.sp_protocol = 0;
723 	raw_input(m, &route_proto, &route_src, &route_dst);
724 #endif
725 }
726 
727 /*
728  * This is called to generate messages from the routing socket
729  * indicating a network interface has had addresses associated with it.
730  * if we ever reverse the logic and replace messages TO the routing
731  * socket indicate a request to configure interfaces, then it will
732  * be unnecessary as the routing socket will automatically generate
733  * copies of it.
734  */
735 void
736 rt_newaddrmsg(cmd, ifa, error, rt)
737 	int cmd, error;
738 	struct ifaddr *ifa;
739 	struct rtentry *rt;
740 {
741 	struct rt_addrinfo info;
742 	struct sockaddr *sa = NULL;
743 	int pass;
744 	struct mbuf *m = NULL;
745 	struct ifnet *ifp = ifa->ifa_ifp;
746 
747 	if (route_cb.any_count == 0)
748 		return;
749 	for (pass = 1; pass < 3; pass++) {
750 		bzero(&info, sizeof(info));
751 		if ((cmd == RTM_ADD && pass == 1) ||
752 		    (cmd == RTM_DELETE && pass == 2)) {
753 			struct ifa_msghdr ifam;
754 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
755 
756 			ifaaddr = sa = ifa->ifa_addr;
757 			ifpaddr = ifp->if_addrlist.tqh_first->ifa_addr;
758 			netmask = ifa->ifa_netmask;
759 			brdaddr = ifa->ifa_dstaddr;
760 			bzero(&ifam, sizeof(ifam));
761 			ifam.ifam_index = ifp->if_index;
762 			ifam.ifam_metric = ifa->ifa_metric;
763 			ifam.ifam_flags = ifa->ifa_flags;
764 			m = rt_msg1(ncmd, &info, (caddr_t)&ifam, sizeof(ifam));
765 			if (m == NULL)
766 				continue;
767 			mtod(m, struct ifa_msghdr *)->ifam_addrs =
768 			    info.rti_addrs;
769 		}
770 		if ((cmd == RTM_ADD && pass == 2) ||
771 		    (cmd == RTM_DELETE && pass == 1)) {
772 			struct rt_msghdr rtm;
773 
774 			if (rt == 0)
775 				continue;
776 			netmask = rt_mask(rt);
777 			dst = sa = rt_key(rt);
778 			gate = rt->rt_gateway;
779 			bzero(&rtm, sizeof(rtm));
780 			rtm.rtm_index = ifp->if_index;
781 			rtm.rtm_flags |= rt->rt_flags;
782 			rtm.rtm_errno = error;
783 			m = rt_msg1(cmd, &info, (caddr_t)&rtm, sizeof(rtm));
784 			if (m == NULL)
785 				continue;
786 			mtod(m, struct rt_msghdr *)->rtm_addrs = info.rti_addrs;
787 		}
788 		route_proto.sp_protocol = sa ? sa->sa_family : 0;
789 		raw_input(m, &route_proto, &route_src, &route_dst);
790 	}
791 }
792 
793 /*
794  * This is called to generate routing socket messages indicating
795  * network interface arrival and departure.
796  */
797 void
798 rt_ifannouncemsg(ifp, what)
799 	struct ifnet *ifp;
800 	int what;
801 {
802 	struct if_announcemsghdr ifan;
803 	struct mbuf *m;
804 	struct rt_addrinfo info;
805 
806 	if (route_cb.any_count == 0)
807 		return;
808 	bzero(&info, sizeof(info));
809 	bzero(&ifan, sizeof(ifan));
810 	ifan.ifan_index = ifp->if_index;
811 	strcpy(ifan.ifan_name, ifp->if_xname);
812 	ifan.ifan_what = what;
813 	m = rt_msg1(RTM_IFANNOUNCE, &info, (caddr_t)&ifan, sizeof(ifan));
814 	if (m == 0)
815 		return;
816 	route_proto.sp_protocol = 0;
817 	raw_input(m, &route_proto, &route_src, &route_dst);
818 }
819 
820 /*
821  * This is used in dumping the kernel table via sysctl().
822  */
823 static int
824 sysctl_dumpentry(rn, v)
825 	struct radix_node *rn;
826 	void *v;
827 {
828 	struct walkarg *w = v;
829 	struct rtentry *rt = (struct rtentry *)rn;
830 	int error = 0, size;
831 	struct rt_addrinfo info;
832 
833 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
834 		return 0;
835 	bzero(&info, sizeof(info));
836 	dst = rt_key(rt);
837 	gate = rt->rt_gateway;
838 	netmask = rt_mask(rt);
839 	genmask = rt->rt_genmask;
840 	if (rt->rt_ifp) {
841 		ifpaddr = rt->rt_ifp->if_addrlist.tqh_first->ifa_addr;
842 		ifaaddr = rt->rt_ifa->ifa_addr;
843 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
844 			brdaddr = rt->rt_ifa->ifa_dstaddr;
845 	}
846 	if ((error = rt_msg2(RTM_GET, &info, 0, w, &size)))
847 		return (error);
848 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
849 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
850 
851 		rtm->rtm_flags = rt->rt_flags;
852 		rtm->rtm_use = rt->rt_use;
853 		rtm->rtm_rmx = rt->rt_rmx;
854 		rtm->rtm_index = rt->rt_ifp->if_index;
855 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
856 		rtm->rtm_addrs = info.rti_addrs;
857 		if ((error = copyout(rtm, w->w_where, size)) != 0)
858 			w->w_where = NULL;
859 		else
860 			w->w_where += size;
861 	}
862 	return (error);
863 }
864 
865 static int
866 sysctl_iflist(af, w, type)
867 	int	af;
868 	struct	walkarg *w;
869 	int type;
870 {
871 	struct ifnet *ifp;
872 	struct ifaddr *ifa;
873 	struct	rt_addrinfo info;
874 	int	len, error = 0;
875 
876 	bzero(&info, sizeof(info));
877 	for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) {
878 		if (w->w_arg && w->w_arg != ifp->if_index)
879 			continue;
880 		ifa = ifp->if_addrlist.tqh_first;
881 		ifpaddr = ifa->ifa_addr;
882 		switch(type) {
883 		case NET_RT_IFLIST:
884 			error =
885 			    rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w, &len);
886 			break;
887 #ifdef COMPAT_14
888 		case NET_RT_OIFLIST:
889 			error =
890 			    rt_msg2(RTM_OIFINFO, &info, (caddr_t)0, w, &len);
891 			break;
892 #endif
893 		default:
894 			panic("sysctl_iflist(1)");
895 		}
896 		if (error)
897 			return (error);
898 		ifpaddr = 0;
899 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
900 			switch(type) {
901 			case NET_RT_IFLIST: {
902 				struct if_msghdr *ifm;
903 
904 				ifm = (struct if_msghdr *)w->w_tmem;
905 				ifm->ifm_index = ifp->if_index;
906 				ifm->ifm_flags = ifp->if_flags;
907 				ifm->ifm_data = ifp->if_data;
908 				ifm->ifm_addrs = info.rti_addrs;
909 				error = copyout(ifm, w->w_where, len);
910 				if (error)
911 					return (error);
912 				w->w_where += len;
913 				break;
914 			}
915 
916 #ifdef COMPAT_14
917 			case NET_RT_OIFLIST: {
918 				struct if_msghdr14 *ifm;
919 
920 				ifm = (struct if_msghdr14 *)w->w_tmem;
921 				ifm->ifm_index = ifp->if_index;
922 				ifm->ifm_flags = ifp->if_flags;
923 				ifm->ifm_data.ifi_type = ifp->if_data.ifi_type;
924 				ifm->ifm_data.ifi_addrlen =
925 				    ifp->if_data.ifi_addrlen;
926 				ifm->ifm_data.ifi_hdrlen =
927 				    ifp->if_data.ifi_hdrlen;
928 				ifm->ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
929 				ifm->ifm_data.ifi_metric =
930 				    ifp->if_data.ifi_metric;
931 				ifm->ifm_data.ifi_baudrate =
932 				    ifp->if_data.ifi_baudrate;
933 				ifm->ifm_data.ifi_ipackets =
934 				    ifp->if_data.ifi_ipackets;
935 				ifm->ifm_data.ifi_ierrors =
936 				    ifp->if_data.ifi_ierrors;
937 				ifm->ifm_data.ifi_opackets =
938 				    ifp->if_data.ifi_opackets;
939 				ifm->ifm_data.ifi_oerrors =
940 				    ifp->if_data.ifi_oerrors;
941 				ifm->ifm_data.ifi_collisions =
942 				    ifp->if_data.ifi_collisions;
943 				ifm->ifm_data.ifi_ibytes =
944 				    ifp->if_data.ifi_ibytes;
945 				ifm->ifm_data.ifi_obytes =
946 				    ifp->if_data.ifi_obytes;
947 				ifm->ifm_data.ifi_imcasts =
948 				    ifp->if_data.ifi_imcasts;
949 				ifm->ifm_data.ifi_omcasts =
950 				    ifp->if_data.ifi_omcasts;
951 				ifm->ifm_data.ifi_iqdrops =
952 				    ifp->if_data.ifi_iqdrops;
953 				ifm->ifm_data.ifi_noproto =
954 				    ifp->if_data.ifi_noproto;
955 				ifm->ifm_data.ifi_lastchange =
956 				    ifp->if_data.ifi_lastchange;
957 				ifm->ifm_addrs = info.rti_addrs;
958 				error = copyout(ifm, w->w_where, len);
959 				if (error)
960 					return (error);
961 				w->w_where += len;
962 				break;
963 			}
964 #endif
965 			default:
966 				panic("sysctl_iflist(2)");
967 			}
968 		}
969 		while ((ifa = ifa->ifa_list.tqe_next) != NULL) {
970 			if (af && af != ifa->ifa_addr->sa_family)
971 				continue;
972 			ifaaddr = ifa->ifa_addr;
973 			netmask = ifa->ifa_netmask;
974 			brdaddr = ifa->ifa_dstaddr;
975 			if ((error = rt_msg2(RTM_NEWADDR, &info, 0, w, &len)))
976 				return (error);
977 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
978 				struct ifa_msghdr *ifam;
979 
980 				ifam = (struct ifa_msghdr *)w->w_tmem;
981 				ifam->ifam_index = ifa->ifa_ifp->if_index;
982 				ifam->ifam_flags = ifa->ifa_flags;
983 				ifam->ifam_metric = ifa->ifa_metric;
984 				ifam->ifam_addrs = info.rti_addrs;
985 				error = copyout(w->w_tmem, w->w_where, len);
986 				if (error)
987 					return (error);
988 				w->w_where += len;
989 			}
990 		}
991 		ifaaddr = netmask = brdaddr = 0;
992 	}
993 	return (0);
994 }
995 
996 static int
997 sysctl_rtable(name, namelen, where, given, new, newlen)
998 	int	*name;
999 	u_int	namelen;
1000 	void 	*where;
1001 	size_t	*given;
1002 	void	*new;
1003 	size_t	newlen;
1004 {
1005 	struct radix_node_head *rnh;
1006 	int	i, s, error = EINVAL;
1007 	u_char  af;
1008 	struct	walkarg w;
1009 
1010 	if (new)
1011 		return (EPERM);
1012 	if (namelen != 3)
1013 		return (EINVAL);
1014 	af = name[0];
1015 	w.w_tmemneeded = 0;
1016 	w.w_tmemsize = 0;
1017 	w.w_tmem = NULL;
1018 again:
1019 	/* we may return here if a later [re]alloc of the t_mem buffer fails */
1020 	if (w.w_tmemneeded) {
1021 		w.w_tmem = (caddr_t) malloc(w.w_tmemneeded, M_RTABLE, M_WAITOK);
1022 		w.w_tmemsize = w.w_tmemneeded;
1023 		w.w_tmemneeded = 0;
1024 	}
1025 	w.w_op = name[1];
1026 	w.w_arg = name[2];
1027 	w.w_given = *given;
1028 	w.w_needed = 0 - w.w_given;
1029 	w.w_where = where;
1030 
1031 	s = splsoftnet();
1032 	switch (w.w_op) {
1033 
1034 	case NET_RT_DUMP:
1035 	case NET_RT_FLAGS:
1036 		for (i = 1; i <= AF_MAX; i++)
1037 			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
1038 			    (error = (*rnh->rnh_walktree)(rnh,
1039 			    sysctl_dumpentry, &w)))
1040 				break;
1041 		break;
1042 
1043 #ifdef COMPAT_14
1044 	case NET_RT_OIFLIST:
1045 		error = sysctl_iflist(af, &w, w.w_op);
1046 		break;
1047 #endif
1048 
1049 	case NET_RT_IFLIST:
1050 		error = sysctl_iflist(af, &w, w.w_op);
1051 	}
1052 	splx(s);
1053 
1054 	/* check to see if we couldn't allocate memory with NOWAIT */
1055 	if (error == ENOBUFS && w.w_tmem == 0 && w.w_tmemneeded)
1056 		goto again;
1057 
1058 	if (w.w_tmem)
1059 		free(w.w_tmem, M_RTABLE);
1060 	w.w_needed += w.w_given;
1061 	if (where) {
1062 		*given = w.w_where - (caddr_t) where;
1063 		if (*given < w.w_needed)
1064 			return (ENOMEM);
1065 	} else {
1066 		*given = (11 * w.w_needed) / 10;
1067 	}
1068 	return (error);
1069 }
1070 
1071 /*
1072  * Definitions of protocols supported in the ROUTE domain.
1073  */
1074 
1075 extern	struct domain routedomain;		/* or at least forward */
1076 
1077 struct protosw routesw[] = {
1078 { SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
1079   raw_input,	route_output,	raw_ctlinput,	0,
1080   route_usrreq,
1081   raw_init,	0,		0,		0,
1082   sysctl_rtable,
1083 }
1084 };
1085 
1086 struct domain routedomain =
1087     { PF_ROUTE, "route", route_init, 0, 0,
1088       routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
1089