xref: /netbsd-src/sys/net/rtsock.c (revision 4472dbe5e3bd91ef2540bada7a7ca7384627ff9b)
1 /*	$NetBSD: rtsock.c,v 1.40 2000/04/15 17:51:28 simonb Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1988, 1991, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
65  */
66 
67 #include "opt_inet.h"
68 
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/proc.h>
72 #include <sys/mbuf.h>
73 #include <sys/socket.h>
74 #include <sys/socketvar.h>
75 #include <sys/domain.h>
76 #include <sys/protosw.h>
77 
78 #include <vm/vm.h>
79 #include <sys/sysctl.h>
80 
81 #include <net/if.h>
82 #include <net/route.h>
83 #include <net/raw_cb.h>
84 
85 #include <machine/stdarg.h>
86 
/*
 * Addresses and protocol descriptor handed to raw_input() for every routing
 * message built in this file.  route_usrreq() also installs &route_src and
 * &route_dst as rcb_laddr/rcb_faddr of each attached routing socket, and
 * route_proto.sp_protocol is rewritten by most senders just before the
 * raw_input() call.
 * NOTE(review): the leading 2 is presumably sa_len -- confirm against the
 * layout of struct sockaddr.
 */
87 struct	sockaddr route_dst = { 2, PF_ROUTE, };
88 struct	sockaddr route_src = { 2, PF_ROUTE, };
89 struct	sockproto route_proto = { PF_ROUTE, };
90 
/*
 * State for one sysctl_rtable() request; it is passed down to
 * sysctl_dumpentry(), sysctl_iflist() and rt_msg2().
 */
91 struct walkarg {
92 	int	w_op;		/* request kind, taken from name[1] */
93 	int	w_arg;		/* request argument, taken from name[2] */
94 	int	w_given;	/* *given as supplied by the caller */
95 	int	w_needed;	/* starts at -w_given; each message length is added */
96 	caddr_t	w_where;	/* next copyout() destination, NULL if none */
97 	int	w_tmemsize;	/* size of the buffer at w_tmem */
98 	int	w_tmemneeded;	/* size wanted when an M_NOWAIT malloc failed */
99 	caddr_t	w_tmem;		/* scratch buffer a message is built in */
100 };
101 
/* Forward declarations of the helpers that are private to this file. */
102 static struct mbuf *rt_msg1 __P((int, struct rt_addrinfo *, caddr_t, int));
103 static int rt_msg2 __P((int, struct rt_addrinfo *, caddr_t, struct walkarg *,
104     int *));
105 static void rt_xaddrs __P((caddr_t, caddr_t, struct rt_addrinfo *));
106 static int sysctl_dumpentry __P((struct radix_node *, void *));
107 static int sysctl_iflist __P((int, struct walkarg *, int));
108 static int sysctl_rtable __P((int *, u_int, void *, size_t *, void *, size_t));
109 static __inline void rt_adjustcount __P((int, int));
110 
111 /*
 * Sleazy use of local variables throughout file, warning!!!!
 *
 * Each name below expands to one slot of a variable literally called
 * `info', so it can only be used inside a function that has a local
 * `struct rt_addrinfo info' in scope, and no local may reuse these names.
 */
112 #define dst	info.rti_info[RTAX_DST]
113 #define gate	info.rti_info[RTAX_GATEWAY]
114 #define netmask	info.rti_info[RTAX_NETMASK]
115 #define genmask	info.rti_info[RTAX_GENMASK]
116 #define ifpaddr	info.rti_info[RTAX_IFP]
117 #define ifaaddr	info.rti_info[RTAX_IFA]
118 #define brdaddr	info.rti_info[RTAX_BRD]
119 
120 static __inline void
121 rt_adjustcount(af, cnt)
122 	int af, cnt;
123 {
124 	route_cb.any_count += cnt;
125 	switch (af) {
126 	case AF_INET:
127 		route_cb.ip_count += cnt;
128 		return;
129 #ifdef INET6
130 	case AF_INET6:
131 		route_cb.ip6_count += cnt;
132 		return;
133 #endif
134 	case AF_IPX:
135 		route_cb.ipx_count += cnt;
136 		return;
137 	case AF_NS:
138 		route_cb.ns_count += cnt;
139 		return;
140 	case AF_ISO:
141 		route_cb.iso_count += cnt;
142 		return;
143 	}
144 }
145 
146 /*ARGSUSED*/
147 int
148 route_usrreq(so, req, m, nam, control, p)
149 	struct socket *so;
150 	int req;
151 	struct mbuf *m, *nam, *control;
152 	struct proc *p;
153 {
154 	int error = 0;
155 	struct rawcb *rp = sotorawcb(so);
156 	int s;
157 
158 	if (req == PRU_ATTACH) {
159 		MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
160 		if ((so->so_pcb = rp) != NULL)
161 			bzero(so->so_pcb, sizeof(*rp));
162 
163 	}
164 	if (req == PRU_DETACH && rp)
165 		rt_adjustcount(rp->rcb_proto.sp_protocol, -1);
166 	s = splsoftnet();
167 
168 	/*
169 	 * Don't call raw_usrreq() in the attach case, because
170 	 * we want to allow non-privileged processes to listen on
171 	 * and send "safe" commands to the routing socket.
172 	 */
173 	if (req == PRU_ATTACH) {
174 		if (p == 0)
175 			error = EACCES;
176 		else
177 			error = raw_attach(so, (int)(long)nam);
178 	} else
179 		error = raw_usrreq(so, req, m, nam, control, p);
180 
181 	rp = sotorawcb(so);
182 	if (req == PRU_ATTACH && rp) {
183 		if (error) {
184 			free((caddr_t)rp, M_PCB);
185 			splx(s);
186 			return (error);
187 		}
188 		rt_adjustcount(rp->rcb_proto.sp_protocol, 1);
189 		rp->rcb_laddr = &route_src;
190 		rp->rcb_faddr = &route_dst;
191 		soisconnected(so);
192 		so->so_options |= SO_USELOOPBACK;
193 	}
194 	splx(s);
195 	return (error);
196 }
197 
198 /*ARGSUSED*/
199 int
200 #if __STDC__
201 route_output(struct mbuf *m, ...)
202 #else
203 route_output(m, va_alist)
204 	struct mbuf *m;
205 	va_dcl
206 #endif
207 {
208 	struct rt_msghdr *rtm = 0;
209 	struct radix_node *rn = 0;
210 	struct rtentry *rt = 0;
211 	struct rtentry *saved_nrt = 0;
212 	struct radix_node_head *rnh;
213 	struct rt_addrinfo info;
214 	int len, error = 0;
215 	struct ifnet *ifp = 0;
216 	struct ifaddr *ifa = 0;
217 	struct socket *so;
218 	va_list ap;
219 
220 	va_start(ap, m);
221 	so = va_arg(ap, struct socket *);
222 	va_end(ap);
223 
224 	bzero(&info, sizeof(info));
225 #define senderr(e) do { error = e; goto flush;} while (0)
226 	if (m == 0 || ((m->m_len < sizeof(int32_t)) &&
227 	   (m = m_pullup(m, sizeof(int32_t))) == 0))
228 		return (ENOBUFS);
229 	if ((m->m_flags & M_PKTHDR) == 0)
230 		panic("route_output");
231 	len = m->m_pkthdr.len;
232 	if (len < sizeof(*rtm) ||
233 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
234 		dst = 0;
235 		senderr(EINVAL);
236 	}
237 	R_Malloc(rtm, struct rt_msghdr *, len);
238 	if (rtm == 0) {
239 		dst = 0;
240 		senderr(ENOBUFS);
241 	}
242 	m_copydata(m, 0, len, (caddr_t)rtm);
243 	if (rtm->rtm_version != RTM_VERSION) {
244 		dst = 0;
245 		senderr(EPROTONOSUPPORT);
246 	}
247 	rtm->rtm_pid = curproc->p_pid;
248 	info.rti_addrs = rtm->rtm_addrs;
249 	rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info);
250 	if (dst == 0 || (dst->sa_family >= AF_MAX))
251 		senderr(EINVAL);
252 	if (gate != 0 && (gate->sa_family >= AF_MAX))
253 		senderr(EINVAL);
254 	if (genmask) {
255 		struct radix_node *t;
256 		t = rn_addmask((caddr_t)genmask, 0, 1);
257 		if (t && Bcmp(genmask, t->rn_key, *(u_char *)genmask) == 0)
258 			genmask = (struct sockaddr *)(t->rn_key);
259 		else
260 			senderr(ENOBUFS);
261 	}
262 
263 	/*
264 	 * Verify that the caller has the appropriate privilege; RTM_GET
265 	 * is the only operation the non-superuser is allowed.
266 	 */
267 	if (rtm->rtm_type != RTM_GET &&
268 	    suser(curproc->p_ucred, &curproc->p_acflag) != 0)
269 		senderr(EACCES);
270 
271 	switch (rtm->rtm_type) {
272 
273 	case RTM_ADD:
274 		if (gate == 0)
275 			senderr(EINVAL);
276 		error = rtrequest(RTM_ADD, dst, gate, netmask,
277 		    rtm->rtm_flags, &saved_nrt);
278 		if (error == 0 && saved_nrt) {
279 			rt_setmetrics(rtm->rtm_inits,
280 			    &rtm->rtm_rmx, &saved_nrt->rt_rmx);
281 			saved_nrt->rt_refcnt--;
282 			saved_nrt->rt_genmask = genmask;
283 		}
284 		break;
285 
286 	case RTM_DELETE:
287 		error = rtrequest(RTM_DELETE, dst, gate, netmask,
288 		    rtm->rtm_flags, &saved_nrt);
289 		if (error == 0) {
290 			(rt = saved_nrt)->rt_refcnt++;
291 			goto report;
292 		}
293 		break;
294 
295 	case RTM_GET:
296 	case RTM_CHANGE:
297 	case RTM_LOCK:
298 		if ((rnh = rt_tables[dst->sa_family]) == 0) {
299 			senderr(EAFNOSUPPORT);
300 		}
301 		rn = rnh->rnh_lookup(dst, netmask, rnh);
302 		if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0) {
303 			senderr(ESRCH);
304 		}
305 		rt = (struct rtentry *)rn;
306 		rt->rt_refcnt++;
307 
308 		switch(rtm->rtm_type) {
309 
310 		case RTM_GET:
311 		report:
312 			dst = rt_key(rt);
313 			gate = rt->rt_gateway;
314 			netmask = rt_mask(rt);
315 			genmask = rt->rt_genmask;
316 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
317 				if ((ifp = rt->rt_ifp) != NULL) {
318 					ifpaddr = ifp->if_addrlist.tqh_first->ifa_addr;
319 					ifaaddr = rt->rt_ifa->ifa_addr;
320 					if (ifp->if_flags & IFF_POINTOPOINT)
321 						brdaddr = rt->rt_ifa->ifa_dstaddr;
322 					else
323 						brdaddr = 0;
324 					rtm->rtm_index = ifp->if_index;
325 				} else {
326 					ifpaddr = 0;
327 					ifaaddr = 0;
328 				}
329 			}
330 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)0,
331 			    (struct walkarg *)0, &len);
332 			if (len > rtm->rtm_msglen) {
333 				struct rt_msghdr *new_rtm;
334 				R_Malloc(new_rtm, struct rt_msghdr *, len);
335 				if (new_rtm == 0)
336 					senderr(ENOBUFS);
337 				Bcopy(rtm, new_rtm, rtm->rtm_msglen);
338 				Free(rtm); rtm = new_rtm;
339 			}
340 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm,
341 			    (struct walkarg *)0, 0);
342 			rtm->rtm_flags = rt->rt_flags;
343 			rtm->rtm_rmx = rt->rt_rmx;
344 			rtm->rtm_addrs = info.rti_addrs;
345 			break;
346 
347 		case RTM_CHANGE:
348 			if (gate && rt_setgate(rt, rt_key(rt), gate))
349 				senderr(EDQUOT);
350 			/* new gateway could require new ifaddr, ifp;
351 			   flags may also be different; ifp may be specified
352 			   by ll sockaddr when protocol address is ambiguous */
353 			if (ifpaddr && (ifa = ifa_ifwithnet(ifpaddr)) &&
354 			    (ifp = ifa->ifa_ifp) && (ifaaddr || gate))
355 				ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate,
356 				    ifp);
357 			else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) ||
358 			    (gate && (ifa = ifa_ifwithroute(rt->rt_flags,
359 			    rt_key(rt), gate))))
360 				ifp = ifa->ifa_ifp;
361 			if (ifa) {
362 				struct ifaddr *oifa = rt->rt_ifa;
363 				if (oifa != ifa) {
364 				    if (oifa && oifa->ifa_rtrequest)
365 					oifa->ifa_rtrequest(RTM_DELETE,
366 					rt, gate);
367 				    IFAFREE(rt->rt_ifa);
368 				    rt->rt_ifa = ifa;
369 				    IFAREF(rt->rt_ifa);
370 				    rt->rt_ifp = ifp;
371 				}
372 			}
373 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
374 			    &rt->rt_rmx);
375 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
376 				rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, gate);
377 			if (genmask)
378 				rt->rt_genmask = genmask;
379 			/*
380 			 * Fall into
381 			 */
382 		case RTM_LOCK:
383 			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
384 			rt->rt_rmx.rmx_locks |=
385 			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
386 			break;
387 		}
388 		break;
389 
390 	default:
391 		senderr(EOPNOTSUPP);
392 	}
393 
394 flush:
395 	if (rtm) {
396 		if (error)
397 			rtm->rtm_errno = error;
398 		else
399 			rtm->rtm_flags |= RTF_DONE;
400 	}
401 	if (rt)
402 		rtfree(rt);
403     {
404 	struct rawcb *rp = 0;
405 	/*
406 	 * Check to see if we don't want our own messages.
407 	 */
408 	if ((so->so_options & SO_USELOOPBACK) == 0) {
409 		if (route_cb.any_count <= 1) {
410 			if (rtm)
411 				Free(rtm);
412 			m_freem(m);
413 			return (error);
414 		}
415 		/* There is another listener, so construct message */
416 		rp = sotorawcb(so);
417 	}
418 	if (rtm) {
419 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
420 		Free(rtm);
421 	}
422 	if (rp)
423 		rp->rcb_proto.sp_family = 0; /* Avoid us */
424 	if (dst)
425 		route_proto.sp_protocol = dst->sa_family;
426 	raw_input(m, &route_proto, &route_src, &route_dst);
427 	if (rp)
428 		rp->rcb_proto.sp_family = PF_ROUTE;
429     }
430 	return (error);
431 }
432 
433 void
434 rt_setmetrics(which, in, out)
435 	u_long which;
436 	struct rt_metrics *in, *out;
437 {
438 #define metric(f, e) if (which & (f)) out->e = in->e;
439 	metric(RTV_RPIPE, rmx_recvpipe);
440 	metric(RTV_SPIPE, rmx_sendpipe);
441 	metric(RTV_SSTHRESH, rmx_ssthresh);
442 	metric(RTV_RTT, rmx_rtt);
443 	metric(RTV_RTTVAR, rmx_rttvar);
444 	metric(RTV_HOPCOUNT, rmx_hopcount);
445 	metric(RTV_MTU, rmx_mtu);
446 	metric(RTV_EXPIRE, rmx_expire);
447 #undef metric
448 }
449 
/*
 * ROUNDUP: round a length up to the next multiple of sizeof(long); a
 * length of zero or less is treated as one long.  The argument is
 * evaluated more than once.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (((a) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) \
	    : sizeof(long))
/* ADVANCE: step pointer x past sockaddr n, using n's rounded-up sa_len. */
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
453 
454 static void
455 rt_xaddrs(cp, cplim, rtinfo)
456 	caddr_t cp, cplim;
457 	struct rt_addrinfo *rtinfo;
458 {
459 	struct sockaddr *sa;
460 	int i;
461 
462 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
463 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
464 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
465 			continue;
466 		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;
467 		ADVANCE(cp, sa);
468 	}
469 }
470 
471 static struct mbuf *
472 rt_msg1(type, rtinfo, data, datalen)
473 	int type;
474 	struct rt_addrinfo *rtinfo;
475 	caddr_t data;
476 	int datalen;
477 {
478 	struct rt_msghdr *rtm;
479 	struct mbuf *m;
480 	int i;
481 	struct sockaddr *sa;
482 	int len, dlen;
483 
484 	m = m_gethdr(M_DONTWAIT, MT_DATA);
485 	if (m == 0)
486 		return (m);
487 	switch (type) {
488 
489 	case RTM_DELADDR:
490 	case RTM_NEWADDR:
491 		len = sizeof(struct ifa_msghdr);
492 		break;
493 
494 #ifdef COMPAT_14
495 	case RTM_OIFINFO:
496 		len = sizeof(struct if_msghdr14);
497 		break;
498 #endif
499 
500 	case RTM_IFINFO:
501 		len = sizeof(struct if_msghdr);
502 		break;
503 
504 	case RTM_IFANNOUNCE:
505 		len = sizeof(struct if_announcemsghdr);
506 		break;
507 
508 	default:
509 		len = sizeof(struct rt_msghdr);
510 	}
511 	if (len > MHLEN + MLEN)
512 		panic("rt_msg1: message too long");
513 	else if (len > MHLEN) {
514 		m->m_next = m_get(M_DONTWAIT, MT_DATA);
515 		if (m->m_next == NULL) {
516 			m_freem(m);
517 			return (NULL);
518 		}
519 		m->m_pkthdr.len = len;
520 		m->m_len = MHLEN;
521 		m->m_next->m_len = len - MHLEN;
522 	} else {
523 		m->m_pkthdr.len = m->m_len = len;
524 	}
525 	m->m_pkthdr.rcvif = 0;
526 	m_copyback(m, 0, datalen, data);
527 	rtm = mtod(m, struct rt_msghdr *);
528 	for (i = 0; i < RTAX_MAX; i++) {
529 		if ((sa = rtinfo->rti_info[i]) == NULL)
530 			continue;
531 		rtinfo->rti_addrs |= (1 << i);
532 		dlen = ROUNDUP(sa->sa_len);
533 		m_copyback(m, len, dlen, (caddr_t)sa);
534 		len += dlen;
535 	}
536 	rtm->rtm_msglen = len;
537 	rtm->rtm_version = RTM_VERSION;
538 	rtm->rtm_type = type;
539 	return (m);
540 }
541 
542 /*
543  * rt_msg2
544  *
545  *	 fills 'cp' or 'w'.w_tmem with the routing socket message and
546  *		returns the length of the message in 'lenp'.
547  *
548  * if walkarg is 0, cp is expected to be 0 or a buffer large enough to hold
549  *	the message
550  * otherwise walkarg's w_needed is updated and if the user buffer is
551  *	specified and w_needed indicates space exists the information is copied
552  *	into the temp space (w_tmem). w_tmem is [re]allocated if necessary,
553  *	if the allocation fails ENOBUFS is returned.
554  */
555 static int
556 rt_msg2(type, rtinfo, cp, w, lenp)
557 	int type;
558 	struct rt_addrinfo *rtinfo;
559 	caddr_t cp;
560 	struct walkarg *w;
561 	int *lenp;
562 {
563 	int i;
564 	int len, dlen, second_time = 0;
565 	caddr_t cp0;
566 
567 	rtinfo->rti_addrs = 0;
568 again:
569 	switch (type) {
570 
571 	case RTM_DELADDR:
572 	case RTM_NEWADDR:
573 		len = sizeof(struct ifa_msghdr);
574 		break;
575 #ifdef COMPAT_14
576 	case RTM_OIFINFO:
577 		len = sizeof(struct if_msghdr14);
578 		break;
579 #endif
580 
581 	case RTM_IFINFO:
582 		len = sizeof(struct if_msghdr);
583 		break;
584 
585 	default:
586 		len = sizeof(struct rt_msghdr);
587 	}
588 	if ((cp0 = cp) != NULL)
589 		cp += len;
590 	for (i = 0; i < RTAX_MAX; i++) {
591 		struct sockaddr *sa;
592 
593 		if ((sa = rtinfo->rti_info[i]) == 0)
594 			continue;
595 		rtinfo->rti_addrs |= (1 << i);
596 		dlen = ROUNDUP(sa->sa_len);
597 		if (cp) {
598 			bcopy(sa, cp, (unsigned)dlen);
599 			cp += dlen;
600 		}
601 		len += dlen;
602 	}
603 	if (cp == 0 && w != NULL && !second_time) {
604 		struct walkarg *rw = w;
605 
606 		rw->w_needed += len;
607 		if (rw->w_needed <= 0 && rw->w_where) {
608 			if (rw->w_tmemsize < len) {
609 				if (rw->w_tmem)
610 					free(rw->w_tmem, M_RTABLE);
611 				rw->w_tmem = (caddr_t) malloc(len, M_RTABLE,
612 				    M_NOWAIT);
613 				if (rw->w_tmem)
614 					rw->w_tmemsize = len;
615 			}
616 			if (rw->w_tmem) {
617 				cp = rw->w_tmem;
618 				second_time = 1;
619 				goto again;
620 			} else {
621 				rw->w_tmemneeded = len;
622 				return (ENOBUFS);
623 			}
624 		}
625 	}
626 	if (cp) {
627 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
628 
629 		rtm->rtm_version = RTM_VERSION;
630 		rtm->rtm_type = type;
631 		rtm->rtm_msglen = len;
632 	}
633 	if (lenp)
634 		*lenp = len;
635 	return (0);
636 }
637 
638 /*
639  * This routine is called to generate a message from the routing
640  * socket indicating that a redirect has occured, a routing lookup
641  * has failed, or that a protocol has detected timeouts to a particular
642  * destination.
643  */
644 void
645 rt_missmsg(type, rtinfo, flags, error)
646 	int type, flags, error;
647 	struct rt_addrinfo *rtinfo;
648 {
649 	struct rt_msghdr rtm;
650 	struct mbuf *m;
651 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
652 
653 	if (route_cb.any_count == 0)
654 		return;
655 	bzero(&rtm, sizeof(rtm));
656 	rtm.rtm_flags = RTF_DONE | flags;
657 	rtm.rtm_errno = error;
658 	m = rt_msg1(type, rtinfo, (caddr_t)&rtm, sizeof(rtm));
659 	if (m == 0)
660 		return;
661 	mtod(m, struct rt_msghdr *)->rtm_addrs = rtinfo->rti_addrs;
662 	route_proto.sp_protocol = sa ? sa->sa_family : 0;
663 	raw_input(m, &route_proto, &route_src, &route_dst);
664 }
665 
666 /*
667  * This routine is called to generate a message from the routing
668  * socket indicating that the status of a network interface has changed.
669  */
670 void
671 rt_ifmsg(ifp)
672 	struct ifnet *ifp;
673 {
674 	struct if_msghdr ifm;
675 #ifdef COMPAT_14
676 	struct if_msghdr14 oifm;
677 #endif
678 	struct mbuf *m;
679 	struct rt_addrinfo info;
680 
681 	if (route_cb.any_count == 0)
682 		return;
683 	bzero(&info, sizeof(info));
684 	bzero(&ifm, sizeof(ifm));
685 	ifm.ifm_index = ifp->if_index;
686 	ifm.ifm_flags = ifp->if_flags;
687 	ifm.ifm_data = ifp->if_data;
688 	ifm.ifm_addrs = 0;
689 	m = rt_msg1(RTM_IFINFO, &info, (caddr_t)&ifm, sizeof(ifm));
690 	if (m == 0)
691 		return;
692 	route_proto.sp_protocol = 0;
693 	raw_input(m, &route_proto, &route_src, &route_dst);
694 #ifdef COMPAT_14
695 	bzero(&info, sizeof(info));
696 	bzero(&oifm, sizeof(ifm));
697 	oifm.ifm_index = ifp->if_index;
698 	oifm.ifm_flags = ifp->if_flags;
699 	oifm.ifm_data.ifi_type = ifp->if_data.ifi_type;
700 	oifm.ifm_data.ifi_addrlen = ifp->if_data.ifi_addrlen;
701 	oifm.ifm_data.ifi_hdrlen = ifp->if_data.ifi_hdrlen;
702 	oifm.ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
703 	oifm.ifm_data.ifi_metric = ifp->if_data.ifi_metric;
704 	oifm.ifm_data.ifi_baudrate = ifp->if_data.ifi_baudrate;
705 	oifm.ifm_data.ifi_ipackets = ifp->if_data.ifi_ipackets;
706 	oifm.ifm_data.ifi_ierrors = ifp->if_data.ifi_ierrors;
707 	oifm.ifm_data.ifi_opackets = ifp->if_data.ifi_opackets;
708 	oifm.ifm_data.ifi_oerrors = ifp->if_data.ifi_oerrors;
709 	oifm.ifm_data.ifi_collisions = ifp->if_data.ifi_collisions;
710 	oifm.ifm_data.ifi_ibytes = ifp->if_data.ifi_ibytes;
711 	oifm.ifm_data.ifi_obytes = ifp->if_data.ifi_obytes;
712 	oifm.ifm_data.ifi_imcasts = ifp->if_data.ifi_imcasts;
713 	oifm.ifm_data.ifi_omcasts = ifp->if_data.ifi_omcasts;
714 	oifm.ifm_data.ifi_iqdrops = ifp->if_data.ifi_iqdrops;
715 	oifm.ifm_data.ifi_noproto = ifp->if_data.ifi_noproto;
716 	oifm.ifm_data.ifi_lastchange = ifp->if_data.ifi_lastchange;
717 	oifm.ifm_addrs = 0;
718 	m = rt_msg1(RTM_OIFINFO, &info, (caddr_t)&oifm, sizeof(oifm));
719 	if (m == 0)
720 		return;
721 	route_proto.sp_protocol = 0;
722 	raw_input(m, &route_proto, &route_src, &route_dst);
723 #endif
724 }
725 
726 /*
727  * This is called to generate messages from the routing socket
728  * indicating a network interface has had addresses associated with it.
729  * if we ever reverse the logic and replace messages TO the routing
730  * socket indicate a request to configure interfaces, then it will
731  * be unnecessary as the routing socket will automatically generate
732  * copies of it.
733  */
734 void
735 rt_newaddrmsg(cmd, ifa, error, rt)
736 	int cmd, error;
737 	struct ifaddr *ifa;
738 	struct rtentry *rt;
739 {
740 	struct rt_addrinfo info;
741 	struct sockaddr *sa = NULL;
742 	int pass;
743 	struct mbuf *m = NULL;
744 	struct ifnet *ifp = ifa->ifa_ifp;
745 
746 	if (route_cb.any_count == 0)
747 		return;
748 	for (pass = 1; pass < 3; pass++) {
749 		bzero(&info, sizeof(info));
750 		if ((cmd == RTM_ADD && pass == 1) ||
751 		    (cmd == RTM_DELETE && pass == 2)) {
752 			struct ifa_msghdr ifam;
753 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
754 
755 			ifaaddr = sa = ifa->ifa_addr;
756 			ifpaddr = ifp->if_addrlist.tqh_first->ifa_addr;
757 			netmask = ifa->ifa_netmask;
758 			brdaddr = ifa->ifa_dstaddr;
759 			bzero(&ifam, sizeof(ifam));
760 			ifam.ifam_index = ifp->if_index;
761 			ifam.ifam_metric = ifa->ifa_metric;
762 			ifam.ifam_flags = ifa->ifa_flags;
763 			m = rt_msg1(ncmd, &info, (caddr_t)&ifam, sizeof(ifam));
764 			if (m == NULL)
765 				continue;
766 			mtod(m, struct ifa_msghdr *)->ifam_addrs =
767 			    info.rti_addrs;
768 		}
769 		if ((cmd == RTM_ADD && pass == 2) ||
770 		    (cmd == RTM_DELETE && pass == 1)) {
771 			struct rt_msghdr rtm;
772 
773 			if (rt == 0)
774 				continue;
775 			netmask = rt_mask(rt);
776 			dst = sa = rt_key(rt);
777 			gate = rt->rt_gateway;
778 			bzero(&rtm, sizeof(rtm));
779 			rtm.rtm_index = ifp->if_index;
780 			rtm.rtm_flags |= rt->rt_flags;
781 			rtm.rtm_errno = error;
782 			m = rt_msg1(cmd, &info, (caddr_t)&rtm, sizeof(rtm));
783 			if (m == NULL)
784 				continue;
785 			mtod(m, struct rt_msghdr *)->rtm_addrs = info.rti_addrs;
786 		}
787 		route_proto.sp_protocol = sa ? sa->sa_family : 0;
788 		raw_input(m, &route_proto, &route_src, &route_dst);
789 	}
790 }
791 
792 /*
793  * This is called to generate routing socket messages indicating
794  * network interface arrival and departure.
795  */
796 void
797 rt_ifannouncemsg(ifp, what)
798 	struct ifnet *ifp;
799 	int what;
800 {
801 	struct if_announcemsghdr ifan;
802 	struct mbuf *m;
803 	struct rt_addrinfo info;
804 
805 	if (route_cb.any_count == 0)
806 		return;
807 	bzero(&info, sizeof(info));
808 	bzero(&ifan, sizeof(ifan));
809 	ifan.ifan_index = ifp->if_index;
810 	strcpy(ifan.ifan_name, ifp->if_xname);
811 	ifan.ifan_what = what;
812 	m = rt_msg1(RTM_IFANNOUNCE, &info, (caddr_t)&ifan, sizeof(ifan));
813 	if (m == 0)
814 		return;
815 	route_proto.sp_protocol = 0;
816 	raw_input(m, &route_proto, &route_src, &route_dst);
817 }
818 
819 /*
820  * This is used in dumping the kernel table via sysctl().
821  */
822 static int
823 sysctl_dumpentry(rn, v)
824 	struct radix_node *rn;
825 	void *v;
826 {
827 	struct walkarg *w = v;
828 	struct rtentry *rt = (struct rtentry *)rn;
829 	int error = 0, size;
830 	struct rt_addrinfo info;
831 
832 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
833 		return 0;
834 	bzero(&info, sizeof(info));
835 	dst = rt_key(rt);
836 	gate = rt->rt_gateway;
837 	netmask = rt_mask(rt);
838 	genmask = rt->rt_genmask;
839 	if (rt->rt_ifp) {
840 		ifpaddr = rt->rt_ifp->if_addrlist.tqh_first->ifa_addr;
841 		ifaaddr = rt->rt_ifa->ifa_addr;
842 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
843 			brdaddr = rt->rt_ifa->ifa_dstaddr;
844 	}
845 	if ((error = rt_msg2(RTM_GET, &info, 0, w, &size)))
846 		return (error);
847 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
848 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
849 
850 		rtm->rtm_flags = rt->rt_flags;
851 		rtm->rtm_use = rt->rt_use;
852 		rtm->rtm_rmx = rt->rt_rmx;
853 		rtm->rtm_index = rt->rt_ifp->if_index;
854 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
855 		rtm->rtm_addrs = info.rti_addrs;
856 		if ((error = copyout(rtm, w->w_where, size)) != 0)
857 			w->w_where = NULL;
858 		else
859 			w->w_where += size;
860 	}
861 	return (error);
862 }
863 
864 static int
865 sysctl_iflist(af, w, type)
866 	int	af;
867 	struct	walkarg *w;
868 	int type;
869 {
870 	struct ifnet *ifp;
871 	struct ifaddr *ifa;
872 	struct	rt_addrinfo info;
873 	int	len, error = 0;
874 
875 	bzero(&info, sizeof(info));
876 	for (ifp = ifnet.tqh_first; ifp != 0; ifp = ifp->if_list.tqe_next) {
877 		if (w->w_arg && w->w_arg != ifp->if_index)
878 			continue;
879 		ifa = ifp->if_addrlist.tqh_first;
880 		ifpaddr = ifa->ifa_addr;
881 		switch(type) {
882 		case NET_RT_IFLIST:
883 			error =
884 			    rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w, &len);
885 			break;
886 #ifdef COMPAT_14
887 		case NET_RT_OIFLIST:
888 			error =
889 			    rt_msg2(RTM_OIFINFO, &info, (caddr_t)0, w, &len);
890 			break;
891 #endif
892 		default:
893 			panic("sysctl_iflist(1)");
894 		}
895 		if (error)
896 			return (error);
897 		ifpaddr = 0;
898 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
899 			switch(type) {
900 			case NET_RT_IFLIST: {
901 				struct if_msghdr *ifm;
902 
903 				ifm = (struct if_msghdr *)w->w_tmem;
904 				ifm->ifm_index = ifp->if_index;
905 				ifm->ifm_flags = ifp->if_flags;
906 				ifm->ifm_data = ifp->if_data;
907 				ifm->ifm_addrs = info.rti_addrs;
908 				error = copyout(ifm, w->w_where, len);
909 				if (error)
910 					return (error);
911 				w->w_where += len;
912 				break;
913 			}
914 
915 #ifdef COMPAT_14
916 			case NET_RT_OIFLIST: {
917 				struct if_msghdr14 *ifm;
918 
919 				ifm = (struct if_msghdr14 *)w->w_tmem;
920 				ifm->ifm_index = ifp->if_index;
921 				ifm->ifm_flags = ifp->if_flags;
922 				ifm->ifm_data.ifi_type = ifp->if_data.ifi_type;
923 				ifm->ifm_data.ifi_addrlen =
924 				    ifp->if_data.ifi_addrlen;
925 				ifm->ifm_data.ifi_hdrlen =
926 				    ifp->if_data.ifi_hdrlen;
927 				ifm->ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
928 				ifm->ifm_data.ifi_metric =
929 				    ifp->if_data.ifi_metric;
930 				ifm->ifm_data.ifi_baudrate =
931 				    ifp->if_data.ifi_baudrate;
932 				ifm->ifm_data.ifi_ipackets =
933 				    ifp->if_data.ifi_ipackets;
934 				ifm->ifm_data.ifi_ierrors =
935 				    ifp->if_data.ifi_ierrors;
936 				ifm->ifm_data.ifi_opackets =
937 				    ifp->if_data.ifi_opackets;
938 				ifm->ifm_data.ifi_oerrors =
939 				    ifp->if_data.ifi_oerrors;
940 				ifm->ifm_data.ifi_collisions =
941 				    ifp->if_data.ifi_collisions;
942 				ifm->ifm_data.ifi_ibytes =
943 				    ifp->if_data.ifi_ibytes;
944 				ifm->ifm_data.ifi_obytes =
945 				    ifp->if_data.ifi_obytes;
946 				ifm->ifm_data.ifi_imcasts =
947 				    ifp->if_data.ifi_imcasts;
948 				ifm->ifm_data.ifi_omcasts =
949 				    ifp->if_data.ifi_omcasts;
950 				ifm->ifm_data.ifi_iqdrops =
951 				    ifp->if_data.ifi_iqdrops;
952 				ifm->ifm_data.ifi_noproto =
953 				    ifp->if_data.ifi_noproto;
954 				ifm->ifm_data.ifi_lastchange =
955 				    ifp->if_data.ifi_lastchange;
956 				ifm->ifm_addrs = info.rti_addrs;
957 				error = copyout(ifm, w->w_where, len);
958 				if (error)
959 					return (error);
960 				w->w_where += len;
961 				break;
962 			}
963 #endif
964 			default:
965 				panic("sysctl_iflist(2)");
966 			}
967 		}
968 		while ((ifa = ifa->ifa_list.tqe_next) != NULL) {
969 			if (af && af != ifa->ifa_addr->sa_family)
970 				continue;
971 			ifaaddr = ifa->ifa_addr;
972 			netmask = ifa->ifa_netmask;
973 			brdaddr = ifa->ifa_dstaddr;
974 			if ((error = rt_msg2(RTM_NEWADDR, &info, 0, w, &len)))
975 				return (error);
976 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
977 				struct ifa_msghdr *ifam;
978 
979 				ifam = (struct ifa_msghdr *)w->w_tmem;
980 				ifam->ifam_index = ifa->ifa_ifp->if_index;
981 				ifam->ifam_flags = ifa->ifa_flags;
982 				ifam->ifam_metric = ifa->ifa_metric;
983 				ifam->ifam_addrs = info.rti_addrs;
984 				error = copyout(w->w_tmem, w->w_where, len);
985 				if (error)
986 					return (error);
987 				w->w_where += len;
988 			}
989 		}
990 		ifaaddr = netmask = brdaddr = 0;
991 	}
992 	return (0);
993 }
994 
995 static int
996 sysctl_rtable(name, namelen, where, given, new, newlen)
997 	int	*name;
998 	u_int	namelen;
999 	void 	*where;
1000 	size_t	*given;
1001 	void	*new;
1002 	size_t	newlen;
1003 {
1004 	struct radix_node_head *rnh;
1005 	int	i, s, error = EINVAL;
1006 	u_char  af;
1007 	struct	walkarg w;
1008 
1009 	if (new)
1010 		return (EPERM);
1011 	if (namelen != 3)
1012 		return (EINVAL);
1013 	af = name[0];
1014 	w.w_tmemneeded = 0;
1015 	w.w_tmemsize = 0;
1016 	w.w_tmem = NULL;
1017 again:
1018 	/* we may return here if a later [re]alloc of the t_mem buffer fails */
1019 	if (w.w_tmemneeded) {
1020 		w.w_tmem = (caddr_t) malloc(w.w_tmemneeded, M_RTABLE, M_WAITOK);
1021 		w.w_tmemsize = w.w_tmemneeded;
1022 		w.w_tmemneeded = 0;
1023 	}
1024 	w.w_op = name[1];
1025 	w.w_arg = name[2];
1026 	w.w_given = *given;
1027 	w.w_needed = 0 - w.w_given;
1028 	w.w_where = where;
1029 
1030 	s = splsoftnet();
1031 	switch (w.w_op) {
1032 
1033 	case NET_RT_DUMP:
1034 	case NET_RT_FLAGS:
1035 		for (i = 1; i <= AF_MAX; i++)
1036 			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
1037 			    (error = (*rnh->rnh_walktree)(rnh,
1038 			    sysctl_dumpentry, &w)))
1039 				break;
1040 		break;
1041 
1042 #ifdef COMPAT_14
1043 	case NET_RT_OIFLIST:
1044 		error = sysctl_iflist(af, &w, w.w_op);
1045 		break;
1046 #endif
1047 
1048 	case NET_RT_IFLIST:
1049 		error = sysctl_iflist(af, &w, w.w_op);
1050 	}
1051 	splx(s);
1052 
1053 	/* check to see if we couldn't allocate memory with NOWAIT */
1054 	if (error == ENOBUFS && w.w_tmem == 0 && w.w_tmemneeded)
1055 		goto again;
1056 
1057 	if (w.w_tmem)
1058 		free(w.w_tmem, M_RTABLE);
1059 	w.w_needed += w.w_given;
1060 	if (where) {
1061 		*given = w.w_where - (caddr_t) where;
1062 		if (*given < w.w_needed)
1063 			return (ENOMEM);
1064 	} else {
1065 		*given = (11 * w.w_needed) / 10;
1066 	}
1067 	return (error);
1068 }
1069 
1070 /*
1071  * Definitions of protocols supported in the ROUTE domain.
1072  */
1073 
1074 extern	struct domain routedomain;		/* or at least forward */
1075 
/*
 * The routing domain has a single protocol switch entry: a SOCK_RAW
 * protocol that uses the generic raw-socket routines together with
 * route_output(), route_usrreq() and sysctl_rtable() from this file.
 * NOTE(review): what each positional slot means depends on the field
 * order of struct protosw, which is not visible here -- confirm there.
 */
1076 struct protosw routesw[] = {
1077 { SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
1078   raw_input,	route_output,	raw_ctlinput,	0,
1079   route_usrreq,
1080   raw_init,	0,		0,		0,
1081   sysctl_rtable,
1082 }
1083 };
1084 
/*
 * The domain descriptor references routesw and a pointer one past its
 * last element, computed from the array's size.
 */
1085 struct domain routedomain =
1086     { PF_ROUTE, "route", route_init, 0, 0,
1087       routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
1088