xref: /netbsd-src/sys/net/rtsock.c (revision 466a16a118933bd295a8a104f095714fadf9cf68)
1 /*	$NetBSD: rtsock.c,v 1.119 2008/12/21 19:12:43 roy Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1988, 1991, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: rtsock.c,v 1.119 2008/12/21 19:12:43 roy Exp $");
65 
66 #include "opt_inet.h"
67 
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/proc.h>
71 #include <sys/mbuf.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 #include <sys/domain.h>
75 #include <sys/protosw.h>
76 #include <sys/sysctl.h>
77 #include <sys/kauth.h>
78 #include <sys/intr.h>
79 #ifdef RTSOCK_DEBUG
80 #include <netinet/in.h>
81 #endif /* RTSOCK_DEBUG */
82 
83 #include <net/if.h>
84 #include <net/route.h>
85 #include <net/raw_cb.h>
86 
87 #include <machine/stdarg.h>
88 
DOMAIN_DEFINE(routedomain);	/* forward declare and add to link set */

/*
 * Fixed pseudo-addresses used for routing sockets: two-byte sockaddrs that
 * carry only the PF_ROUTE family.  route_usrreq() installs them as each
 * socket's local/foreign address and route_output() hands them to
 * raw_input() as the source/destination of echoed messages.
 */
struct	sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, };
struct	sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, };

int	route_maxqlen = IFQ_MAXLEN;
/*
 * NOTE(review): presumably the deferred-delivery queue and soft interrupt
 * handle behind route_enqueue(); their use is not visible here -- confirm.
 */
static struct	ifqueue route_intrq;
static void	*route_sih;
97 
/*
 * State threaded through the sysctl dump helpers (sysctl_dumpentry(),
 * sysctl_iflist()) and consulted by rt_msg2() when it is asked to size
 * and stage a message on their behalf.
 */
struct walkarg {
	int	w_op;		/* request kind; compared with NET_RT_FLAGS */
	int	w_arg;		/* request argument: rt_flags mask or if_index */
	int	w_given;
	int	w_needed;	/* rt_msg2() adds each message length here;
				 * data is staged only while this is <= 0 */
	void *	w_where;	/* copyout() destination, advanced per message;
				 * set to NULL after a failed copyout() */
	int	w_tmemsize;	/* current size of w_tmem (0 if none) */
	int	w_tmemneeded;	/* length rt_msg2() failed to allocate */
	void *	w_tmem;		/* scratch buffer holding the staged message */
};
108 
109 static struct mbuf *rt_msg1(int, struct rt_addrinfo *, void *, int);
110 static int rt_msg2(int, struct rt_addrinfo *, void *, struct walkarg *, int *);
111 static int rt_xaddrs(u_char, const char *, const char *, struct rt_addrinfo *);
112 static struct mbuf *rt_makeifannouncemsg(struct ifnet *, int, int,
113     struct rt_addrinfo *);
114 static int sysctl_dumpentry(struct rtentry *, void *);
115 static int sysctl_iflist(int, struct walkarg *, int);
116 static int sysctl_rtable(SYSCTLFN_PROTO);
117 static inline void rt_adjustcount(int, int);
118 static void route_enqueue(struct mbuf *, int);
119 
120 static inline void
121 rt_adjustcount(int af, int cnt)
122 {
123 	route_cb.any_count += cnt;
124 	switch (af) {
125 	case AF_INET:
126 		route_cb.ip_count += cnt;
127 		return;
128 #ifdef INET6
129 	case AF_INET6:
130 		route_cb.ip6_count += cnt;
131 		return;
132 #endif
133 	case AF_IPX:
134 		route_cb.ipx_count += cnt;
135 		return;
136 	case AF_NS:
137 		route_cb.ns_count += cnt;
138 		return;
139 	case AF_ISO:
140 		route_cb.iso_count += cnt;
141 		return;
142 	}
143 }
144 
145 /*ARGSUSED*/
146 int
147 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
148 	struct mbuf *control, struct lwp *l)
149 {
150 	int error = 0;
151 	struct rawcb *rp = sotorawcb(so);
152 	int s;
153 
154 	if (req == PRU_ATTACH) {
155 		sosetlock(so);
156 		rp = malloc(sizeof(*rp), M_PCB, M_WAITOK|M_ZERO);
157 		so->so_pcb = rp;
158 	}
159 	if (req == PRU_DETACH && rp)
160 		rt_adjustcount(rp->rcb_proto.sp_protocol, -1);
161 	s = splsoftnet();
162 
163 	/*
164 	 * Don't call raw_usrreq() in the attach case, because
165 	 * we want to allow non-privileged processes to listen on
166 	 * and send "safe" commands to the routing socket.
167 	 */
168 	if (req == PRU_ATTACH) {
169 		if (l == NULL)
170 			error = EACCES;
171 		else
172 			error = raw_attach(so, (int)(long)nam);
173 	} else
174 		error = raw_usrreq(so, req, m, nam, control, l);
175 
176 	rp = sotorawcb(so);
177 	if (req == PRU_ATTACH && rp) {
178 		if (error) {
179 			free(rp, M_PCB);
180 			splx(s);
181 			return error;
182 		}
183 		rt_adjustcount(rp->rcb_proto.sp_protocol, 1);
184 		rp->rcb_laddr = &route_src;
185 		rp->rcb_faddr = &route_dst;
186 		soisconnected(so);
187 		so->so_options |= SO_USELOOPBACK;
188 	}
189 	splx(s);
190 	return error;
191 }
192 
193 static const struct sockaddr *
194 intern_netmask(const struct sockaddr *mask)
195 {
196 	struct radix_node *rn;
197 	extern struct radix_node_head *mask_rnhead;
198 
199 	if (mask != NULL &&
200 	    (rn = rn_search(mask, mask_rnhead->rnh_treetop)))
201 		mask = (const struct sockaddr *)rn->rn_key;
202 
203 	return mask;
204 }
205 
206 /*ARGSUSED*/
207 int
208 route_output(struct mbuf *m, ...)
209 {
210 	struct sockproto proto = { .sp_family = PF_ROUTE, };
211 	struct rt_msghdr *rtm = NULL;
212 	struct rt_msghdr *old_rtm = NULL;
213 	struct rtentry *rt = NULL;
214 	struct rtentry *saved_nrt = NULL;
215 	struct rt_addrinfo info;
216 	int len, error = 0, ifa_route = 0;
217 	struct ifnet *ifp = NULL;
218 	struct ifaddr *ifa = NULL, *oifa;
219 	struct socket *so;
220 	va_list ap;
221 	sa_family_t family;
222 
223 	va_start(ap, m);
224 	so = va_arg(ap, struct socket *);
225 	va_end(ap);
226 
227 #define senderr(e) do { error = e; goto flush;} while (/*CONSTCOND*/ 0)
228 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
229 	   (m = m_pullup(m, sizeof(int32_t))) == NULL))
230 		return ENOBUFS;
231 	if ((m->m_flags & M_PKTHDR) == 0)
232 		panic("route_output");
233 	len = m->m_pkthdr.len;
234 	if (len < sizeof(*rtm) ||
235 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
236 		info.rti_info[RTAX_DST] = NULL;
237 		senderr(EINVAL);
238 	}
239 	R_Malloc(rtm, struct rt_msghdr *, len);
240 	if (rtm == NULL) {
241 		info.rti_info[RTAX_DST] = NULL;
242 		senderr(ENOBUFS);
243 	}
244 	m_copydata(m, 0, len, rtm);
245 	if (rtm->rtm_version != RTM_VERSION) {
246 		info.rti_info[RTAX_DST] = NULL;
247 		senderr(EPROTONOSUPPORT);
248 	}
249 	rtm->rtm_pid = curproc->p_pid;
250 	memset(&info, 0, sizeof(info));
251 	info.rti_addrs = rtm->rtm_addrs;
252 	if (rt_xaddrs(rtm->rtm_type, (const char *)(rtm + 1), len + (char *)rtm,
253 	    &info))
254 		senderr(EINVAL);
255 	info.rti_flags = rtm->rtm_flags;
256 #ifdef RTSOCK_DEBUG
257 	if (info.rti_info[RTAX_DST]->sa_family == AF_INET) {
258 		printf("%s: extracted info.rti_info[RTAX_DST] %s\n", __func__,
259 		    inet_ntoa(((const struct sockaddr_in *)
260 		    info.rti_info[RTAX_DST])->sin_addr));
261 	}
262 #endif /* RTSOCK_DEBUG */
263 	if (info.rti_info[RTAX_DST] == NULL ||
264 	    (info.rti_info[RTAX_DST]->sa_family >= AF_MAX))
265 		senderr(EINVAL);
266 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
267 	    (info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
268 		senderr(EINVAL);
269 
270 	/*
271 	 * Verify that the caller has the appropriate privilege; RTM_GET
272 	 * is the only operation the non-superuser is allowed.
273 	 */
274 	if (kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_ROUTE,
275 	    0, rtm, NULL, NULL) != 0)
276 		senderr(EACCES);
277 
278 	switch (rtm->rtm_type) {
279 
280 	case RTM_ADD:
281 		if (info.rti_info[RTAX_GATEWAY] == NULL)
282 			senderr(EINVAL);
283 		error = rtrequest1(rtm->rtm_type, &info, &saved_nrt);
284 		if (error == 0 && saved_nrt) {
285 			rt_setmetrics(rtm->rtm_inits,
286 			    &rtm->rtm_rmx, &saved_nrt->rt_rmx);
287 			saved_nrt->rt_refcnt--;
288 		}
289 		break;
290 
291 	case RTM_DELETE:
292 		error = rtrequest1(rtm->rtm_type, &info, &saved_nrt);
293 		if (error == 0) {
294 			(rt = saved_nrt)->rt_refcnt++;
295 			ifa = rt_get_ifa(rt);
296 			/*
297 			 * If deleting an automatic route, scrub the flag.
298 			 */
299 			if (ifa->ifa_flags & IFA_ROUTE)
300 				ifa->ifa_flags &= ~IFA_ROUTE;
301 			goto report;
302 		}
303 		break;
304 
305 	case RTM_GET:
306 	case RTM_CHANGE:
307 	case RTM_LOCK:
308                 /* XXX This will mask info.rti_info[RTAX_DST] with
309 		 * info.rti_info[RTAX_NETMASK] before
310                  * searching.  It did not used to do that.  --dyoung
311 		 */
312 		error = rtrequest1(RTM_GET, &info, &rt);
313 		if (error != 0)
314 			senderr(error);
315 		if (rtm->rtm_type != RTM_GET) {/* XXX: too grotty */
316 			struct radix_node *rn;
317 
318 			if (memcmp(info.rti_info[RTAX_DST], rt_getkey(rt),
319 			    info.rti_info[RTAX_DST]->sa_len) != 0)
320 				senderr(ESRCH);
321 			info.rti_info[RTAX_NETMASK] = intern_netmask(
322 			    info.rti_info[RTAX_NETMASK]);
323 			for (rn = rt->rt_nodes; rn; rn = rn->rn_dupedkey)
324 				if (info.rti_info[RTAX_NETMASK] ==
325 				    (const struct sockaddr *)rn->rn_mask)
326 					break;
327 			if (rn == NULL)
328 				senderr(ETOOMANYREFS);
329 			rt = (struct rtentry *)rn;
330 		}
331 
332 		switch (rtm->rtm_type) {
333 		case RTM_GET:
334 		report:
335 			info.rti_info[RTAX_DST] = rt_getkey(rt);
336 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
337 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
338 			if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) == 0)
339 				;
340 			else if ((ifp = rt->rt_ifp) != NULL) {
341 				const struct ifaddr *rtifa;
342 				info.rti_info[RTAX_IFP] = ifp->if_dl->ifa_addr;
343                                 /* rtifa used to be simply rt->rt_ifa.
344                                  * If rt->rt_ifa != NULL, then
345                                  * rt_get_ifa() != NULL.  So this
346                                  * ought to still be safe. --dyoung
347 				 */
348 				rtifa = rt_get_ifa(rt);
349 				info.rti_info[RTAX_IFA] = rtifa->ifa_addr;
350 #ifdef RTSOCK_DEBUG
351 				if (info.rti_info[RTAX_IFA]->sa_family ==
352 				    AF_INET) {
353 					printf("%s: copying out RTAX_IFA %s ",
354 					    __func__, inet_ntoa(
355 					    (const struct sockaddr_in *)
356 					    info.rti_info[RTAX_IFA])->sin_addr);
357 					printf("for info.rti_info[RTAX_DST] %s "
358 					    "ifa_getifa %p ifa_seqno %p\n",
359 					    inet_ntoa(
360 					    (const struct sockaddr_in *)
361 					    info.rti_info[RTAX_DST])->sin_addr),
362 					    (void *)rtifa->ifa_getifa,
363 					    rtifa->ifa_seqno);
364 				}
365 #endif /* RTSOCK_DEBUG */
366 				if (ifp->if_flags & IFF_POINTOPOINT) {
367 					info.rti_info[RTAX_BRD] =
368 					    rtifa->ifa_dstaddr;
369 				} else
370 					info.rti_info[RTAX_BRD] = NULL;
371 				rtm->rtm_index = ifp->if_index;
372 			} else {
373 				info.rti_info[RTAX_IFP] = NULL;
374 				info.rti_info[RTAX_IFA] = NULL;
375 			}
376 			(void)rt_msg2(rtm->rtm_type, &info, NULL, NULL, &len);
377 			if (len > rtm->rtm_msglen) {
378 				old_rtm = rtm;
379 				R_Malloc(rtm, struct rt_msghdr *, len);
380 				if (rtm == NULL)
381 					senderr(ENOBUFS);
382 				(void)memcpy(rtm, old_rtm, old_rtm->rtm_msglen);
383 			}
384 			(void)rt_msg2(rtm->rtm_type, &info, rtm, NULL, 0);
385 			rtm->rtm_flags = rt->rt_flags;
386 			rtm->rtm_rmx = rt->rt_rmx;
387 			rtm->rtm_addrs = info.rti_addrs;
388 			break;
389 
390 		case RTM_CHANGE:
391 			/*
392 			 * new gateway could require new ifaddr, ifp;
393 			 * flags may also be different; ifp may be specified
394 			 * by ll sockaddr when protocol address is ambiguous
395 			 */
396 			if ((error = rt_getifa(&info)) != 0)
397 				senderr(error);
398 			if (info.rti_info[RTAX_GATEWAY] &&
399 			    rt_setgate(rt, info.rti_info[RTAX_GATEWAY]))
400 				senderr(EDQUOT);
401 			/* new gateway could require new ifaddr, ifp;
402 			   flags may also be different; ifp may be specified
403 			   by ll sockaddr when protocol address is ambiguous */
404 			if (info.rti_info[RTAX_IFP] &&
405 			    (ifa = ifa_ifwithnet(info.rti_info[RTAX_IFP])) &&
406 			    (ifp = ifa->ifa_ifp) && (info.rti_info[RTAX_IFA] ||
407 			    info.rti_info[RTAX_GATEWAY])) {
408 				ifa = ifaof_ifpforaddr(info.rti_info[RTAX_IFA] ?
409 				    info.rti_info[RTAX_IFA] :
410 				    info.rti_info[RTAX_GATEWAY], ifp);
411 			} else if ((info.rti_info[RTAX_IFA] &&
412 			    (ifa = ifa_ifwithaddr(info.rti_info[RTAX_IFA]))) ||
413 			    (info.rti_info[RTAX_GATEWAY] &&
414 			    (ifa = ifa_ifwithroute(rt->rt_flags,
415 			    rt_getkey(rt), info.rti_info[RTAX_GATEWAY])))) {
416 				ifp = ifa->ifa_ifp;
417 			}
418 			oifa = rt->rt_ifa;
419 			if (oifa && oifa->ifa_flags & IFA_ROUTE) {
420 				/*
421 				 * If changing an automatically added route,
422 				 * remove the flag and store the fact.
423 				 */
424 				oifa->ifa_flags &= ~IFA_ROUTE;
425 				ifa_route = 1;
426 			}
427 			if (ifa) {
428 				if (oifa != ifa) {
429 					if (oifa && oifa->ifa_rtrequest) {
430 						oifa->ifa_rtrequest(RTM_DELETE,
431 						    rt, &info);
432 					}
433 					/*
434 					 * If changing an automatically added
435 					 * route, store this if not static.
436 					 */
437 					if (ifa_route &&
438 					    !(rt->rt_flags & RTF_STATIC))
439 						ifa->ifa_flags |= IFA_ROUTE;
440 					rt_replace_ifa(rt, ifa);
441 					rt->rt_ifp = ifp;
442 				}
443 			}
444 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
445 			    &rt->rt_rmx);
446 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
447 				rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
448 			/*FALLTHROUGH*/
449 		case RTM_LOCK:
450 			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
451 			rt->rt_rmx.rmx_locks |=
452 			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
453 			break;
454 		}
455 		break;
456 
457 	default:
458 		senderr(EOPNOTSUPP);
459 	}
460 
461 flush:
462 	if (rtm) {
463 		if (error)
464 			rtm->rtm_errno = error;
465 		else
466 			rtm->rtm_flags |= RTF_DONE;
467 	}
468 	family = info.rti_info[RTAX_DST] ? info.rti_info[RTAX_DST]->sa_family :
469 	    0;
470 	/* We cannot free old_rtm until we have stopped using the
471 	 * pointers in info, some of which may point to sockaddrs
472 	 * in old_rtm.
473 	 */
474 	if (old_rtm != NULL)
475 		Free(old_rtm);
476 	if (rt)
477 		rtfree(rt);
478     {
479 	struct rawcb *rp = NULL;
480 	/*
481 	 * Check to see if we don't want our own messages.
482 	 */
483 	if ((so->so_options & SO_USELOOPBACK) == 0) {
484 		if (route_cb.any_count <= 1) {
485 			if (rtm)
486 				Free(rtm);
487 			m_freem(m);
488 			return error;
489 		}
490 		/* There is another listener, so construct message */
491 		rp = sotorawcb(so);
492 	}
493 	if (rtm) {
494 		m_copyback(m, 0, rtm->rtm_msglen, rtm);
495 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
496 			m_freem(m);
497 			m = NULL;
498 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
499 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
500 		Free(rtm);
501 	}
502 	if (rp)
503 		rp->rcb_proto.sp_family = 0; /* Avoid us */
504 	if (family)
505 		proto.sp_protocol = family;
506 	if (m)
507 		raw_input(m, &proto, &route_src, &route_dst);
508 	if (rp)
509 		rp->rcb_proto.sp_family = PF_ROUTE;
510     }
511 	return error;
512 }
513 
514 void
515 rt_setmetrics(u_long which, const struct rt_metrics *in, struct rt_metrics *out)
516 {
517 #define metric(f, e) if (which & (f)) out->e = in->e;
518 	metric(RTV_RPIPE, rmx_recvpipe);
519 	metric(RTV_SPIPE, rmx_sendpipe);
520 	metric(RTV_SSTHRESH, rmx_ssthresh);
521 	metric(RTV_RTT, rmx_rtt);
522 	metric(RTV_RTTVAR, rmx_rttvar);
523 	metric(RTV_HOPCOUNT, rmx_hopcount);
524 	metric(RTV_MTU, rmx_mtu);
525 	metric(RTV_EXPIRE, rmx_expire);
526 #undef metric
527 }
528 
/*
 * ROUNDUP(a): round 'a' up to a multiple of sizeof(long); a value that is
 * zero or negative yields sizeof(long).
 * ADVANCE(x, n): step pointer 'x' past sockaddr 'n', using the rounded-up
 * value of n->sa_len.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
532 
533 static int
534 rt_xaddrs(u_char rtmtype, const char *cp, const char *cplim,
535     struct rt_addrinfo *rtinfo)
536 {
537 	const struct sockaddr *sa = NULL;	/* Quell compiler warning */
538 	int i;
539 
540 	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
541 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
542 			continue;
543 		rtinfo->rti_info[i] = sa = (const struct sockaddr *)cp;
544 		ADVANCE(cp, sa);
545 	}
546 
547 	/*
548 	 * Check for extra addresses specified, except RTM_GET asking
549 	 * for interface info.
550 	 */
551 	if (rtmtype == RTM_GET) {
552 		if (((rtinfo->rti_addrs &
553 		    (~((1 << RTAX_IFP) | (1 << RTAX_IFA)))) & (~0 << i)) != 0)
554 			return 1;
555 	} else if ((rtinfo->rti_addrs & (~0 << i)) != 0)
556 		return 1;
557 	/* Check for bad data length.  */
558 	if (cp != cplim) {
559 		if (i == RTAX_NETMASK + 1 && sa != NULL &&
560 		    cp - ROUNDUP(sa->sa_len) + sa->sa_len == cplim)
561 			/*
562 			 * The last sockaddr was info.rti_info[RTAX_NETMASK].
563 			 * We accept this for now for the sake of old
564 			 * binaries or third party softwares.
565 			 */
566 			;
567 		else
568 			return 1;
569 	}
570 	return 0;
571 }
572 
573 static struct mbuf *
574 rt_msg1(int type, struct rt_addrinfo *rtinfo, void *data, int datalen)
575 {
576 	struct rt_msghdr *rtm;
577 	struct mbuf *m;
578 	int i;
579 	const struct sockaddr *sa;
580 	int len, dlen;
581 
582 	m = m_gethdr(M_DONTWAIT, MT_DATA);
583 	if (m == NULL)
584 		return m;
585 	MCLAIM(m, &routedomain.dom_mowner);
586 	switch (type) {
587 
588 	case RTM_DELADDR:
589 	case RTM_NEWADDR:
590 		len = sizeof(struct ifa_msghdr);
591 		break;
592 
593 #ifdef COMPAT_14
594 	case RTM_OIFINFO:
595 		len = sizeof(struct if_msghdr14);
596 		break;
597 #endif
598 
599 	case RTM_IFINFO:
600 		len = sizeof(struct if_msghdr);
601 		break;
602 
603 	case RTM_IFANNOUNCE:
604 	case RTM_IEEE80211:
605 		len = sizeof(struct if_announcemsghdr);
606 		break;
607 
608 	default:
609 		len = sizeof(struct rt_msghdr);
610 	}
611 	if (len > MHLEN + MLEN)
612 		panic("rt_msg1: message too long");
613 	else if (len > MHLEN) {
614 		m->m_next = m_get(M_DONTWAIT, MT_DATA);
615 		if (m->m_next == NULL) {
616 			m_freem(m);
617 			return NULL;
618 		}
619 		MCLAIM(m->m_next, m->m_owner);
620 		m->m_pkthdr.len = len;
621 		m->m_len = MHLEN;
622 		m->m_next->m_len = len - MHLEN;
623 	} else {
624 		m->m_pkthdr.len = m->m_len = len;
625 	}
626 	m->m_pkthdr.rcvif = NULL;
627 	m_copyback(m, 0, datalen, data);
628 	if (len > datalen)
629 		(void)memset(mtod(m, char *) + datalen, 0, len - datalen);
630 	rtm = mtod(m, struct rt_msghdr *);
631 	for (i = 0; i < RTAX_MAX; i++) {
632 		if ((sa = rtinfo->rti_info[i]) == NULL)
633 			continue;
634 		rtinfo->rti_addrs |= (1 << i);
635 		dlen = ROUNDUP(sa->sa_len);
636 		m_copyback(m, len, dlen, sa);
637 		len += dlen;
638 	}
639 	if (m->m_pkthdr.len != len) {
640 		m_freem(m);
641 		return NULL;
642 	}
643 	rtm->rtm_msglen = len;
644 	rtm->rtm_version = RTM_VERSION;
645 	rtm->rtm_type = type;
646 	return m;
647 }
648 
649 /*
650  * rt_msg2
651  *
652  *	 fills 'cp' or 'w'.w_tmem with the routing socket message and
653  *		returns the length of the message in 'lenp'.
654  *
655  * if walkarg is 0, cp is expected to be 0 or a buffer large enough to hold
656  *	the message
657  * otherwise walkarg's w_needed is updated and if the user buffer is
658  *	specified and w_needed indicates space exists the information is copied
659  *	into the temp space (w_tmem). w_tmem is [re]allocated if necessary,
660  *	if the allocation fails ENOBUFS is returned.
661  */
662 static int
663 rt_msg2(int type, struct rt_addrinfo *rtinfo, void *cpv, struct walkarg *w,
664 	int *lenp)
665 {
666 	int i;
667 	int len, dlen, second_time = 0;
668 	char *cp0, *cp = cpv;
669 
670 	rtinfo->rti_addrs = 0;
671 again:
672 	switch (type) {
673 
674 	case RTM_DELADDR:
675 	case RTM_NEWADDR:
676 		len = sizeof(struct ifa_msghdr);
677 		break;
678 #ifdef COMPAT_14
679 	case RTM_OIFINFO:
680 		len = sizeof(struct if_msghdr14);
681 		break;
682 #endif
683 
684 	case RTM_IFINFO:
685 		len = sizeof(struct if_msghdr);
686 		break;
687 
688 	default:
689 		len = sizeof(struct rt_msghdr);
690 	}
691 	if ((cp0 = cp) != NULL)
692 		cp += len;
693 	for (i = 0; i < RTAX_MAX; i++) {
694 		const struct sockaddr *sa;
695 
696 		if ((sa = rtinfo->rti_info[i]) == NULL)
697 			continue;
698 		rtinfo->rti_addrs |= (1 << i);
699 		dlen = ROUNDUP(sa->sa_len);
700 		if (cp) {
701 			(void)memcpy(cp, sa, (size_t)dlen);
702 			cp += dlen;
703 		}
704 		len += dlen;
705 	}
706 	if (cp == NULL && w != NULL && !second_time) {
707 		struct walkarg *rw = w;
708 
709 		rw->w_needed += len;
710 		if (rw->w_needed <= 0 && rw->w_where) {
711 			if (rw->w_tmemsize < len) {
712 				if (rw->w_tmem)
713 					free(rw->w_tmem, M_RTABLE);
714 				rw->w_tmem = malloc(len, M_RTABLE, M_NOWAIT);
715 				if (rw->w_tmem)
716 					rw->w_tmemsize = len;
717 				else
718 					rw->w_tmemsize = 0;
719 			}
720 			if (rw->w_tmem) {
721 				cp = rw->w_tmem;
722 				second_time = 1;
723 				goto again;
724 			} else {
725 				rw->w_tmemneeded = len;
726 				return ENOBUFS;
727 			}
728 		}
729 	}
730 	if (cp) {
731 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
732 
733 		rtm->rtm_version = RTM_VERSION;
734 		rtm->rtm_type = type;
735 		rtm->rtm_msglen = len;
736 	}
737 	if (lenp)
738 		*lenp = len;
739 	return 0;
740 }
741 
742 /*
743  * This routine is called to generate a message from the routing
744  * socket indicating that a redirect has occurred, a routing lookup
745  * has failed, or that a protocol has detected timeouts to a particular
746  * destination.
747  */
748 void
749 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
750 {
751 	struct rt_msghdr rtm;
752 	struct mbuf *m;
753 	const struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
754 
755 	if (route_cb.any_count == 0)
756 		return;
757 	memset(&rtm, 0, sizeof(rtm));
758 	rtm.rtm_flags = RTF_DONE | flags;
759 	rtm.rtm_errno = error;
760 	m = rt_msg1(type, rtinfo, &rtm, sizeof(rtm));
761 	if (m == NULL)
762 		return;
763 	mtod(m, struct rt_msghdr *)->rtm_addrs = rtinfo->rti_addrs;
764 	route_enqueue(m, sa ? sa->sa_family : 0);
765 }
766 
767 /*
768  * This routine is called to generate a message from the routing
769  * socket indicating that the status of a network interface has changed.
770  */
771 void
772 rt_ifmsg(struct ifnet *ifp)
773 {
774 	struct if_msghdr ifm;
775 #ifdef COMPAT_14
776 	struct if_msghdr14 oifm;
777 #endif
778 	struct mbuf *m;
779 	struct rt_addrinfo info;
780 
781 	if (route_cb.any_count == 0)
782 		return;
783 	memset(&info, 0, sizeof(info));
784 	memset(&ifm, 0, sizeof(ifm));
785 	ifm.ifm_index = ifp->if_index;
786 	ifm.ifm_flags = ifp->if_flags;
787 	ifm.ifm_data = ifp->if_data;
788 	ifm.ifm_addrs = 0;
789 	m = rt_msg1(RTM_IFINFO, &info, &ifm, sizeof(ifm));
790 	if (m == NULL)
791 		return;
792 	route_enqueue(m, 0);
793 #ifdef COMPAT_14
794 	memset(&info, 0, sizeof(info));
795 	memset(&oifm, 0, sizeof(oifm));
796 	oifm.ifm_index = ifp->if_index;
797 	oifm.ifm_flags = ifp->if_flags;
798 	oifm.ifm_data.ifi_type = ifp->if_data.ifi_type;
799 	oifm.ifm_data.ifi_addrlen = ifp->if_data.ifi_addrlen;
800 	oifm.ifm_data.ifi_hdrlen = ifp->if_data.ifi_hdrlen;
801 	oifm.ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
802 	oifm.ifm_data.ifi_metric = ifp->if_data.ifi_metric;
803 	oifm.ifm_data.ifi_baudrate = ifp->if_data.ifi_baudrate;
804 	oifm.ifm_data.ifi_ipackets = ifp->if_data.ifi_ipackets;
805 	oifm.ifm_data.ifi_ierrors = ifp->if_data.ifi_ierrors;
806 	oifm.ifm_data.ifi_opackets = ifp->if_data.ifi_opackets;
807 	oifm.ifm_data.ifi_oerrors = ifp->if_data.ifi_oerrors;
808 	oifm.ifm_data.ifi_collisions = ifp->if_data.ifi_collisions;
809 	oifm.ifm_data.ifi_ibytes = ifp->if_data.ifi_ibytes;
810 	oifm.ifm_data.ifi_obytes = ifp->if_data.ifi_obytes;
811 	oifm.ifm_data.ifi_imcasts = ifp->if_data.ifi_imcasts;
812 	oifm.ifm_data.ifi_omcasts = ifp->if_data.ifi_omcasts;
813 	oifm.ifm_data.ifi_iqdrops = ifp->if_data.ifi_iqdrops;
814 	oifm.ifm_data.ifi_noproto = ifp->if_data.ifi_noproto;
815 	oifm.ifm_data.ifi_lastchange = ifp->if_data.ifi_lastchange;
816 	oifm.ifm_addrs = 0;
817 	m = rt_msg1(RTM_OIFINFO, &info, &oifm, sizeof(oifm));
818 	if (m == NULL)
819 		return;
820 	route_enqueue(m, 0);
821 #endif
822 }
823 
824 /*
825  * This is called to generate messages from the routing socket
826  * indicating a network interface has had addresses associated with it.
827  * if we ever reverse the logic and replace messages TO the routing
828  * socket indicate a request to configure interfaces, then it will
829  * be unnecessary as the routing socket will automatically generate
830  * copies of it.
831  */
832 void
833 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
834 {
835 #define	cmdpass(__cmd, __pass)	(((__cmd) << 2) | (__pass))
836 	struct rt_addrinfo info;
837 	const struct sockaddr *sa;
838 	int pass;
839 	struct mbuf *m;
840 	struct ifnet *ifp = ifa->ifa_ifp;
841 	struct rt_msghdr rtm;
842 	struct ifa_msghdr ifam;
843 	int ncmd;
844 
845 	if (route_cb.any_count == 0)
846 		return;
847 	for (pass = 1; pass < 3; pass++) {
848 		memset(&info, 0, sizeof(info));
849 		switch (cmdpass(cmd, pass)) {
850 		case cmdpass(RTM_ADD, 1):
851 		case cmdpass(RTM_CHANGE, 1):
852 		case cmdpass(RTM_DELETE, 2):
853 			if (cmd == RTM_ADD)
854 				ncmd = RTM_NEWADDR;
855 			else
856 				ncmd = RTM_DELADDR;
857 
858 			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
859 			info.rti_info[RTAX_IFP] = ifp->if_dl->ifa_addr;
860 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
861 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
862 			memset(&ifam, 0, sizeof(ifam));
863 			ifam.ifam_index = ifp->if_index;
864 			ifam.ifam_metric = ifa->ifa_metric;
865 			ifam.ifam_flags = ifa->ifa_flags;
866 			m = rt_msg1(ncmd, &info, &ifam, sizeof(ifam));
867 			if (m == NULL)
868 				continue;
869 			mtod(m, struct ifa_msghdr *)->ifam_addrs =
870 			    info.rti_addrs;
871 			break;
872 		case cmdpass(RTM_ADD, 2):
873 		case cmdpass(RTM_CHANGE, 2):
874 		case cmdpass(RTM_DELETE, 1):
875 			if (rt == NULL)
876 				continue;
877 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
878 			info.rti_info[RTAX_DST] = sa = rt_getkey(rt);
879 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
880 			memset(&rtm, 0, sizeof(rtm));
881 			rtm.rtm_index = ifp->if_index;
882 			rtm.rtm_flags |= rt->rt_flags;
883 			rtm.rtm_errno = error;
884 			m = rt_msg1(cmd, &info, &rtm, sizeof(rtm));
885 			if (m == NULL)
886 				continue;
887 			mtod(m, struct rt_msghdr *)->rtm_addrs = info.rti_addrs;
888 			break;
889 		default:
890 			continue;
891 		}
892 #ifdef DIAGNOSTIC
893 		if (m == NULL)
894 			panic("%s: called with wrong command", __func__);
895 #endif
896 		route_enqueue(m, sa ? sa->sa_family : 0);
897 	}
898 #undef cmdpass
899 }
900 
901 static struct mbuf *
902 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
903     struct rt_addrinfo *info)
904 {
905 	struct if_announcemsghdr ifan;
906 
907 	memset(info, 0, sizeof(*info));
908 	memset(&ifan, 0, sizeof(ifan));
909 	ifan.ifan_index = ifp->if_index;
910 	strlcpy(ifan.ifan_name, ifp->if_xname, sizeof(ifan.ifan_name));
911 	ifan.ifan_what = what;
912 	return rt_msg1(type, info, &ifan, sizeof(ifan));
913 }
914 
915 /*
916  * This is called to generate routing socket messages indicating
917  * network interface arrival and departure.
918  */
919 void
920 rt_ifannouncemsg(struct ifnet *ifp, int what)
921 {
922 	struct mbuf *m;
923 	struct rt_addrinfo info;
924 
925 	if (route_cb.any_count == 0)
926 		return;
927 	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
928 	if (m == NULL)
929 		return;
930 	route_enqueue(m, 0);
931 }
932 
933 /*
934  * This is called to generate routing socket messages indicating
935  * IEEE80211 wireless events.
936  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
937  */
938 void
939 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
940 {
941 	struct mbuf *m;
942 	struct rt_addrinfo info;
943 
944 	if (route_cb.any_count == 0)
945 		return;
946 	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
947 	if (m == NULL)
948 		return;
949 	/*
950 	 * Append the ieee80211 data.  Try to stick it in the
951 	 * mbuf containing the ifannounce msg; otherwise allocate
952 	 * a new mbuf and append.
953 	 *
954 	 * NB: we assume m is a single mbuf.
955 	 */
956 	if (data_len > M_TRAILINGSPACE(m)) {
957 		struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
958 		if (n == NULL) {
959 			m_freem(m);
960 			return;
961 		}
962 		(void)memcpy(mtod(n, void *), data, data_len);
963 		n->m_len = data_len;
964 		m->m_next = n;
965 	} else if (data_len > 0) {
966 		(void)memcpy(mtod(m, uint8_t *) + m->m_len, data, data_len);
967 		m->m_len += data_len;
968 	}
969 	if (m->m_flags & M_PKTHDR)
970 		m->m_pkthdr.len += data_len;
971 	mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
972 	route_enqueue(m, 0);
973 }
974 
975 /*
976  * This is used in dumping the kernel table via sysctl().
977  */
978 static int
979 sysctl_dumpentry(struct rtentry *rt, void *v)
980 {
981 	struct walkarg *w = v;
982 	int error = 0, size;
983 	struct rt_addrinfo info;
984 
985 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
986 		return 0;
987 	memset(&info, 0, sizeof(info));
988 	info.rti_info[RTAX_DST] = rt_getkey(rt);
989 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
990 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
991 	if (rt->rt_ifp) {
992 		const struct ifaddr *rtifa;
993 		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_dl->ifa_addr;
994 		/* rtifa used to be simply rt->rt_ifa.  If rt->rt_ifa != NULL,
995 		 * then rt_get_ifa() != NULL.  So this ought to still be safe.
996 		 * --dyoung
997 		 */
998 		rtifa = rt_get_ifa(rt);
999 		info.rti_info[RTAX_IFA] = rtifa->ifa_addr;
1000 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1001 			info.rti_info[RTAX_BRD] = rtifa->ifa_dstaddr;
1002 	}
1003 	if ((error = rt_msg2(RTM_GET, &info, 0, w, &size)))
1004 		return error;
1005 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1006 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1007 
1008 		rtm->rtm_flags = rt->rt_flags;
1009 		rtm->rtm_use = rt->rt_use;
1010 		rtm->rtm_rmx = rt->rt_rmx;
1011 		KASSERT(rt->rt_ifp != NULL);
1012 		rtm->rtm_index = rt->rt_ifp->if_index;
1013 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1014 		rtm->rtm_addrs = info.rti_addrs;
1015 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1016 			w->w_where = NULL;
1017 		else
1018 			w->w_where = (char *)w->w_where + size;
1019 	}
1020 	return error;
1021 }
1022 
1023 static int
1024 sysctl_iflist(int af, struct walkarg *w, int type)
1025 {
1026 	struct ifnet *ifp;
1027 	struct ifaddr *ifa;
1028 	struct	rt_addrinfo info;
1029 	int	len, error = 0;
1030 
1031 	memset(&info, 0, sizeof(info));
1032 	IFNET_FOREACH(ifp) {
1033 		if (w->w_arg && w->w_arg != ifp->if_index)
1034 			continue;
1035 		if (IFADDR_EMPTY(ifp))
1036 			continue;
1037 		info.rti_info[RTAX_IFP] = ifp->if_dl->ifa_addr;
1038 		switch (type) {
1039 		case NET_RT_IFLIST:
1040 			error = rt_msg2(RTM_IFINFO, &info, NULL, w, &len);
1041 			break;
1042 #ifdef COMPAT_14
1043 		case NET_RT_OIFLIST:
1044 			error = rt_msg2(RTM_OIFINFO, &info, NULL, w, &len);
1045 			break;
1046 #endif
1047 		default:
1048 			panic("sysctl_iflist(1)");
1049 		}
1050 		if (error)
1051 			return error;
1052 		info.rti_info[RTAX_IFP] = NULL;
1053 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1054 			switch (type) {
1055 			case NET_RT_IFLIST: {
1056 				struct if_msghdr *ifm;
1057 
1058 				ifm = (struct if_msghdr *)w->w_tmem;
1059 				ifm->ifm_index = ifp->if_index;
1060 				ifm->ifm_flags = ifp->if_flags;
1061 				ifm->ifm_data = ifp->if_data;
1062 				ifm->ifm_addrs = info.rti_addrs;
1063 				error = copyout(ifm, w->w_where, len);
1064 				if (error)
1065 					return error;
1066 				w->w_where = (char *)w->w_where + len;
1067 				break;
1068 			}
1069 
1070 #ifdef COMPAT_14
1071 			case NET_RT_OIFLIST: {
1072 				struct if_msghdr14 *ifm;
1073 
1074 				ifm = (struct if_msghdr14 *)w->w_tmem;
1075 				ifm->ifm_index = ifp->if_index;
1076 				ifm->ifm_flags = ifp->if_flags;
1077 				ifm->ifm_data.ifi_type = ifp->if_data.ifi_type;
1078 				ifm->ifm_data.ifi_addrlen =
1079 				    ifp->if_data.ifi_addrlen;
1080 				ifm->ifm_data.ifi_hdrlen =
1081 				    ifp->if_data.ifi_hdrlen;
1082 				ifm->ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
1083 				ifm->ifm_data.ifi_metric =
1084 				    ifp->if_data.ifi_metric;
1085 				ifm->ifm_data.ifi_baudrate =
1086 				    ifp->if_data.ifi_baudrate;
1087 				ifm->ifm_data.ifi_ipackets =
1088 				    ifp->if_data.ifi_ipackets;
1089 				ifm->ifm_data.ifi_ierrors =
1090 				    ifp->if_data.ifi_ierrors;
1091 				ifm->ifm_data.ifi_opackets =
1092 				    ifp->if_data.ifi_opackets;
1093 				ifm->ifm_data.ifi_oerrors =
1094 				    ifp->if_data.ifi_oerrors;
1095 				ifm->ifm_data.ifi_collisions =
1096 				    ifp->if_data.ifi_collisions;
1097 				ifm->ifm_data.ifi_ibytes =
1098 				    ifp->if_data.ifi_ibytes;
1099 				ifm->ifm_data.ifi_obytes =
1100 				    ifp->if_data.ifi_obytes;
1101 				ifm->ifm_data.ifi_imcasts =
1102 				    ifp->if_data.ifi_imcasts;
1103 				ifm->ifm_data.ifi_omcasts =
1104 				    ifp->if_data.ifi_omcasts;
1105 				ifm->ifm_data.ifi_iqdrops =
1106 				    ifp->if_data.ifi_iqdrops;
1107 				ifm->ifm_data.ifi_noproto =
1108 				    ifp->if_data.ifi_noproto;
1109 				ifm->ifm_data.ifi_lastchange =
1110 				    ifp->if_data.ifi_lastchange;
1111 				ifm->ifm_addrs = info.rti_addrs;
1112 				error = copyout(ifm, w->w_where, len);
1113 				if (error)
1114 					return error;
1115 				w->w_where = (char *)w->w_where + len;
1116 				break;
1117 			}
1118 #endif
1119 			default:
1120 				panic("sysctl_iflist(2)");
1121 			}
1122 		}
1123 		IFADDR_FOREACH(ifa, ifp) {
1124 			if (af && af != ifa->ifa_addr->sa_family)
1125 				continue;
1126 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1127 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1128 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1129 			if ((error = rt_msg2(RTM_NEWADDR, &info, 0, w, &len)))
1130 				return error;
1131 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1132 				struct ifa_msghdr *ifam;
1133 
1134 				ifam = (struct ifa_msghdr *)w->w_tmem;
1135 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1136 				ifam->ifam_flags = ifa->ifa_flags;
1137 				ifam->ifam_metric = ifa->ifa_metric;
1138 				ifam->ifam_addrs = info.rti_addrs;
1139 				error = copyout(w->w_tmem, w->w_where, len);
1140 				if (error)
1141 					return error;
1142 				w->w_where = (char *)w->w_where + len;
1143 			}
1144 		}
1145 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1146 		    info.rti_info[RTAX_BRD] = NULL;
1147 	}
1148 	return 0;
1149 }
1150 
1151 static int
1152 sysctl_rtable(SYSCTLFN_ARGS)
1153 {
1154 	void 	*where = oldp;
1155 	size_t	*given = oldlenp;
1156 	const void *new = newp;
1157 	int	i, s, error = EINVAL;
1158 	u_char  af;
1159 	struct	walkarg w;
1160 
1161 	if (namelen == 1 && name[0] == CTL_QUERY)
1162 		return sysctl_query(SYSCTLFN_CALL(rnode));
1163 
1164 	if (new)
1165 		return EPERM;
1166 	if (namelen != 3)
1167 		return EINVAL;
1168 	af = name[0];
1169 	w.w_tmemneeded = 0;
1170 	w.w_tmemsize = 0;
1171 	w.w_tmem = NULL;
1172 again:
1173 	/* we may return here if a later [re]alloc of the t_mem buffer fails */
1174 	if (w.w_tmemneeded) {
1175 		w.w_tmem = malloc(w.w_tmemneeded, M_RTABLE, M_WAITOK);
1176 		w.w_tmemsize = w.w_tmemneeded;
1177 		w.w_tmemneeded = 0;
1178 	}
1179 	w.w_op = name[1];
1180 	w.w_arg = name[2];
1181 	w.w_given = *given;
1182 	w.w_needed = 0 - w.w_given;
1183 	w.w_where = where;
1184 
1185 	s = splsoftnet();
1186 	switch (w.w_op) {
1187 
1188 	case NET_RT_DUMP:
1189 	case NET_RT_FLAGS:
1190 		for (i = 1; i <= AF_MAX; i++)
1191 			if ((af == 0 || af == i) &&
1192 			    (error = rt_walktree(i, sysctl_dumpentry, &w)))
1193 				break;
1194 		break;
1195 
1196 #ifdef COMPAT_14
1197 	case NET_RT_OIFLIST:
1198 		error = sysctl_iflist(af, &w, w.w_op);
1199 		break;
1200 #endif
1201 
1202 	case NET_RT_IFLIST:
1203 		error = sysctl_iflist(af, &w, w.w_op);
1204 	}
1205 	splx(s);
1206 
1207 	/* check to see if we couldn't allocate memory with NOWAIT */
1208 	if (error == ENOBUFS && w.w_tmem == 0 && w.w_tmemneeded)
1209 		goto again;
1210 
1211 	if (w.w_tmem)
1212 		free(w.w_tmem, M_RTABLE);
1213 	w.w_needed += w.w_given;
1214 	if (where) {
1215 		*given = (char *)w.w_where - (char *)where;
1216 		if (*given < w.w_needed)
1217 			return ENOMEM;
1218 	} else {
1219 		*given = (11 * w.w_needed) / 10;
1220 	}
1221 	return error;
1222 }
1223 
1224 /*
1225  * Routing message software interrupt routine
1226  */
1227 static void
1228 route_intr(void *cookie)
1229 {
1230 	struct sockproto proto = { .sp_family = PF_ROUTE, };
1231 	struct mbuf *m;
1232 	int s;
1233 
1234 	mutex_enter(softnet_lock);
1235 	KERNEL_LOCK(1, NULL);
1236 	while (!IF_IS_EMPTY(&route_intrq)) {
1237 		s = splnet();
1238 		IF_DEQUEUE(&route_intrq, m);
1239 		splx(s);
1240 		if (m == NULL)
1241 			break;
1242 		proto.sp_protocol = M_GETCTX(m, uintptr_t);
1243 		raw_input(m, &proto, &route_src, &route_dst);
1244 	}
1245 	KERNEL_UNLOCK_ONE(NULL);
1246 	mutex_exit(softnet_lock);
1247 }
1248 
1249 /*
1250  * Enqueue a message to the software interrupt routine.
1251  */
1252 static void
1253 route_enqueue(struct mbuf *m, int family)
1254 {
1255 	int s, wasempty;
1256 
1257 	s = splnet();
1258 	if (IF_QFULL(&route_intrq)) {
1259 		IF_DROP(&route_intrq);
1260 		m_freem(m);
1261 	} else {
1262 		wasempty = IF_IS_EMPTY(&route_intrq);
1263 		M_SETCTX(m, (uintptr_t)family);
1264 		IF_ENQUEUE(&route_intrq, m);
1265 		if (wasempty)
1266 			softint_schedule(route_sih);
1267 	}
1268 	splx(s);
1269 }
1270 
1271 void
1272 rt_init(void)
1273 {
1274 
1275 	route_intrq.ifq_maxlen = route_maxqlen;
1276 	route_sih = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1277 	    route_intr, NULL);
1278 }
1279 
1280 /*
1281  * Definitions of protocols supported in the ROUTE domain.
1282  */
1283 PR_WRAP_USRREQ(route_usrreq)
1284 #define	route_usrreq	route_usrreq_wrapper
1285 
1286 const struct protosw routesw[] = {
1287 	{
1288 		.pr_type = SOCK_RAW,
1289 		.pr_domain = &routedomain,
1290 		.pr_flags = PR_ATOMIC|PR_ADDR,
1291 		.pr_input = raw_input,
1292 		.pr_output = route_output,
1293 		.pr_ctlinput = raw_ctlinput,
1294 		.pr_usrreq = route_usrreq,
1295 		.pr_init = raw_init,
1296 	},
1297 };
1298 
1299 struct domain routedomain = {
1300 	.dom_family = PF_ROUTE,
1301 	.dom_name = "route",
1302 	.dom_init = route_init,
1303 	.dom_protosw = routesw,
1304 	.dom_protoswNPROTOSW = &routesw[__arraycount(routesw)],
1305 };
1306 
1307 SYSCTL_SETUP(sysctl_net_route_setup, "sysctl net.route subtree setup")
1308 {
1309 	const struct sysctlnode *rnode = NULL;
1310 
1311 	sysctl_createv(clog, 0, NULL, NULL,
1312 		       CTLFLAG_PERMANENT,
1313 		       CTLTYPE_NODE, "net", NULL,
1314 		       NULL, 0, NULL, 0,
1315 		       CTL_NET, CTL_EOL);
1316 
1317 	sysctl_createv(clog, 0, NULL, &rnode,
1318 		       CTLFLAG_PERMANENT,
1319 		       CTLTYPE_NODE, "route",
1320 		       SYSCTL_DESCR("PF_ROUTE information"),
1321 		       NULL, 0, NULL, 0,
1322 		       CTL_NET, PF_ROUTE, CTL_EOL);
1323 	sysctl_createv(clog, 0, NULL, NULL,
1324 		       CTLFLAG_PERMANENT,
1325 		       CTLTYPE_NODE, "rtable",
1326 		       SYSCTL_DESCR("Routing table information"),
1327 		       sysctl_rtable, 0, NULL, 0,
1328 		       CTL_NET, PF_ROUTE, 0 /* any protocol */, CTL_EOL);
1329 	sysctl_createv(clog, 0, &rnode, NULL,
1330 		       CTLFLAG_PERMANENT,
1331 		       CTLTYPE_STRUCT, "stats",
1332 		       SYSCTL_DESCR("Routing statistics"),
1333 		       NULL, 0, &rtstat, sizeof(rtstat),
1334 		       CTL_CREATE, CTL_EOL);
1335 }
1336