xref: /netbsd-src/sys/net/rtsock.c (revision cd22f25e6f6d1cc1f197fe8c5468a80f51d1c4e1)
1 /*	$NetBSD: rtsock.c,v 1.101 2008/04/24 11:38:37 ad Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1988, 1991, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: rtsock.c,v 1.101 2008/04/24 11:38:37 ad Exp $");
65 
66 #include "opt_inet.h"
67 
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/proc.h>
71 #include <sys/mbuf.h>
72 #include <sys/socket.h>
73 #include <sys/socketvar.h>
74 #include <sys/domain.h>
75 #include <sys/protosw.h>
76 #include <sys/sysctl.h>
77 #include <sys/kauth.h>
78 #include <sys/intr.h>
79 #ifdef RTSOCK_DEBUG
80 #include <netinet/in.h>
81 #endif /* RTSOCK_DEBUG */
82 
83 #include <net/if.h>
84 #include <net/route.h>
85 #include <net/raw_cb.h>
86 
87 #include <machine/stdarg.h>
88 
89 DOMAIN_DEFINE(routedomain);	/* forward declare and add to link set */
90 
/*
 * Fixed addresses used for every routing socket: route_usrreq() installs
 * route_src as the local and route_dst as the foreign address of each
 * attached control block, and route_output() passes both to raw_input().
 */
91 struct	sockaddr route_dst = { .sa_len = 2, .sa_family = PF_ROUTE, };
92 struct	sockaddr route_src = { .sa_len = 2, .sa_family = PF_ROUTE, };
93 
/*
 * NOTE(review): the three objects below are not referenced in the part of
 * the file visible here; presumably they are the queue limit, the queue
 * and the soft-interrupt handle behind route_enqueue() -- confirm there.
 */
94 int	route_maxqlen = IFQ_MAXLEN;
95 static struct	ifqueue route_intrq;
96 static void	*route_sih;
97 
/*
 * State threaded through the sysctl dump helpers (sysctl_dumpentry(),
 * sysctl_iflist()) and rt_msg2() while messages are sized and copied out.
 */
98 struct walkarg {
99 	int	w_op;		/* operation; compared with NET_RT_FLAGS in sysctl_dumpentry() */
100 	int	w_arg;		/* rt_flags mask for NET_RT_FLAGS; if_index filter (0 = all) in sysctl_iflist() */
101 	int	w_given;	/* not used in the visible code; presumably the user buffer size -- TODO confirm */
102 	int	w_needed;	/* running total: rt_msg2() adds each message length; copying only happens while <= 0 */
103 	void *	w_where;	/* user-space output cursor for copyout(); NULL when nothing is to be copied */
104 	int	w_tmemsize;	/* current size of w_tmem in bytes */
105 	int	w_tmemneeded;	/* set by rt_msg2() to the size it failed to allocate */
106 	void *	w_tmem;		/* kernel scratch buffer (M_RTABLE) in which rt_msg2() builds a message */
107 };
108 
/*
 * Forward declarations of the file-local helpers.  rt_msg1() builds a
 * message in an mbuf chain, rt_msg2() builds (or only sizes) one in a flat
 * buffer, and rt_xaddrs() parses the sockaddrs that follow a message header.
 */
109 static struct mbuf *rt_msg1(int, struct rt_addrinfo *, void *, int);
110 static int rt_msg2(int, struct rt_addrinfo *, void *, struct walkarg *, int *);
111 static int rt_xaddrs(u_char, const char *, const char *, struct rt_addrinfo *);
112 static struct mbuf *rt_makeifannouncemsg(struct ifnet *, int, int,
113     struct rt_addrinfo *);
114 static int sysctl_dumpentry(struct rtentry *, void *);
115 static int sysctl_iflist(int, struct walkarg *, int);
116 static int sysctl_rtable(SYSCTLFN_PROTO);
117 static inline void rt_adjustcount(int, int);
118 static void route_enqueue(struct mbuf *, int);
119 
120 /* Sleazy use of local variables throughout file, warning!!!! */
/*
 * Each name below expands to one slot of a variable that must be called
 * `info' (a struct rt_addrinfo) in the enclosing scope.  Because these are
 * plain object-like macros, no other identifier in this file may be named
 * dst, gate, netmask, ifpaddr, ifaaddr or brdaddr.
 */
121 #define dst	info.rti_info[RTAX_DST]
122 #define gate	info.rti_info[RTAX_GATEWAY]
123 #define netmask	info.rti_info[RTAX_NETMASK]
124 #define ifpaddr	info.rti_info[RTAX_IFP]
125 #define ifaaddr	info.rti_info[RTAX_IFA]
126 #define brdaddr	info.rti_info[RTAX_BRD]
127 
128 static inline void
129 rt_adjustcount(int af, int cnt)
130 {
131 	route_cb.any_count += cnt;
132 	switch (af) {
133 	case AF_INET:
134 		route_cb.ip_count += cnt;
135 		return;
136 #ifdef INET6
137 	case AF_INET6:
138 		route_cb.ip6_count += cnt;
139 		return;
140 #endif
141 	case AF_IPX:
142 		route_cb.ipx_count += cnt;
143 		return;
144 	case AF_NS:
145 		route_cb.ns_count += cnt;
146 		return;
147 	case AF_ISO:
148 		route_cb.iso_count += cnt;
149 		return;
150 	}
151 }
152 
153 /*ARGSUSED*/
154 int
155 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
156 	struct mbuf *control, struct lwp *l)
157 {
158 	int error = 0;
159 	struct rawcb *rp = sotorawcb(so);
160 	int s;
161 
162 	if (req == PRU_ATTACH) {
163 		sosetlock(so);
164 		MALLOC(rp, struct rawcb *, sizeof(*rp), M_PCB, M_WAITOK);
165 		if ((so->so_pcb = rp) != NULL)
166 			memset(so->so_pcb, 0, sizeof(*rp));
167 
168 	}
169 	if (req == PRU_DETACH && rp)
170 		rt_adjustcount(rp->rcb_proto.sp_protocol, -1);
171 	s = splsoftnet();
172 
173 	/*
174 	 * Don't call raw_usrreq() in the attach case, because
175 	 * we want to allow non-privileged processes to listen on
176 	 * and send "safe" commands to the routing socket.
177 	 */
178 	if (req == PRU_ATTACH) {
179 		if (l == NULL)
180 			error = EACCES;
181 		else
182 			error = raw_attach(so, (int)(long)nam);
183 	} else
184 		error = raw_usrreq(so, req, m, nam, control, l);
185 
186 	rp = sotorawcb(so);
187 	if (req == PRU_ATTACH && rp) {
188 		if (error) {
189 			free((void *)rp, M_PCB);
190 			splx(s);
191 			return error;
192 		}
193 		rt_adjustcount(rp->rcb_proto.sp_protocol, 1);
194 		rp->rcb_laddr = &route_src;
195 		rp->rcb_faddr = &route_dst;
196 		soisconnected(so);
197 		so->so_options |= SO_USELOOPBACK;
198 	}
199 	splx(s);
200 	return error;
201 }
202 
203 static const struct sockaddr *
204 intern_netmask(const struct sockaddr *mask)
205 {
206 	struct radix_node *rn;
207 	extern struct radix_node_head *mask_rnhead;
208 
209 	if (mask != NULL &&
210 	    (rn = rn_search(mask, mask_rnhead->rnh_treetop)))
211 		mask = (const struct sockaddr *)rn->rn_key;
212 
213 	return mask;
214 }
215 
216 /*ARGSUSED*/
217 int
218 route_output(struct mbuf *m, ...)
219 {
220 	struct sockproto proto = { .sp_family = PF_ROUTE, };
221 	struct rt_msghdr *rtm = NULL;
222 	struct rtentry *rt = NULL;
223 	struct rtentry *saved_nrt = NULL;
224 	struct rt_addrinfo info;
225 	int len, error = 0;
226 	struct ifnet *ifp = NULL;
227 	struct ifaddr *ifa = NULL;
228 	struct socket *so;
229 	va_list ap;
230 	sa_family_t family;
231 
232 	va_start(ap, m);
233 	so = va_arg(ap, struct socket *);
234 	va_end(ap);
235 
236 #define senderr(e) do { error = e; goto flush;} while (/*CONSTCOND*/ 0)
237 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
238 	   (m = m_pullup(m, sizeof(int32_t))) == NULL))
239 		return ENOBUFS;
240 	if ((m->m_flags & M_PKTHDR) == 0)
241 		panic("route_output");
242 	len = m->m_pkthdr.len;
243 	if (len < sizeof(*rtm) ||
244 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
245 		dst = NULL;
246 		senderr(EINVAL);
247 	}
248 	R_Malloc(rtm, struct rt_msghdr *, len);
249 	if (rtm == NULL) {
250 		dst = NULL;
251 		senderr(ENOBUFS);
252 	}
253 	m_copydata(m, 0, len, (void *)rtm);
254 	if (rtm->rtm_version != RTM_VERSION) {
255 		dst = NULL;
256 		senderr(EPROTONOSUPPORT);
257 	}
258 	rtm->rtm_pid = curproc->p_pid;
259 	memset(&info, 0, sizeof(info));
260 	info.rti_addrs = rtm->rtm_addrs;
261 	if (rt_xaddrs(rtm->rtm_type, (void *)(rtm + 1), len + (char *)rtm, &info))
262 		senderr(EINVAL);
263 	info.rti_flags = rtm->rtm_flags;
264 #ifdef RTSOCK_DEBUG
265 	if (dst->sa_family == AF_INET) {
266 		printf("%s: extracted dst %s\n", __func__,
267 		    inet_ntoa(((const struct sockaddr_in *)dst)->sin_addr));
268 	}
269 #endif /* RTSOCK_DEBUG */
270 	if (dst == NULL || (dst->sa_family >= AF_MAX))
271 		senderr(EINVAL);
272 	if (gate != NULL && (gate->sa_family >= AF_MAX))
273 		senderr(EINVAL);
274 
275 	/*
276 	 * Verify that the caller has the appropriate privilege; RTM_GET
277 	 * is the only operation the non-superuser is allowed.
278 	 */
279 	if (kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_ROUTE,
280 	    0, rtm, NULL, NULL) != 0)
281 		senderr(EACCES);
282 
283 	switch (rtm->rtm_type) {
284 
285 	case RTM_ADD:
286 		if (gate == NULL)
287 			senderr(EINVAL);
288 		error = rtrequest1(rtm->rtm_type, &info, &saved_nrt);
289 		if (error == 0 && saved_nrt) {
290 			rt_setmetrics(rtm->rtm_inits,
291 			    &rtm->rtm_rmx, &saved_nrt->rt_rmx);
292 			saved_nrt->rt_refcnt--;
293 		}
294 		break;
295 
296 	case RTM_DELETE:
297 		error = rtrequest1(rtm->rtm_type, &info, &saved_nrt);
298 		if (error == 0) {
299 			(rt = saved_nrt)->rt_refcnt++;
300 			goto report;
301 		}
302 		break;
303 
304 	case RTM_GET:
305 	case RTM_CHANGE:
306 	case RTM_LOCK:
307                 /* XXX This will mask dst with netmask before
308                  * searching.  It did not used to do that.  --dyoung
309 		 */
310 		error = rtrequest(RTM_GET, dst, gate, netmask, 0, &rt);
311 		if (error != 0)
312 			senderr(error);
313 		if (rtm->rtm_type != RTM_GET) {/* XXX: too grotty */
314 			struct radix_node *rn;
315 
316 			if (memcmp(dst, rt_getkey(rt), dst->sa_len) != 0)
317 				senderr(ESRCH);
318 			netmask = intern_netmask(netmask);
319 			for (rn = rt->rt_nodes; rn; rn = rn->rn_dupedkey)
320 				if (netmask == (const struct sockaddr *)rn->rn_mask)
321 					break;
322 			if (rn == NULL)
323 				senderr(ETOOMANYREFS);
324 			rt = (struct rtentry *)rn;
325 		}
326 
327 		switch (rtm->rtm_type) {
328 		case RTM_GET:
329 		report:
330 			dst = rt_getkey(rt);
331 			gate = rt->rt_gateway;
332 			netmask = rt_mask(rt);
333 			if ((rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) == 0)
334 				;
335 			else if ((ifp = rt->rt_ifp) != NULL) {
336 				const struct ifaddr *rtifa;
337 				ifpaddr = ifp->if_dl->ifa_addr;
338                                 /* rtifa used to be simply rt->rt_ifa.
339                                  * If rt->rt_ifa != NULL, then
340                                  * rt_get_ifa() != NULL.  So this
341                                  * ought to still be safe. --dyoung
342 				 */
343 				rtifa = rt_get_ifa(rt);
344 				ifaaddr = rtifa->ifa_addr;
345 #ifdef RTSOCK_DEBUG
346 				if (ifaaddr->sa_family == AF_INET) {
347 					printf("%s: copying out RTAX_IFA %s ",
348 					    __func__,
349 					    inet_ntoa(((const struct sockaddr_in *)ifaaddr)->sin_addr));
350 					printf("for dst %s ifa_getifa %p ifa_seqno %p\n",
351 					    inet_ntoa(((const struct sockaddr_in *)dst)->sin_addr),
352 					    (void *)rtifa->ifa_getifa, rtifa->ifa_seqno);
353 				}
354 #endif /* RTSOCK_DEBUG */
355 				if (ifp->if_flags & IFF_POINTOPOINT)
356 					brdaddr = rtifa->ifa_dstaddr;
357 				else
358 					brdaddr = NULL;
359 				rtm->rtm_index = ifp->if_index;
360 			} else {
361 				ifpaddr = NULL;
362 				ifaaddr = NULL;
363 			}
364 			(void)rt_msg2(rtm->rtm_type, &info, NULL, NULL, &len);
365 			if (len > rtm->rtm_msglen) {
366 				struct rt_msghdr *new_rtm;
367 				R_Malloc(new_rtm, struct rt_msghdr *, len);
368 				if (new_rtm == NULL)
369 					senderr(ENOBUFS);
370 				Bcopy(rtm, new_rtm, rtm->rtm_msglen);
371 				Free(rtm); rtm = new_rtm;
372 			}
373 			(void)rt_msg2(rtm->rtm_type, &info, (void *)rtm,
374 			    NULL, 0);
375 			rtm->rtm_flags = rt->rt_flags;
376 			rtm->rtm_rmx = rt->rt_rmx;
377 			rtm->rtm_addrs = info.rti_addrs;
378 			break;
379 
380 		case RTM_CHANGE:
381 			/*
382 			 * new gateway could require new ifaddr, ifp;
383 			 * flags may also be different; ifp may be specified
384 			 * by ll sockaddr when protocol address is ambiguous
385 			 */
386 			if ((error = rt_getifa(&info)) != 0)
387 				senderr(error);
388 			if (gate && rt_setgate(rt, gate))
389 				senderr(EDQUOT);
390 			/* new gateway could require new ifaddr, ifp;
391 			   flags may also be different; ifp may be specified
392 			   by ll sockaddr when protocol address is ambiguous */
393 			if (ifpaddr && (ifa = ifa_ifwithnet(ifpaddr)) &&
394 			    (ifp = ifa->ifa_ifp) && (ifaaddr || gate))
395 				ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate,
396 				    ifp);
397 			else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) ||
398 			    (gate && (ifa = ifa_ifwithroute(rt->rt_flags,
399 			    rt_getkey(rt), gate))))
400 				ifp = ifa->ifa_ifp;
401 			if (ifa) {
402 				struct ifaddr *oifa = rt->rt_ifa;
403 				if (oifa != ifa) {
404 					if (oifa && oifa->ifa_rtrequest) {
405 						oifa->ifa_rtrequest(RTM_DELETE,
406 						    rt, &info);
407 					}
408 					rt_replace_ifa(rt, ifa);
409 					rt->rt_ifp = ifp;
410 				}
411 			}
412 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
413 			    &rt->rt_rmx);
414 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
415 				rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
416 			/*
417 			 * Fall into
418 			 */
419 		case RTM_LOCK:
420 			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
421 			rt->rt_rmx.rmx_locks |=
422 			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
423 			break;
424 		}
425 		break;
426 
427 	default:
428 		senderr(EOPNOTSUPP);
429 	}
430 
431 flush:
432 	if (rtm) {
433 		if (error)
434 			rtm->rtm_errno = error;
435 		else
436 			rtm->rtm_flags |= RTF_DONE;
437 	}
438 	family = dst ? dst->sa_family : 0;
439 	if (rt)
440 		rtfree(rt);
441     {
442 	struct rawcb *rp = NULL;
443 	/*
444 	 * Check to see if we don't want our own messages.
445 	 */
446 	if ((so->so_options & SO_USELOOPBACK) == 0) {
447 		if (route_cb.any_count <= 1) {
448 			if (rtm)
449 				Free(rtm);
450 			m_freem(m);
451 			return error;
452 		}
453 		/* There is another listener, so construct message */
454 		rp = sotorawcb(so);
455 	}
456 	if (rtm) {
457 		m_copyback(m, 0, rtm->rtm_msglen, (void *)rtm);
458 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
459 			m_freem(m);
460 			m = NULL;
461 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
462 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
463 		Free(rtm);
464 	}
465 	if (rp)
466 		rp->rcb_proto.sp_family = 0; /* Avoid us */
467 	if (family)
468 		proto.sp_protocol = family;
469 	if (m)
470 		raw_input(m, &proto, &route_src, &route_dst);
471 	if (rp)
472 		rp->rcb_proto.sp_family = PF_ROUTE;
473     }
474 	return error;
475 }
476 
477 void
478 rt_setmetrics(u_long which, const struct rt_metrics *in, struct rt_metrics *out)
479 {
480 #define metric(f, e) if (which & (f)) out->e = in->e;
481 	metric(RTV_RPIPE, rmx_recvpipe);
482 	metric(RTV_SPIPE, rmx_sendpipe);
483 	metric(RTV_SSTHRESH, rmx_ssthresh);
484 	metric(RTV_RTT, rmx_rtt);
485 	metric(RTV_RTTVAR, rmx_rttvar);
486 	metric(RTV_HOPCOUNT, rmx_hopcount);
487 	metric(RTV_MTU, rmx_mtu);
488 	metric(RTV_EXPIRE, rmx_expire);
489 #undef metric
490 }
491 
492 #define ROUNDUP(a) \
493 	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
494 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
495 
496 static int
497 rt_xaddrs(u_char rtmtype, const char *cp, const char *cplim, struct rt_addrinfo *rtinfo)
498 {
499 	const struct sockaddr *sa = NULL;	/* Quell compiler warning */
500 	int i;
501 
502 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
503 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
504 			continue;
505 		rtinfo->rti_info[i] = sa = (const struct sockaddr *)cp;
506 		ADVANCE(cp, sa);
507 	}
508 
509 	/* Check for extra addresses specified, except RTM_GET asking for interface info.  */
510 	if (rtmtype == RTM_GET) {
511 		if (((rtinfo->rti_addrs & (~((1 << RTAX_IFP) | (1 << RTAX_IFA)))) & (~0 << i)) != 0)
512 			return 1;
513 	} else {
514 		if ((rtinfo->rti_addrs & (~0 << i)) != 0)
515 			return 1;
516 	}
517 	/* Check for bad data length.  */
518 	if (cp != cplim) {
519 		if (i == RTAX_NETMASK + 1 && sa &&
520 		    cp - ROUNDUP(sa->sa_len) + sa->sa_len == cplim)
521 			/*
522 			 * The last sockaddr was netmask.
523 			 * We accept this for now for the sake of old
524 			 * binaries or third party softwares.
525 			 */
526 			;
527 		else
528 			return 1;
529 	}
530 	return 0;
531 }
532 
533 static struct mbuf *
534 rt_msg1(int type, struct rt_addrinfo *rtinfo, void *data, int datalen)
535 {
536 	struct rt_msghdr *rtm;
537 	struct mbuf *m;
538 	int i;
539 	const struct sockaddr *sa;
540 	int len, dlen;
541 
542 	m = m_gethdr(M_DONTWAIT, MT_DATA);
543 	if (m == NULL)
544 		return m;
545 	MCLAIM(m, &routedomain.dom_mowner);
546 	switch (type) {
547 
548 	case RTM_DELADDR:
549 	case RTM_NEWADDR:
550 		len = sizeof(struct ifa_msghdr);
551 		break;
552 
553 #ifdef COMPAT_14
554 	case RTM_OIFINFO:
555 		len = sizeof(struct if_msghdr14);
556 		break;
557 #endif
558 
559 	case RTM_IFINFO:
560 		len = sizeof(struct if_msghdr);
561 		break;
562 
563 	case RTM_IFANNOUNCE:
564 	case RTM_IEEE80211:
565 		len = sizeof(struct if_announcemsghdr);
566 		break;
567 
568 	default:
569 		len = sizeof(struct rt_msghdr);
570 	}
571 	if (len > MHLEN + MLEN)
572 		panic("rt_msg1: message too long");
573 	else if (len > MHLEN) {
574 		m->m_next = m_get(M_DONTWAIT, MT_DATA);
575 		if (m->m_next == NULL) {
576 			m_freem(m);
577 			return NULL;
578 		}
579 		MCLAIM(m->m_next, m->m_owner);
580 		m->m_pkthdr.len = len;
581 		m->m_len = MHLEN;
582 		m->m_next->m_len = len - MHLEN;
583 	} else {
584 		m->m_pkthdr.len = m->m_len = len;
585 	}
586 	m->m_pkthdr.rcvif = NULL;
587 	m_copyback(m, 0, datalen, data);
588 	rtm = mtod(m, struct rt_msghdr *);
589 	for (i = 0; i < RTAX_MAX; i++) {
590 		if ((sa = rtinfo->rti_info[i]) == NULL)
591 			continue;
592 		rtinfo->rti_addrs |= (1 << i);
593 		dlen = ROUNDUP(sa->sa_len);
594 		m_copyback(m, len, dlen, sa);
595 		len += dlen;
596 	}
597 	if (m->m_pkthdr.len != len) {
598 		m_freem(m);
599 		return NULL;
600 	}
601 	rtm->rtm_msglen = len;
602 	rtm->rtm_version = RTM_VERSION;
603 	rtm->rtm_type = type;
604 	return m;
605 }
606 
607 /*
608  * rt_msg2
609  *
610  *	 fills 'cp' or 'w'.w_tmem with the routing socket message and
611  *		returns the length of the message in 'lenp'.
612  *
613  * if walkarg is 0, cp is expected to be 0 or a buffer large enough to hold
614  *	the message
615  * otherwise walkarg's w_needed is updated and if the user buffer is
616  *	specified and w_needed indicates space exists the information is copied
617  *	into the temp space (w_tmem). w_tmem is [re]allocated if necessary,
618  *	if the allocation fails ENOBUFS is returned.
619  */
620 static int
621 rt_msg2(int type, struct rt_addrinfo *rtinfo, void *cpv, struct walkarg *w,
622 	int *lenp)
623 {
624 	int i;
625 	int len, dlen, second_time = 0;
626 	char *cp0, *cp = cpv;
627 
628 	rtinfo->rti_addrs = 0;
629 again:
630 	switch (type) {
631 
632 	case RTM_DELADDR:
633 	case RTM_NEWADDR:
634 		len = sizeof(struct ifa_msghdr);
635 		break;
636 #ifdef COMPAT_14
637 	case RTM_OIFINFO:
638 		len = sizeof(struct if_msghdr14);
639 		break;
640 #endif
641 
642 	case RTM_IFINFO:
643 		len = sizeof(struct if_msghdr);
644 		break;
645 
646 	default:
647 		len = sizeof(struct rt_msghdr);
648 	}
649 	if ((cp0 = cp) != NULL)
650 		cp += len;
651 	for (i = 0; i < RTAX_MAX; i++) {
652 		const struct sockaddr *sa;
653 
654 		if ((sa = rtinfo->rti_info[i]) == NULL)
655 			continue;
656 		rtinfo->rti_addrs |= (1 << i);
657 		dlen = ROUNDUP(sa->sa_len);
658 		if (cp) {
659 			bcopy(sa, cp, (unsigned)dlen);
660 			cp += dlen;
661 		}
662 		len += dlen;
663 	}
664 	if (cp == NULL && w != NULL && !second_time) {
665 		struct walkarg *rw = w;
666 
667 		rw->w_needed += len;
668 		if (rw->w_needed <= 0 && rw->w_where) {
669 			if (rw->w_tmemsize < len) {
670 				if (rw->w_tmem)
671 					free(rw->w_tmem, M_RTABLE);
672 				rw->w_tmem = (void *) malloc(len, M_RTABLE,
673 				    M_NOWAIT);
674 				if (rw->w_tmem)
675 					rw->w_tmemsize = len;
676 			}
677 			if (rw->w_tmem) {
678 				cp = rw->w_tmem;
679 				second_time = 1;
680 				goto again;
681 			} else {
682 				rw->w_tmemneeded = len;
683 				return ENOBUFS;
684 			}
685 		}
686 	}
687 	if (cp) {
688 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
689 
690 		rtm->rtm_version = RTM_VERSION;
691 		rtm->rtm_type = type;
692 		rtm->rtm_msglen = len;
693 	}
694 	if (lenp)
695 		*lenp = len;
696 	return 0;
697 }
698 
699 /*
700  * This routine is called to generate a message from the routing
701  * socket indicating that a redirect has occurred, a routing lookup
702  * has failed, or that a protocol has detected timeouts to a particular
703  * destination.
704  */
705 void
706 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
707 {
708 	struct rt_msghdr rtm;
709 	struct mbuf *m;
710 	const struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
711 
712 	if (route_cb.any_count == 0)
713 		return;
714 	memset(&rtm, 0, sizeof(rtm));
715 	rtm.rtm_flags = RTF_DONE | flags;
716 	rtm.rtm_errno = error;
717 	m = rt_msg1(type, rtinfo, (void *)&rtm, sizeof(rtm));
718 	if (m == NULL)
719 		return;
720 	mtod(m, struct rt_msghdr *)->rtm_addrs = rtinfo->rti_addrs;
721 	route_enqueue(m, sa ? sa->sa_family : 0);
722 }
723 
724 /*
725  * This routine is called to generate a message from the routing
726  * socket indicating that the status of a network interface has changed.
727  */
728 void
729 rt_ifmsg(struct ifnet *ifp)
730 {
731 	struct if_msghdr ifm;
732 #ifdef COMPAT_14
733 	struct if_msghdr14 oifm;
734 #endif
735 	struct mbuf *m;
736 	struct rt_addrinfo info;
737 
738 	if (route_cb.any_count == 0)
739 		return;
740 	memset(&info, 0, sizeof(info));
741 	memset(&ifm, 0, sizeof(ifm));
742 	ifm.ifm_index = ifp->if_index;
743 	ifm.ifm_flags = ifp->if_flags;
744 	ifm.ifm_data = ifp->if_data;
745 	ifm.ifm_addrs = 0;
746 	m = rt_msg1(RTM_IFINFO, &info, (void *)&ifm, sizeof(ifm));
747 	if (m == NULL)
748 		return;
749 	route_enqueue(m, 0);
750 #ifdef COMPAT_14
751 	memset(&info, 0, sizeof(info));
752 	memset(&oifm, 0, sizeof(oifm));
753 	oifm.ifm_index = ifp->if_index;
754 	oifm.ifm_flags = ifp->if_flags;
755 	oifm.ifm_data.ifi_type = ifp->if_data.ifi_type;
756 	oifm.ifm_data.ifi_addrlen = ifp->if_data.ifi_addrlen;
757 	oifm.ifm_data.ifi_hdrlen = ifp->if_data.ifi_hdrlen;
758 	oifm.ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
759 	oifm.ifm_data.ifi_metric = ifp->if_data.ifi_metric;
760 	oifm.ifm_data.ifi_baudrate = ifp->if_data.ifi_baudrate;
761 	oifm.ifm_data.ifi_ipackets = ifp->if_data.ifi_ipackets;
762 	oifm.ifm_data.ifi_ierrors = ifp->if_data.ifi_ierrors;
763 	oifm.ifm_data.ifi_opackets = ifp->if_data.ifi_opackets;
764 	oifm.ifm_data.ifi_oerrors = ifp->if_data.ifi_oerrors;
765 	oifm.ifm_data.ifi_collisions = ifp->if_data.ifi_collisions;
766 	oifm.ifm_data.ifi_ibytes = ifp->if_data.ifi_ibytes;
767 	oifm.ifm_data.ifi_obytes = ifp->if_data.ifi_obytes;
768 	oifm.ifm_data.ifi_imcasts = ifp->if_data.ifi_imcasts;
769 	oifm.ifm_data.ifi_omcasts = ifp->if_data.ifi_omcasts;
770 	oifm.ifm_data.ifi_iqdrops = ifp->if_data.ifi_iqdrops;
771 	oifm.ifm_data.ifi_noproto = ifp->if_data.ifi_noproto;
772 	oifm.ifm_data.ifi_lastchange = ifp->if_data.ifi_lastchange;
773 	oifm.ifm_addrs = 0;
774 	m = rt_msg1(RTM_OIFINFO, &info, (void *)&oifm, sizeof(oifm));
775 	if (m == NULL)
776 		return;
777 	route_enqueue(m, 0);
778 #endif
779 }
780 
781 /*
782  * This is called to generate messages from the routing socket
783  * indicating a network interface has had addresses associated with it.
784  * if we ever reverse the logic and replace messages TO the routing
785  * socket indicate a request to configure interfaces, then it will
786  * be unnecessary as the routing socket will automatically generate
787  * copies of it.
788  */
789 void
790 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
791 {
792 	struct rt_addrinfo info;
793 	const struct sockaddr *sa = NULL;
794 	int pass;
795 	struct mbuf *m = NULL;
796 	struct ifnet *ifp = ifa->ifa_ifp;
797 
798 	if (route_cb.any_count == 0)
799 		return;
800 	for (pass = 1; pass < 3; pass++) {
801 		memset(&info, 0, sizeof(info));
802 		if ((cmd == RTM_ADD && pass == 1) ||
803 		    (cmd == RTM_DELETE && pass == 2)) {
804 			struct ifa_msghdr ifam;
805 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
806 
807 			ifaaddr = sa = ifa->ifa_addr;
808 			ifpaddr = ifp->if_dl->ifa_addr;
809 			netmask = ifa->ifa_netmask;
810 			brdaddr = ifa->ifa_dstaddr;
811 			memset(&ifam, 0, sizeof(ifam));
812 			ifam.ifam_index = ifp->if_index;
813 			ifam.ifam_metric = ifa->ifa_metric;
814 			ifam.ifam_flags = ifa->ifa_flags;
815 			m = rt_msg1(ncmd, &info, (void *)&ifam, sizeof(ifam));
816 			if (m == NULL)
817 				continue;
818 			mtod(m, struct ifa_msghdr *)->ifam_addrs =
819 			    info.rti_addrs;
820 		}
821 		if ((cmd == RTM_ADD && pass == 2) ||
822 		    (cmd == RTM_DELETE && pass == 1)) {
823 			struct rt_msghdr rtm;
824 
825 			if (rt == NULL)
826 				continue;
827 			netmask = rt_mask(rt);
828 			dst = sa = rt_getkey(rt);
829 			gate = rt->rt_gateway;
830 			memset(&rtm, 0, sizeof(rtm));
831 			rtm.rtm_index = ifp->if_index;
832 			rtm.rtm_flags |= rt->rt_flags;
833 			rtm.rtm_errno = error;
834 			m = rt_msg1(cmd, &info, (void *)&rtm, sizeof(rtm));
835 			if (m == NULL)
836 				continue;
837 			mtod(m, struct rt_msghdr *)->rtm_addrs = info.rti_addrs;
838 		}
839 		route_enqueue(m, sa ? sa->sa_family : 0);
840 	}
841 }
842 
843 static struct mbuf *
844 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
845     struct rt_addrinfo *info)
846 {
847 	struct if_announcemsghdr ifan;
848 
849 	memset(info, 0, sizeof(*info));
850 	memset(&ifan, 0, sizeof(ifan));
851 	ifan.ifan_index = ifp->if_index;
852 	strlcpy(ifan.ifan_name, ifp->if_xname, sizeof(ifan.ifan_name));
853 	ifan.ifan_what = what;
854 	return rt_msg1(type, info, (void *)&ifan, sizeof(ifan));
855 }
856 
857 /*
858  * This is called to generate routing socket messages indicating
859  * network interface arrival and departure.
860  */
861 void
862 rt_ifannouncemsg(struct ifnet *ifp, int what)
863 {
864 	struct mbuf *m;
865 	struct rt_addrinfo info;
866 
867 	if (route_cb.any_count == 0)
868 		return;
869 	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
870 	if (m == NULL)
871 		return;
872 	route_enqueue(m, 0);
873 }
874 
875 /*
876  * This is called to generate routing socket messages indicating
877  * IEEE80211 wireless events.
878  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
879  */
880 void
881 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
882 {
883 	struct mbuf *m;
884 	struct rt_addrinfo info;
885 
886 	if (route_cb.any_count == 0)
887 		return;
888 	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
889 	if (m == NULL)
890 		return;
891 	/*
892 	 * Append the ieee80211 data.  Try to stick it in the
893 	 * mbuf containing the ifannounce msg; otherwise allocate
894 	 * a new mbuf and append.
895 	 *
896 	 * NB: we assume m is a single mbuf.
897 	 */
898 	if (data_len > M_TRAILINGSPACE(m)) {
899 		struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
900 		if (n == NULL) {
901 			m_freem(m);
902 			return;
903 		}
904 		(void)memcpy(mtod(n, void *), data, data_len);
905 		n->m_len = data_len;
906 		m->m_next = n;
907 	} else if (data_len > 0) {
908 		(void)memcpy(mtod(m, uint8_t *) + m->m_len, data, data_len);
909 		m->m_len += data_len;
910 	}
911 	if (m->m_flags & M_PKTHDR)
912 		m->m_pkthdr.len += data_len;
913 	mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
914 	route_enqueue(m, 0);
915 }
916 
917 /*
918  * This is used in dumping the kernel table via sysctl().
919  */
920 static int
921 sysctl_dumpentry(struct rtentry *rt, void *v)
922 {
923 	struct walkarg *w = v;
924 	int error = 0, size;
925 	struct rt_addrinfo info;
926 
927 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
928 		return 0;
929 	memset(&info, 0, sizeof(info));
930 	dst = rt_getkey(rt);
931 	gate = rt->rt_gateway;
932 	netmask = rt_mask(rt);
933 	if (rt->rt_ifp) {
934 		const struct ifaddr *rtifa;
935 		ifpaddr = rt->rt_ifp->if_dl->ifa_addr;
936 		/* rtifa used to be simply rt->rt_ifa.  If rt->rt_ifa != NULL,
937 		 * then rt_get_ifa() != NULL.  So this ought to still be safe.
938 		 * --dyoung
939 		 */
940 		rtifa = rt_get_ifa(rt);
941 		ifaaddr = rtifa->ifa_addr;
942 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
943 			brdaddr = rtifa->ifa_dstaddr;
944 	}
945 	if ((error = rt_msg2(RTM_GET, &info, 0, w, &size)))
946 		return error;
947 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
948 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
949 
950 		rtm->rtm_flags = rt->rt_flags;
951 		rtm->rtm_use = rt->rt_use;
952 		rtm->rtm_rmx = rt->rt_rmx;
953 		KASSERT(rt->rt_ifp != NULL);
954 		rtm->rtm_index = rt->rt_ifp->if_index;
955 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
956 		rtm->rtm_addrs = info.rti_addrs;
957 		if ((error = copyout(rtm, w->w_where, size)) != 0)
958 			w->w_where = NULL;
959 		else
960 			w->w_where = (char *)w->w_where + size;
961 	}
962 	return error;
963 }
964 
965 static int
966 sysctl_iflist(int af, struct walkarg *w, int type)
967 {
968 	struct ifnet *ifp;
969 	struct ifaddr *ifa;
970 	struct	rt_addrinfo info;
971 	int	len, error = 0;
972 
973 	memset(&info, 0, sizeof(info));
974 	IFNET_FOREACH(ifp) {
975 		if (w->w_arg && w->w_arg != ifp->if_index)
976 			continue;
977 		if (IFADDR_EMPTY(ifp))
978 			continue;
979 		ifpaddr = ifp->if_dl->ifa_addr;
980 		switch (type) {
981 		case NET_RT_IFLIST:
982 			error =
983 			    rt_msg2(RTM_IFINFO, &info, NULL, w, &len);
984 			break;
985 #ifdef COMPAT_14
986 		case NET_RT_OIFLIST:
987 			error =
988 			    rt_msg2(RTM_OIFINFO, &info, NULL, w, &len);
989 			break;
990 #endif
991 		default:
992 			panic("sysctl_iflist(1)");
993 		}
994 		if (error)
995 			return error;
996 		ifpaddr = NULL;
997 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
998 			switch (type) {
999 			case NET_RT_IFLIST: {
1000 				struct if_msghdr *ifm;
1001 
1002 				ifm = (struct if_msghdr *)w->w_tmem;
1003 				ifm->ifm_index = ifp->if_index;
1004 				ifm->ifm_flags = ifp->if_flags;
1005 				ifm->ifm_data = ifp->if_data;
1006 				ifm->ifm_addrs = info.rti_addrs;
1007 				error = copyout(ifm, w->w_where, len);
1008 				if (error)
1009 					return error;
1010 				w->w_where = (char *)w->w_where + len;
1011 				break;
1012 			}
1013 
1014 #ifdef COMPAT_14
1015 			case NET_RT_OIFLIST: {
1016 				struct if_msghdr14 *ifm;
1017 
1018 				ifm = (struct if_msghdr14 *)w->w_tmem;
1019 				ifm->ifm_index = ifp->if_index;
1020 				ifm->ifm_flags = ifp->if_flags;
1021 				ifm->ifm_data.ifi_type = ifp->if_data.ifi_type;
1022 				ifm->ifm_data.ifi_addrlen =
1023 				    ifp->if_data.ifi_addrlen;
1024 				ifm->ifm_data.ifi_hdrlen =
1025 				    ifp->if_data.ifi_hdrlen;
1026 				ifm->ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
1027 				ifm->ifm_data.ifi_metric =
1028 				    ifp->if_data.ifi_metric;
1029 				ifm->ifm_data.ifi_baudrate =
1030 				    ifp->if_data.ifi_baudrate;
1031 				ifm->ifm_data.ifi_ipackets =
1032 				    ifp->if_data.ifi_ipackets;
1033 				ifm->ifm_data.ifi_ierrors =
1034 				    ifp->if_data.ifi_ierrors;
1035 				ifm->ifm_data.ifi_opackets =
1036 				    ifp->if_data.ifi_opackets;
1037 				ifm->ifm_data.ifi_oerrors =
1038 				    ifp->if_data.ifi_oerrors;
1039 				ifm->ifm_data.ifi_collisions =
1040 				    ifp->if_data.ifi_collisions;
1041 				ifm->ifm_data.ifi_ibytes =
1042 				    ifp->if_data.ifi_ibytes;
1043 				ifm->ifm_data.ifi_obytes =
1044 				    ifp->if_data.ifi_obytes;
1045 				ifm->ifm_data.ifi_imcasts =
1046 				    ifp->if_data.ifi_imcasts;
1047 				ifm->ifm_data.ifi_omcasts =
1048 				    ifp->if_data.ifi_omcasts;
1049 				ifm->ifm_data.ifi_iqdrops =
1050 				    ifp->if_data.ifi_iqdrops;
1051 				ifm->ifm_data.ifi_noproto =
1052 				    ifp->if_data.ifi_noproto;
1053 				ifm->ifm_data.ifi_lastchange =
1054 				    ifp->if_data.ifi_lastchange;
1055 				ifm->ifm_addrs = info.rti_addrs;
1056 				error = copyout(ifm, w->w_where, len);
1057 				if (error)
1058 					return error;
1059 				w->w_where = (char *)w->w_where + len;
1060 				break;
1061 			}
1062 #endif
1063 			default:
1064 				panic("sysctl_iflist(2)");
1065 			}
1066 		}
1067 		IFADDR_FOREACH(ifa, ifp) {
1068 			if (af && af != ifa->ifa_addr->sa_family)
1069 				continue;
1070 			ifaaddr = ifa->ifa_addr;
1071 			netmask = ifa->ifa_netmask;
1072 			brdaddr = ifa->ifa_dstaddr;
1073 			if ((error = rt_msg2(RTM_NEWADDR, &info, 0, w, &len)))
1074 				return error;
1075 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1076 				struct ifa_msghdr *ifam;
1077 
1078 				ifam = (struct ifa_msghdr *)w->w_tmem;
1079 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1080 				ifam->ifam_flags = ifa->ifa_flags;
1081 				ifam->ifam_metric = ifa->ifa_metric;
1082 				ifam->ifam_addrs = info.rti_addrs;
1083 				error = copyout(w->w_tmem, w->w_where, len);
1084 				if (error)
1085 					return error;
1086 				w->w_where = (char *)w->w_where + len;
1087 			}
1088 		}
1089 		ifaaddr = netmask = brdaddr = NULL;
1090 	}
1091 	return 0;
1092 }
1093 
1094 static int
1095 sysctl_rtable(SYSCTLFN_ARGS)
1096 {
1097 	void 	*where = oldp;
1098 	size_t	*given = oldlenp;
1099 	const void *new = newp;
1100 	int	i, s, error = EINVAL;
1101 	u_char  af;
1102 	struct	walkarg w;
1103 
1104 	if (namelen == 1 && name[0] == CTL_QUERY)
1105 		return sysctl_query(SYSCTLFN_CALL(rnode));
1106 
1107 	if (new)
1108 		return EPERM;
1109 	if (namelen != 3)
1110 		return EINVAL;
1111 	af = name[0];
1112 	w.w_tmemneeded = 0;
1113 	w.w_tmemsize = 0;
1114 	w.w_tmem = NULL;
1115 again:
1116 	/* we may return here if a later [re]alloc of the t_mem buffer fails */
1117 	if (w.w_tmemneeded) {
1118 		w.w_tmem = (void *) malloc(w.w_tmemneeded, M_RTABLE, M_WAITOK);
1119 		w.w_tmemsize = w.w_tmemneeded;
1120 		w.w_tmemneeded = 0;
1121 	}
1122 	w.w_op = name[1];
1123 	w.w_arg = name[2];
1124 	w.w_given = *given;
1125 	w.w_needed = 0 - w.w_given;
1126 	w.w_where = where;
1127 
1128 	s = splsoftnet();
1129 	switch (w.w_op) {
1130 
1131 	case NET_RT_DUMP:
1132 	case NET_RT_FLAGS:
1133 		for (i = 1; i <= AF_MAX; i++)
1134 			if ((af == 0 || af == i) &&
1135 			    (error = rt_walktree(i, sysctl_dumpentry, &w)))
1136 				break;
1137 		break;
1138 
1139 #ifdef COMPAT_14
1140 	case NET_RT_OIFLIST:
1141 		error = sysctl_iflist(af, &w, w.w_op);
1142 		break;
1143 #endif
1144 
1145 	case NET_RT_IFLIST:
1146 		error = sysctl_iflist(af, &w, w.w_op);
1147 	}
1148 	splx(s);
1149 
1150 	/* check to see if we couldn't allocate memory with NOWAIT */
1151 	if (error == ENOBUFS && w.w_tmem == 0 && w.w_tmemneeded)
1152 		goto again;
1153 
1154 	if (w.w_tmem)
1155 		free(w.w_tmem, M_RTABLE);
1156 	w.w_needed += w.w_given;
1157 	if (where) {
1158 		*given = (char *)w.w_where - (char *)where;
1159 		if (*given < w.w_needed)
1160 			return ENOMEM;
1161 	} else {
1162 		*given = (11 * w.w_needed) / 10;
1163 	}
1164 	return error;
1165 }
1166 
1167 /*
1168  * Routing message software interrupt routine
1169  */
1170 static void
1171 route_intr(void *cookie)
1172 {
1173 	struct sockproto proto = { .sp_family = PF_ROUTE, };
1174 	struct mbuf *m;
1175 	int s;
1176 
1177 	mutex_enter(softnet_lock);
1178 	KERNEL_LOCK(1, NULL);
1179 	while (!IF_IS_EMPTY(&route_intrq)) {
1180 		s = splnet();
1181 		IF_DEQUEUE(&route_intrq, m);
1182 		splx(s);
1183 		if (m == NULL)
1184 			break;
1185 		proto.sp_protocol = M_GETCTX(m, uintptr_t);
1186 		raw_input(m, &proto, &route_src, &route_dst);
1187 	}
1188 	KERNEL_UNLOCK_ONE(NULL);
1189 	mutex_exit(softnet_lock);
1190 }
1191 
1192 /*
1193  * Enqueue a message to the software interrupt routine.
1194  */
1195 static void
1196 route_enqueue(struct mbuf *m, int family)
1197 {
1198 	int s, wasempty;
1199 
1200 	s = splnet();
1201 	if (IF_QFULL(&route_intrq)) {
1202 		IF_DROP(&route_intrq);
1203 		m_freem(m);
1204 	} else {
1205 		wasempty = IF_IS_EMPTY(&route_intrq);
1206 		M_SETCTX(m, (uintptr_t)family);
1207 		IF_ENQUEUE(&route_intrq, m);
1208 		if (wasempty)
1209 			softint_schedule(route_sih);
1210 	}
1211 	splx(s);
1212 }
1213 
1214 void
1215 rt_init(void)
1216 {
1217 
1218 	route_intrq.ifq_maxlen = route_maxqlen;
1219 	route_sih = softint_establish(SOFTINT_NET | SOFTINT_MPSAFE,
1220 	    route_intr, NULL);
1221 }
1222 
1223 /*
1224  * Definitions of protocols supported in the ROUTE domain.
1225  */
1226 PR_WRAP_USRREQ(route_usrreq)
1227 #define	route_usrreq	route_usrreq_wrapper
1228 
1229 const struct protosw routesw[] = {
1230 	{
1231 		.pr_type = SOCK_RAW,
1232 		.pr_domain = &routedomain,
1233 		.pr_flags = PR_ATOMIC|PR_ADDR,
1234 		.pr_input = raw_input,
1235 		.pr_output = route_output,
1236 		.pr_ctlinput = raw_ctlinput,
1237 		.pr_usrreq = route_usrreq,
1238 		.pr_init = raw_init,
1239 	},
1240 };
1241 
1242 struct domain routedomain = {
1243 	.dom_family = PF_ROUTE,
1244 	.dom_name = "route",
1245 	.dom_init = route_init,
1246 	.dom_protosw = routesw,
1247 	.dom_protoswNPROTOSW = &routesw[__arraycount(routesw)],
1248 };
1249 
1250 SYSCTL_SETUP(sysctl_net_route_setup, "sysctl net.route subtree setup")
1251 {
1252 	const struct sysctlnode *rnode = NULL;
1253 
1254 	sysctl_createv(clog, 0, NULL, NULL,
1255 		       CTLFLAG_PERMANENT,
1256 		       CTLTYPE_NODE, "net", NULL,
1257 		       NULL, 0, NULL, 0,
1258 		       CTL_NET, CTL_EOL);
1259 
1260 	sysctl_createv(clog, 0, NULL, &rnode,
1261 		       CTLFLAG_PERMANENT,
1262 		       CTLTYPE_NODE, "route",
1263 		       SYSCTL_DESCR("PF_ROUTE information"),
1264 		       NULL, 0, NULL, 0,
1265 		       CTL_NET, PF_ROUTE, CTL_EOL);
1266 	sysctl_createv(clog, 0, NULL, NULL,
1267 		       CTLFLAG_PERMANENT,
1268 		       CTLTYPE_NODE, "rtable",
1269 		       SYSCTL_DESCR("Routing table information"),
1270 		       sysctl_rtable, 0, NULL, 0,
1271 		       CTL_NET, PF_ROUTE, 0 /* any protocol */, CTL_EOL);
1272 	sysctl_createv(clog, 0, &rnode, NULL,
1273 		       CTLFLAG_PERMANENT,
1274 		       CTLTYPE_STRUCT, "stats",
1275 		       SYSCTL_DESCR("Routing statistics"),
1276 		       NULL, 0, &rtstat, sizeof(rtstat),
1277 		       CTL_CREATE, CTL_EOL);
1278 }
1279