xref: /openbsd-src/sys/net/rtsock.c (revision 2b0358df1d88d06ef4139321dd05bd5e05d91eaf)
1 /*	$OpenBSD: rtsock.c,v 1.84 2009/02/03 16:42:54 michele Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/mbuf.h>
68 #include <sys/socket.h>
69 #include <sys/socketvar.h>
70 #include <sys/domain.h>
71 #include <sys/protosw.h>
72 
73 #include <uvm/uvm_extern.h>
74 #include <sys/sysctl.h>
75 
76 #include <net/if.h>
77 #include <net/route.h>
78 #include <net/raw_cb.h>
79 
80 #ifdef MPLS
81 #include <netmpls/mpls.h>
82 #endif /* MPLS */
83 
84 #include <sys/stdarg.h>
85 
/*
 * Fixed source/destination addresses and the protocol descriptor that the
 * message generators below pass to route_input().
 * Only the first two members of each sockaddr are initialized (2 and
 * PF_ROUTE); presumably these are sa_len and sa_family -- TODO confirm
 * against the struct sockaddr definition.
 */
struct sockaddr		route_dst = { 2, PF_ROUTE, };
struct sockaddr		route_src = { 2, PF_ROUTE, };
/* Senders update route_proto.sp_protocol before calling route_input(). */
struct sockproto	route_proto = { PF_ROUTE, };
89 
/*
 * State shared between sysctl_dumpentry() and rt_msg2() while messages are
 * generated for a table walk.
 */
struct walkarg {
	/*
	 * w_op / w_arg: requested operation and its argument (for
	 *     NET_RT_FLAGS, w_arg is tested against rt_flags).
	 * w_needed: running byte count; rt_msg2() adds each message length.
	 * w_tmemsize: current size of the w_tmem scratch buffer.
	 * w_given: not referenced in the code visible here.
	 */
	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
	/*
	 * w_where: presumably the caller's output location (TODO confirm);
	 *     rt_msg2() clears it when no scratch buffer can be allocated.
	 * w_tmem: scratch buffer (M_RTABLE) that rt_msg2() builds messages in.
	 */
	caddr_t	w_where, w_tmem;
};
94 
/* Socket option handling and message delivery for routing sockets. */
int	route_ctloutput(int, struct socket *, int, int, struct mbuf **);
void	route_input(struct mbuf *m0, ...);

/* Message construction and sockaddr parsing helpers. */
struct mbuf	*rt_msg1(int, struct rt_addrinfo *);
int		 rt_msg2(int, int, struct rt_addrinfo *, caddr_t,
		     struct walkarg *);
void		 rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
#ifndef SMALL_KERNEL
/* Used by route_output() to handle RTM_OVERSION requests. */
struct rt_msghdr *rtmsg_3to4(struct mbuf *, int *);
#endif
105 
/*
 * Shorthand for the slots of a local "struct rt_addrinfo info".
 *
 * WARNING: each macro expands to an expression on a variable literally
 * named "info", so it only works in functions that declare one, and the
 * macro names (dst, gate, netmask, ...) cannot be used as ordinary
 * identifiers anywhere below this point in the file.
 */
#define dst	info.rti_info[RTAX_DST]
#define gate	info.rti_info[RTAX_GATEWAY]
#define netmask	info.rti_info[RTAX_NETMASK]
#define genmask	info.rti_info[RTAX_GENMASK]
#define ifpaddr	info.rti_info[RTAX_IFP]
#define ifaaddr	info.rti_info[RTAX_IFA]
#define brdaddr	info.rti_info[RTAX_BRD]
114 
/*
 * Control block of a routing socket: a raw control block followed by the
 * routing-specific state.  rcb must remain the first member because the
 * code casts between struct rawcb * and struct routecb * (route_input()).
 */
struct routecb {
	struct rawcb	rcb;
	/* bit (1 << rtm_type) set = deliver that type; 0 = deliver all */
	unsigned int	msgfilter;
};
/* Routing control block attached to a socket. */
#define	sotoroutecb(so)	((struct routecb *)(so)->so_pcb)
120 
121 
122 int
123 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
124     struct mbuf *control, struct proc *p)
125 {
126 	int		 error = 0;
127 	struct rawcb	*rp = sotorawcb(so);
128 	int		 s;
129 
130 	/*
131 	 * use the rawcb but allocate a rooutecb, this code does not care
132 	 * about the additional fields and works directly on the raw socket.
133 	 */
134 	if (req == PRU_ATTACH) {
135 		rp = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO);
136 		so->so_pcb = rp;
137 	}
138 	if (req == PRU_DETACH && rp) {
139 		int af = rp->rcb_proto.sp_protocol;
140 		if (af == AF_INET)
141 			route_cb.ip_count--;
142 		else if (af == AF_INET6)
143 			route_cb.ip6_count--;
144 		route_cb.any_count--;
145 	}
146 	s = splsoftnet();
147 	/*
148 	 * Don't call raw_usrreq() in the attach case, because
149 	 * we want to allow non-privileged processes to listen on
150 	 * and send "safe" commands to the routing socket.
151 	 */
152 	if (req == PRU_ATTACH) {
153 		if (curproc == 0)
154 			error = EACCES;
155 		else
156 			error = raw_attach(so, (int)(long)nam);
157 	} else
158 		error = raw_usrreq(so, req, m, nam, control, p);
159 
160 	rp = sotorawcb(so);
161 	if (req == PRU_ATTACH && rp) {
162 		int af = rp->rcb_proto.sp_protocol;
163 		if (error) {
164 			free(rp, M_PCB);
165 			splx(s);
166 			return (error);
167 		}
168 		if (af == AF_INET)
169 			route_cb.ip_count++;
170 		else if (af == AF_INET6)
171 			route_cb.ip6_count++;
172 #ifdef MPLS
173                else if (af == AF_MPLS)
174                        route_cb.mpls_count++;
175 #endif /* MPLS */
176 		rp->rcb_faddr = &route_src;
177 		route_cb.any_count++;
178 		soisconnected(so);
179 		so->so_options |= SO_USELOOPBACK;
180 	}
181 	splx(s);
182 	return (error);
183 }
184 
185 int
186 route_ctloutput(int op, struct socket *so, int level, int optname,
187     struct mbuf **mp)
188 {
189 	struct routecb *rop = sotoroutecb(so);
190 	struct mbuf *m = *mp;
191 	int error = 0;
192 
193 	if (level != AF_ROUTE) {
194 		error = EINVAL;
195 		if (op == PRCO_SETOPT && *mp)
196 			m_free(*mp);
197 		return (error);
198 	}
199 
200 	switch (op) {
201 	case PRCO_SETOPT:
202 		switch (optname) {
203 		case ROUTE_MSGFILTER:
204 			if (m == NULL || m->m_len != sizeof(unsigned int))
205 				error = EINVAL;
206 			else
207 				rop->msgfilter = *mtod(m, unsigned int *);
208 			break;
209 		default:
210 			error = ENOPROTOOPT;
211 			break;
212 		}
213 		if (m)
214 			m_free(m);
215 		break;
216 	case PRCO_GETOPT:
217 		switch (optname) {
218 		case ROUTE_MSGFILTER:
219 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
220 			m->m_len = sizeof(int);
221 			*mtod(m, unsigned int *) = rop->msgfilter;
222 			break;
223 		default:
224 			error = ENOPROTOOPT;
225 			break;
226 		}
227 	}
228 	return (error);
229 }
230 
231 void
232 route_input(struct mbuf *m0, ...)
233 {
234 	struct rawcb *rp;
235 	struct routecb *rop;
236 	struct mbuf *m = m0;
237 	int sockets = 0;
238 	struct socket *last;
239 	va_list ap;
240 	struct sockproto *proto;
241 	struct sockaddr *sosrc, *sodst;
242 
243 	va_start(ap, m0);
244 	proto = va_arg(ap, struct sockproto *);
245 	sosrc = va_arg(ap, struct sockaddr *);
246 	sodst = va_arg(ap, struct sockaddr *);
247 	va_end(ap);
248 
249 	/* ensure that we can access the rtm_type via mtod() */
250 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
251 		m_freem(m);
252 		return;
253 	}
254 
255 	last = 0;
256 	LIST_FOREACH(rp, &rawcb, rcb_list) {
257 		if (rp->rcb_proto.sp_family != proto->sp_family)
258 			continue;
259 		if (rp->rcb_proto.sp_protocol  &&
260 		    rp->rcb_proto.sp_protocol != proto->sp_protocol)
261 			continue;
262 		/*
263 		 * We assume the lower level routines have
264 		 * placed the address in a canonical format
265 		 * suitable for a structure comparison.
266 		 *
267 		 * Note that if the lengths are not the same
268 		 * the comparison will fail at the first byte.
269 		 */
270 #define	equal(a1, a2) \
271   (bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0)
272 		if (rp->rcb_laddr && !equal(rp->rcb_laddr, sodst))
273 			continue;
274 		if (rp->rcb_faddr && !equal(rp->rcb_faddr, sosrc))
275 			continue;
276 
277 		/* filter messages that the process does not want */
278 		rop = (struct routecb *)rp;
279 		if (rop->msgfilter != 0 && !(rop->msgfilter & (1 <<
280 		    mtod(m, struct rt_msghdr *)->rtm_type)))
281 			continue;
282 
283 		if (last) {
284 			struct mbuf *n;
285 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
286 				if (sbappendaddr(&last->so_rcv, sosrc,
287 				    n, (struct mbuf *)0) == 0)
288 					/* should notify about lost packet */
289 					m_freem(n);
290 				else {
291 					sorwakeup(last);
292 					sockets++;
293 				}
294 			}
295 		}
296 		last = rp->rcb_socket;
297 	}
298 	if (last) {
299 		if (sbappendaddr(&last->so_rcv, sosrc,
300 		    m, (struct mbuf *)0) == 0)
301 			m_freem(m);
302 		else {
303 			sorwakeup(last);
304 			sockets++;
305 		}
306 	} else
307 		m_freem(m);
308 }
309 
310 int
311 route_output(struct mbuf *m, ...)
312 {
313 	struct rt_msghdr	*rtm = NULL;
314 	struct radix_node	*rn = NULL;
315 	struct rtentry		*rt = NULL;
316 	struct rtentry		*saved_nrt = NULL;
317 	struct radix_node_head	*rnh;
318 	struct rt_addrinfo	 info;
319 	int			 len, error = 0;
320 	struct ifnet		*ifp = NULL;
321 	struct ifaddr		*ifa = NULL;
322 	struct socket		*so;
323 	struct rawcb		*rp = NULL;
324 	struct sockaddr_rtlabel	 sa_rt;
325 #ifdef MPLS
326 	struct sockaddr_mpls	 sa_mpls;
327 #endif
328 	const char		*label;
329 	va_list			 ap;
330 	u_int			 tableid;
331 	u_int8_t		 prio;
332 
333 	va_start(ap, m);
334 	so = va_arg(ap, struct socket *);
335 	va_end(ap);
336 
337 	dst = NULL;	/* for error handling (goto flush) */
338 	if (m == 0 || ((m->m_len < sizeof(int32_t)) &&
339 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
340 		return (ENOBUFS);
341 	if ((m->m_flags & M_PKTHDR) == 0)
342 		panic("route_output");
343 	len = m->m_pkthdr.len;
344 	if (len < offsetof(struct rt_msghdr, rtm_type) + 1 ||
345 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
346 		error = EINVAL;
347 		goto flush;
348 	}
349 	switch (mtod(m, struct rt_msghdr *)->rtm_version) {
350 	case RTM_VERSION:
351 		if (len < sizeof(struct rt_msghdr)) {
352 			error = EINVAL;
353 			goto flush;
354 		}
355 		R_Malloc(rtm, struct rt_msghdr *, len);
356 		if (rtm == 0) {
357 			error = ENOBUFS;
358 			goto flush;
359 		}
360 		m_copydata(m, 0, len, (caddr_t)rtm);
361 		break;
362 #ifndef SMALL_KERNEL
363 	case RTM_OVERSION:
364 		if (len < sizeof(struct rt_omsghdr)) {
365 			error = EINVAL;
366 			goto flush;
367 		}
368 		rtm = rtmsg_3to4(m, &len);
369 		if (rtm == 0) {
370 			error = ENOBUFS;
371 			goto flush;
372 		}
373 		break;
374 #endif
375 	default:
376 		error = EPROTONOSUPPORT;
377 		goto flush;
378 	}
379 	rtm->rtm_pid = curproc->p_pid;
380 	if (rtm->rtm_hdrlen == 0)	/* old client */
381 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
382 	if (len < rtm->rtm_hdrlen) {
383 		error = EINVAL;
384 		goto flush;
385 	}
386 
387 	tableid = rtm->rtm_tableid;
388 	if (!rtable_exists(tableid)) {
389 		if (rtm->rtm_type == RTM_ADD) {
390 			if (rtable_add(tableid)) {
391 				error = EINVAL;
392 				goto flush;
393 			}
394 		} else {
395 			error = EINVAL;
396 			goto flush;
397 		}
398 	}
399 
400 	/* make sure that kernel-only bits are not set */
401 	rtm->rtm_priority &= RTP_MASK;
402 
403 	if (rtm->rtm_priority != 0) {
404 		if (rtm->rtm_priority > RTP_MAX) {
405 			error = EINVAL;
406 			goto flush;
407 		}
408 		prio = rtm->rtm_priority;
409 	} else if (rtm->rtm_type != RTM_ADD)
410 		prio = RTP_ANY;
411 	else if (rtm->rtm_flags & RTF_STATIC)
412 		prio = RTP_STATIC;
413 	else
414 		prio = RTP_DEFAULT;
415 
416 	/* write back the priority the kernel used */
417 	 rtm->rtm_priority = prio;
418 
419 	bzero(&info, sizeof(info));
420 	info.rti_addrs = rtm->rtm_addrs;
421 	rt_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info);
422 	info.rti_flags = rtm->rtm_flags;
423 	if (dst == 0 || dst->sa_family >= AF_MAX ||
424 	    (gate != 0 && gate->sa_family >= AF_MAX)) {
425 		error = EINVAL;
426 		goto flush;
427 	}
428 	if (genmask) {
429 		struct radix_node	*t;
430 		t = rn_addmask(genmask, 0, 1);
431 		if (t && genmask->sa_len >=
432 		    ((struct sockaddr *)t->rn_key)->sa_len &&
433 		    Bcmp((caddr_t *)genmask + 1, (caddr_t *)t->rn_key + 1,
434 		    ((struct sockaddr *)t->rn_key)->sa_len) - 1)
435 			genmask = (struct sockaddr *)(t->rn_key);
436 		else {
437 			error = ENOBUFS;
438 			goto flush;
439 		}
440 	}
441 #ifdef MPLS
442 	info.rti_mpls = rtm->rtm_mpls;
443 #endif
444 
445 	/*
446 	 * Verify that the caller has the appropriate privilege; RTM_GET
447 	 * is the only operation the non-superuser is allowed.
448 	 */
449 	if (rtm->rtm_type != RTM_GET && suser(curproc, 0) != 0) {
450 		error = EACCES;
451 		goto flush;
452 	}
453 
454 	switch (rtm->rtm_type) {
455 	case RTM_ADD:
456 		if (gate == 0) {
457 			error = EINVAL;
458 			goto flush;
459 		}
460 		error = rtrequest1(rtm->rtm_type, &info, prio, &saved_nrt,
461 		    tableid);
462 		if (error == 0 && saved_nrt) {
463 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
464 			    &saved_nrt->rt_rmx);
465 			saved_nrt->rt_refcnt--;
466 			saved_nrt->rt_genmask = genmask;
467 			rtm->rtm_index = saved_nrt->rt_ifp->if_index;
468 		}
469 		break;
470 	case RTM_DELETE:
471 		error = rtrequest1(rtm->rtm_type, &info, prio, &saved_nrt,
472 		    tableid);
473 		if (error == 0) {
474 			(rt = saved_nrt)->rt_refcnt++;
475 			goto report;
476 		}
477 		break;
478 	case RTM_GET:
479 	case RTM_CHANGE:
480 	case RTM_LOCK:
481 		if ((rnh = rt_gettable(dst->sa_family, tableid)) == NULL) {
482 			error = EAFNOSUPPORT;
483 			goto flush;
484 		}
485 		rn = rt_lookup(dst, netmask, tableid);
486 		if (rn == NULL || (rn->rn_flags & RNF_ROOT) != 0) {
487 			error = ESRCH;
488 			goto flush;
489 		}
490 		rt = (struct rtentry *)rn;
491 #ifndef SMALL_KERNEL
492 		/*
493 		 * for RTM_CHANGE/LOCK, if we got multipath routes,
494 		 * we require users to specify a matching RTAX_GATEWAY.
495 		 *
496 		 * for RTM_GET, gate is optional even with multipath.
497 		 * if gate == NULL the first match is returned.
498 		 * (no need to call rt_mpath_matchgate if gate == NULL)
499 		 */
500 		if (rn_mpath_capable(rnh)) {
501 			/* first find correct priority bucket */
502 			rn = rn_mpath_prio(rn, prio);
503 			rt = (struct rtentry *)rn;
504 			if (prio != RTP_ANY && rt->rt_priority != prio) {
505 				error = ESRCH;
506 				goto flush;
507 			}
508 
509 			/* if multipath routes */
510 			if (rn_mpath_next(rn, 0)) {
511 				if (gate)
512 					rt = rt_mpath_matchgate(rt, gate, prio);
513 				else if (rtm->rtm_type != RTM_GET)
514 					/*
515 					 * only RTM_GET may use an empty gate
516 					 * on multipath ...
517 					 */
518 					rt = NULL;
519 			} else if (gate && (rtm->rtm_type == RTM_GET ||
520 			    rtm->rtm_type == RTM_LOCK))
521 				/*
522 				 * ... but if a gate is specified RTM_GET
523 				 * and RTM_LOCK must match the gate no matter
524 				 * what.
525 				 */
526 				rt = rt_mpath_matchgate(rt, gate, prio);
527 
528 			if (!rt) {
529 				error = ESRCH;
530 				goto flush;
531 			}
532 			rn = (struct radix_node *)rt;
533 		}
534 #endif
535 		rt->rt_refcnt++;
536 
537 		/*
538 		 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
539 		 * returns a perfect match in case a netmask is specified.
540 		 * For host routes only a longest prefix match is returned
541 		 * so it is necessary to compare the existence of the netmaks.
542 		 * If both have a netmask rn_lookup() did a perfect match and
543 		 * if none of them have a netmask both are host routes which is
544 		 * also a perfect match.
545 		 */
546 		if (rtm->rtm_type != RTM_GET && !rt_mask(rt) != !netmask) {
547 				error = ESRCH;
548 				goto flush;
549 		}
550 
551 		switch (rtm->rtm_type) {
552 		case RTM_GET:
553 report:
554 			dst = rt_key(rt);
555 			gate = rt->rt_gateway;
556 			netmask = rt_mask(rt);
557 			genmask = rt->rt_genmask;
558 
559 			if (rt->rt_labelid) {
560 				bzero(&sa_rt, sizeof(sa_rt));
561 				sa_rt.sr_len = sizeof(sa_rt);
562 				label = rtlabel_id2name(rt->rt_labelid);
563 				if (label != NULL)
564 					strlcpy(sa_rt.sr_label, label,
565 					    sizeof(sa_rt.sr_label));
566 				info.rti_info[RTAX_LABEL] =
567 				    (struct sockaddr *)&sa_rt;
568 			}
569 #ifdef MPLS
570 			if (rt->rt_flags & RTF_MPLS) {
571 				bzero(&sa_mpls, sizeof(sa_mpls));
572 				sa_mpls.smpls_family = AF_MPLS;
573 				sa_mpls.smpls_len = sizeof(sa_mpls);
574 				sa_mpls.smpls_label = ((struct rt_mpls *)
575 				    rt->rt_llinfo)->mpls_label;
576 				info.rti_info[RTAX_SRC] =
577 				    (struct sockaddr *)&sa_mpls;
578 				info.rti_mpls = ((struct rt_mpls *)
579 				    rt->rt_llinfo)->mpls_operation;
580 			}
581 #endif
582 			ifpaddr = 0;
583 			ifaaddr = 0;
584 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA) &&
585 			    (ifp = rt->rt_ifp) != NULL) {
586 				ifpaddr =
587 				    TAILQ_FIRST(&ifp->if_addrlist)->ifa_addr;
588 				ifaaddr = rt->rt_ifa->ifa_addr;
589 				if (ifp->if_flags & IFF_POINTOPOINT)
590 					brdaddr = rt->rt_ifa->ifa_dstaddr;
591 				else
592 					brdaddr = 0;
593 				rtm->rtm_index = ifp->if_index;
594 			}
595 			len = rt_msg2(rtm->rtm_type, RTM_VERSION, &info, NULL,
596 			    NULL);
597 			if (len > rtm->rtm_msglen) {
598 				struct rt_msghdr	*new_rtm;
599 				R_Malloc(new_rtm, struct rt_msghdr *, len);
600 				if (new_rtm == 0) {
601 					error = ENOBUFS;
602 					goto flush;
603 				}
604 				Bcopy(rtm, new_rtm, rtm->rtm_msglen);
605 				Free(rtm); rtm = new_rtm;
606 			}
607 			rt_msg2(rtm->rtm_type, RTM_VERSION, &info, (caddr_t)rtm,
608 			    NULL);
609 			rtm->rtm_flags = rt->rt_flags;
610 			rtm->rtm_use = 0;
611 			rtm->rtm_priority = rt->rt_priority;
612 			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
613 			rtm->rtm_addrs = info.rti_addrs;
614 			break;
615 
616 		case RTM_CHANGE:
617 			/*
618 			 * new gateway could require new ifaddr, ifp;
619 			 * flags may also be different; ifp may be specified
620 			 * by ll sockaddr when protocol address is ambiguous
621 			 */
622 			if ((error = rt_getifa(&info)) != 0)
623 				goto flush;
624 			if (gate && rt_setgate(rt, rt_key(rt), gate, tableid)) {
625 				error = EDQUOT;
626 				goto flush;
627 			}
628 			if (ifpaddr && (ifa = ifa_ifwithnet(ifpaddr)) &&
629 			    (ifp = ifa->ifa_ifp) && (ifaaddr || gate))
630 				ifa = ifaof_ifpforaddr(ifaaddr ? ifaaddr : gate,
631 							ifp);
632 			else if ((ifaaddr && (ifa = ifa_ifwithaddr(ifaaddr))) ||
633 			    (gate && (ifa = ifa_ifwithroute(rt->rt_flags,
634 			    rt_key(rt), gate))))
635 				ifp = ifa->ifa_ifp;
636 			if (ifa) {
637 				struct ifaddr *oifa = rt->rt_ifa;
638 				if (oifa != ifa) {
639 				    if (oifa && oifa->ifa_rtrequest)
640 					oifa->ifa_rtrequest(RTM_DELETE, rt,
641 					    &info);
642 				    IFAFREE(rt->rt_ifa);
643 				    rt->rt_ifa = ifa;
644 				    ifa->ifa_refcnt++;
645 				    rt->rt_ifp = ifp;
646 				}
647 			}
648 
649 			/* XXX Hack to allow some flags to be toggled */
650 			if (rtm->rtm_fmask & RTF_FMASK)
651 				rt->rt_flags = (rt->rt_flags &
652 				    ~rtm->rtm_fmask) |
653 				    (rtm->rtm_flags & rtm->rtm_fmask);
654 
655 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
656 			    &rt->rt_rmx);
657 			rtm->rtm_index = rt->rt_ifp->if_index;
658 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
659 				rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
660 			if (genmask)
661 				rt->rt_genmask = genmask;
662 			if (info.rti_info[RTAX_LABEL] != NULL) {
663 				char *rtlabel = ((struct sockaddr_rtlabel *)
664 				    info.rti_info[RTAX_LABEL])->sr_label;
665 				rtlabel_unref(rt->rt_labelid);
666 				rt->rt_labelid =
667 				    rtlabel_name2id(rtlabel);
668 			}
669 
670 			if_group_routechange(dst, netmask);
671 			/* FALLTHROUGH */
672 		case RTM_LOCK:
673 			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
674 			rt->rt_rmx.rmx_locks |=
675 			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
676 			break;
677 		}
678 		break;
679 
680 	default:
681 		error = EOPNOTSUPP;
682 		break;
683 	}
684 
685 flush:
686 	if (rtm) {
687 		if (error)
688 			rtm->rtm_errno = error;
689 		else
690 			rtm->rtm_flags |= RTF_DONE;
691 	}
692 	if (rt)
693 		rtfree(rt);
694 
695 	/*
696 	 * Check to see if we don't want our own messages.
697 	 */
698 	if (!(so->so_options & SO_USELOOPBACK)) {
699 		if (route_cb.any_count <= 1) {
700 			if (rtm)
701 				Free(rtm);
702 			m_freem(m);
703 			return (error);
704 		}
705 		/* There is another listener, so construct message */
706 		rp = sotorawcb(so);
707 	}
708 	if (rp)
709 		rp->rcb_proto.sp_family = 0; /* Avoid us */
710 	if (dst)
711 		route_proto.sp_protocol = dst->sa_family;
712 	if (rtm) {
713 		m_copyback(m, 0, rtm->rtm_msglen, rtm);
714 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
715 			m_freem(m);
716 			m = NULL;
717 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
718 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
719 		Free(rtm);
720 	}
721 	if (m)
722 		route_input(m, &route_proto, &route_src, &route_dst);
723 	if (rp)
724 		rp->rcb_proto.sp_family = PF_ROUTE;
725 
726 	return (error);
727 }
728 
729 void
730 rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_kmetrics *out)
731 {
732 	if (which & RTV_MTU)
733 		out->rmx_mtu = in->rmx_mtu;
734 	if (which & RTV_EXPIRE)
735 		out->rmx_expire = in->rmx_expire;
736 	/* RTV_PRIORITY handled befor */
737 }
738 
739 void
740 rt_getmetrics(struct rt_kmetrics *in, struct rt_metrics *out)
741 {
742 	bzero(out, sizeof(*out));
743 	out->rmx_locks = in->rmx_locks;
744 	out->rmx_mtu = in->rmx_mtu;
745 	out->rmx_expire = in->rmx_expire;
746 	out->rmx_pksent = in->rmx_pksent;
747 }
748 
/*
 * ROUNDUP(a): for a > 0, round a up to the next multiple of sizeof(long)
 * (the bit trick relies on sizeof(long) being a power of two); for
 * a <= 0 the result is sizeof(long), so even an empty sockaddr occupies
 * one long.  Note that "a" is evaluated more than once.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
/* ADVANCE(x, n): step pointer x past sockaddr n, padded as by ROUNDUP(). */
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
752 
753 void
754 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
755 {
756 	struct sockaddr	*sa;
757 	int		 i;
758 
759 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
760 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
761 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
762 			continue;
763 		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;
764 		ADVANCE(cp, sa);
765 	}
766 }
767 
768 struct mbuf *
769 rt_msg1(int type, struct rt_addrinfo *rtinfo)
770 {
771 	struct rt_msghdr	*rtm;
772 	struct mbuf		*m;
773 	int			 i;
774 	struct sockaddr		*sa;
775 	int			 len, dlen, hlen;
776 
777 	switch (type) {
778 	case RTM_DELADDR:
779 	case RTM_NEWADDR:
780 		len = sizeof(struct ifa_msghdr);
781 		break;
782 	case RTM_IFINFO:
783 		len = sizeof(struct if_msghdr);
784 		break;
785 	case RTM_IFANNOUNCE:
786 		len = sizeof(struct if_announcemsghdr);
787 		break;
788 	default:
789 		len = sizeof(struct rt_msghdr);
790 		break;
791 	}
792 	if (len > MCLBYTES)
793 		panic("rt_msg1");
794 	m = m_gethdr(M_DONTWAIT, MT_DATA);
795 	if (m && len > MHLEN) {
796 		MCLGET(m, M_DONTWAIT);
797 		if ((m->m_flags & M_EXT) == 0) {
798 			m_free(m);
799 			m = NULL;
800 		}
801 	}
802 	if (m == 0)
803 		return (m);
804 	m->m_pkthdr.len = m->m_len = hlen = len;
805 	m->m_pkthdr.rcvif = NULL;
806 	rtm = mtod(m, struct rt_msghdr *);
807 	bzero(rtm, len);
808 	for (i = 0; i < RTAX_MAX; i++) {
809 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
810 			continue;
811 		rtinfo->rti_addrs |= (1 << i);
812 		dlen = ROUNDUP(sa->sa_len);
813 		m_copyback(m, len, dlen, sa);
814 		len += dlen;
815 	}
816 	if (m->m_pkthdr.len != len) {
817 		m_freem(m);
818 		return (NULL);
819 	}
820 	rtm->rtm_msglen = len;
821 	rtm->rtm_hdrlen = hlen;
822 	rtm->rtm_version = RTM_VERSION;
823 	rtm->rtm_type = type;
824 	return (m);
825 }
826 
827 int
828 rt_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
829     struct walkarg *w)
830 {
831 	int		i;
832 	int		len, dlen, hlen, second_time = 0;
833 	caddr_t		cp0;
834 
835 	rtinfo->rti_addrs = 0;
836 again:
837 	switch (type) {
838 	case RTM_DELADDR:
839 	case RTM_NEWADDR:
840 #ifndef SMALL_KERNEL
841 		if (vers == RTM_OVERSION)
842 			len = sizeof(struct ifa_omsghdr);
843 		else
844 #endif
845 			len = sizeof(struct ifa_msghdr);
846 		break;
847 	case RTM_IFINFO:
848 #ifndef SMALL_KERNEL
849 		if (vers == RTM_OVERSION)
850 			len = sizeof(struct if_omsghdr);
851 		else
852 #endif
853 			len = sizeof(struct if_msghdr);
854 		break;
855 	default:
856 #ifndef SMALL_KERNEL
857 		if (vers == RTM_OVERSION)
858 			len = sizeof(struct rt_omsghdr);
859 		else
860 #endif
861 			len = sizeof(struct rt_msghdr);
862 		break;
863 	}
864 	hlen = len;
865 	if ((cp0 = cp) != NULL)
866 		cp += len;
867 	for (i = 0; i < RTAX_MAX; i++) {
868 		struct sockaddr *sa;
869 
870 		if ((sa = rtinfo->rti_info[i]) == 0)
871 			continue;
872 		rtinfo->rti_addrs |= (1 << i);
873 		dlen = ROUNDUP(sa->sa_len);
874 		if (cp) {
875 			bcopy(sa, cp, (size_t)dlen);
876 			cp += dlen;
877 		}
878 		len += dlen;
879 	}
880 	/* align message length to the next natural boundary */
881 	len = ALIGN(len);
882 	if (cp == 0 && w != NULL && !second_time) {
883 		struct walkarg *rw = w;
884 
885 		rw->w_needed += len;
886 		if (rw->w_needed <= 0 && rw->w_where) {
887 			if (rw->w_tmemsize < len) {
888 				if (rw->w_tmem)
889 					free(rw->w_tmem, M_RTABLE);
890 				rw->w_tmem = malloc(len, M_RTABLE, M_NOWAIT);
891 				if (rw->w_tmem)
892 					rw->w_tmemsize = len;
893 			}
894 			if (rw->w_tmem) {
895 				cp = rw->w_tmem;
896 				second_time = 1;
897 				goto again;
898 			} else
899 				rw->w_where = 0;
900 		}
901 	}
902 	if (cp && w)		/* clear the message header */
903 		bzero(cp0, hlen);
904 
905 	if (cp && vers != RTM_OVERSION) {
906 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
907 
908 		rtm->rtm_version = RTM_VERSION;
909 		rtm->rtm_type = type;
910 		rtm->rtm_msglen = len;
911 		rtm->rtm_hdrlen = hlen;
912 	}
913 #ifndef SMALL_KERNEL
914 	if (cp && vers == RTM_OVERSION) {
915 		struct rt_omsghdr *rtm = (struct rt_omsghdr *)cp0;
916 
917 		rtm->rtm_version = RTM_OVERSION;
918 		rtm->rtm_type = type;
919 		rtm->rtm_msglen = len;
920 	}
921 #endif
922 	return (len);
923 }
924 
925 /*
926  * This routine is called to generate a message from the routing
927  * socket indicating that a redirect has occurred, a routing lookup
928  * has failed, or that a protocol has detected timeouts to a particular
929  * destination.
930  */
931 void
932 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags,
933     struct ifnet *ifp, int error, u_int tableid)
934 {
935 	struct rt_msghdr	*rtm;
936 	struct mbuf		*m;
937 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
938 
939 	if (route_cb.any_count == 0)
940 		return;
941 	m = rt_msg1(type, rtinfo);
942 	if (m == 0)
943 		return;
944 	rtm = mtod(m, struct rt_msghdr *);
945 	rtm->rtm_flags = RTF_DONE | flags;
946 	rtm->rtm_errno = error;
947 	rtm->rtm_tableid = tableid;
948 	rtm->rtm_addrs = rtinfo->rti_addrs;
949 	if (ifp != NULL)
950 		rtm->rtm_index = ifp->if_index;
951 	if (sa == NULL)
952 		route_proto.sp_protocol = 0;
953 	else
954 		route_proto.sp_protocol = sa->sa_family;
955 	route_input(m, &route_proto, &route_src, &route_dst);
956 }
957 
958 /*
959  * This routine is called to generate a message from the routing
960  * socket indicating that the status of a network interface has changed.
961  */
962 void
963 rt_ifmsg(struct ifnet *ifp)
964 {
965 	struct if_msghdr	*ifm;
966 	struct mbuf		*m;
967 
968 	if (route_cb.any_count == 0)
969 		return;
970 	m = rt_msg1(RTM_IFINFO, NULL);
971 	if (m == 0)
972 		return;
973 	ifm = mtod(m, struct if_msghdr *);
974 	ifm->ifm_index = ifp->if_index;
975 	ifm->ifm_flags = ifp->if_flags;
976 	ifm->ifm_data = ifp->if_data;
977 	ifm->ifm_addrs = 0;
978 	route_proto.sp_protocol = 0;
979 	route_input(m, &route_proto, &route_src, &route_dst);
980 }
981 
982 /*
983  * This is called to generate messages from the routing socket
984  * indicating a network interface has had addresses associated with it.
985  * if we ever reverse the logic and replace messages TO the routing
986  * socket indicate a request to configure interfaces, then it will
987  * be unnecessary as the routing socket will automatically generate
988  * copies of it.
989  */
990 void
991 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
992 {
993 	struct rt_addrinfo	 info;
994 	struct sockaddr		*sa = NULL;
995 	int			 pass;
996 	struct mbuf		*m = NULL;
997 	struct ifnet		*ifp = ifa->ifa_ifp;
998 
999 	if (route_cb.any_count == 0)
1000 		return;
1001 	for (pass = 1; pass < 3; pass++) {
1002 		bzero(&info, sizeof(info));
1003 		if ((cmd == RTM_ADD && pass == 1) ||
1004 		    (cmd == RTM_DELETE && pass == 2)) {
1005 			struct ifa_msghdr	*ifam;
1006 			int			 ncmd;
1007 
1008 			if (cmd == RTM_ADD)
1009 				ncmd = RTM_NEWADDR;
1010 			else
1011 				ncmd = RTM_DELADDR;
1012 
1013 			ifaaddr = sa = ifa->ifa_addr;
1014 			ifpaddr = TAILQ_FIRST(&ifp->if_addrlist)->ifa_addr;
1015 			netmask = ifa->ifa_netmask;
1016 			brdaddr = ifa->ifa_dstaddr;
1017 			if ((m = rt_msg1(ncmd, &info)) == NULL)
1018 				continue;
1019 			ifam = mtod(m, struct ifa_msghdr *);
1020 			ifam->ifam_index = ifp->if_index;
1021 			ifam->ifam_metric = ifa->ifa_metric;
1022 			ifam->ifam_flags = ifa->ifa_flags;
1023 			ifam->ifam_addrs = info.rti_addrs;
1024 		}
1025 		if ((cmd == RTM_ADD && pass == 2) ||
1026 		    (cmd == RTM_DELETE && pass == 1)) {
1027 			struct rt_msghdr *rtm;
1028 
1029 			if (rt == 0)
1030 				continue;
1031 			netmask = rt_mask(rt);
1032 			dst = sa = rt_key(rt);
1033 			gate = rt->rt_gateway;
1034 			if ((m = rt_msg1(cmd, &info)) == NULL)
1035 				continue;
1036 			rtm = mtod(m, struct rt_msghdr *);
1037 			rtm->rtm_index = ifp->if_index;
1038 			rtm->rtm_flags |= rt->rt_flags;
1039 			rtm->rtm_errno = error;
1040 			rtm->rtm_addrs = info.rti_addrs;
1041 		}
1042 		if (sa == NULL)
1043 			route_proto.sp_protocol = 0;
1044 		else
1045 			route_proto.sp_protocol = sa->sa_family;
1046 		route_input(m, &route_proto, &route_src, &route_dst);
1047 	}
1048 }
1049 
1050 /*
1051  * This is called to generate routing socket messages indicating
1052  * network interface arrival and departure.
1053  */
1054 void
1055 rt_ifannouncemsg(struct ifnet *ifp, int what)
1056 {
1057 	struct if_announcemsghdr	*ifan;
1058 	struct mbuf			*m;
1059 
1060 	if (route_cb.any_count == 0)
1061 		return;
1062 	m = rt_msg1(RTM_IFANNOUNCE, NULL);
1063 	if (m == 0)
1064 		return;
1065 	ifan = mtod(m, struct if_announcemsghdr *);
1066 	ifan->ifan_index = ifp->if_index;
1067 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1068 	ifan->ifan_what = what;
1069 	route_proto.sp_protocol = 0;
1070 	route_input(m, &route_proto, &route_src, &route_dst);
1071 }
1072 
1073 /*
1074  * This is used in dumping the kernel table via sysctl().
1075  */
1076 int
1077 sysctl_dumpentry(struct radix_node *rn, void *v)
1078 {
1079 	struct walkarg		*w = v;
1080 	struct rtentry		*rt = (struct rtentry *)rn;
1081 	int			 error = 0, size;
1082 	struct rt_addrinfo	 info;
1083 #ifdef MPLS
1084 	struct sockaddr_mpls	 sa_mpls;
1085 #endif
1086 	struct sockaddr_rtlabel	 sa_rt;
1087 	const char		*label;
1088 
1089 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1090 		return 0;
1091 	bzero(&info, sizeof(info));
1092 	dst = rt_key(rt);
1093 	gate = rt->rt_gateway;
1094 	netmask = rt_mask(rt);
1095 	genmask = rt->rt_genmask;
1096 	if (rt->rt_ifp) {
1097 		ifpaddr = TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
1098 		ifaaddr = rt->rt_ifa->ifa_addr;
1099 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1100 			brdaddr = rt->rt_ifa->ifa_dstaddr;
1101 	}
1102 	if (rt->rt_labelid) {
1103 		bzero(&sa_rt, sizeof(sa_rt));
1104 		sa_rt.sr_len = sizeof(sa_rt);
1105 		label = rtlabel_id2name(rt->rt_labelid);
1106 		if (label != NULL) {
1107 			strlcpy(sa_rt.sr_label, label,
1108 			    sizeof(sa_rt.sr_label));
1109 			info.rti_info[RTAX_LABEL] =
1110 			    (struct sockaddr *)&sa_rt;
1111 		}
1112 	}
1113 #ifdef MPLS
1114 	if (rt->rt_flags & RTF_MPLS) {
1115 		bzero(&sa_mpls, sizeof(sa_mpls));
1116 		sa_mpls.smpls_family = AF_MPLS;
1117 		sa_mpls.smpls_len = sizeof(sa_mpls);
1118 		sa_mpls.smpls_label = ((struct rt_mpls *)
1119 		    rt->rt_llinfo)->mpls_label;
1120 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1121 		info.rti_mpls = ((struct rt_mpls *)
1122 		    rt->rt_llinfo)->mpls_operation;
1123 	}
1124 #endif
1125 
1126 	size = rt_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1127 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1128 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1129 
1130 		rtm->rtm_flags = rt->rt_flags;
1131 		rtm->rtm_priority = rt->rt_priority;
1132 		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1133 		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt;
1134 		rtm->rtm_index = rt->rt_ifp->if_index;
1135 		rtm->rtm_addrs = info.rti_addrs;
1136 #ifdef MPLS
1137 		rtm->rtm_mpls = info.rti_mpls;
1138 #endif
1139 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1140 			w->w_where = NULL;
1141 		else
1142 			w->w_where += size;
1143 	}
1144 #ifndef SMALL_KERNEL
1145 	size = rt_msg2(RTM_GET, RTM_OVERSION, &info, NULL, w);
1146 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1147 		struct rt_omsghdr *rtm = (struct rt_omsghdr *)w->w_tmem;
1148 
1149 		rtm->rtm_flags = rt->rt_flags;
1150 		rtm->rtm_rmx.rmx_locks = rt->rt_rmx.rmx_locks;
1151 		rtm->rtm_rmx.rmx_mtu = rt->rt_rmx.rmx_mtu;
1152 		rtm->rtm_index = rt->rt_ifp->if_index;
1153 		rtm->rtm_addrs = info.rti_addrs;
1154 		if ((error = copyout(rtm, w->w_where, size)) != 0)
1155 			w->w_where = NULL;
1156 		else
1157 			w->w_where += size;
1158 	}
1159 #endif
1160 	return (error);
1161 }
1162 
1163 int
1164 sysctl_iflist(int af, struct walkarg *w)
1165 {
1166 	struct ifnet		*ifp;
1167 	struct ifaddr		*ifa;
1168 	struct rt_addrinfo	 info;
1169 	int			 len, error = 0;
1170 
1171 	bzero(&info, sizeof(info));
1172 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1173 		if (w->w_arg && w->w_arg != ifp->if_index)
1174 			continue;
1175 		ifa = TAILQ_FIRST(&ifp->if_addrlist);
1176 		if (!ifa)
1177 			continue;
1178 		ifpaddr = ifa->ifa_addr;
1179 		len = rt_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
1180 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1181 			struct if_msghdr *ifm;
1182 
1183 			ifm = (struct if_msghdr *)w->w_tmem;
1184 			ifm->ifm_index = ifp->if_index;
1185 			ifm->ifm_flags = ifp->if_flags;
1186 			ifm->ifm_data = ifp->if_data;
1187 			ifm->ifm_addrs = info.rti_addrs;
1188 			error = copyout(ifm, w->w_where, len);
1189 			if (error)
1190 				return (error);
1191 			w->w_where += len;
1192 		}
1193 #ifndef SMALL_KERNEL
1194 		len = rt_msg2(RTM_IFINFO, RTM_OVERSION, &info, 0, w);
1195 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1196 			struct if_omsghdr *ifm;
1197 
1198 			ifm = (struct if_omsghdr *)w->w_tmem;
1199 			ifm->ifm_index = ifp->if_index;
1200 			ifm->ifm_flags = ifp->if_flags;
1201 			/* just init the most important types of if_data */
1202 			ifm->ifm_data.ifi_type = ifp->if_data.ifi_type;
1203 			ifm->ifm_data.ifi_addrlen = ifp->if_data.ifi_addrlen;
1204 			ifm->ifm_data.ifi_hdrlen = ifp->if_data.ifi_hdrlen;
1205 			ifm->ifm_data.ifi_link_state =
1206 			    ifp->if_data.ifi_link_state;
1207 			ifm->ifm_data.ifi_mtu = ifp->if_data.ifi_mtu;
1208 			ifm->ifm_data.ifi_metric = ifp->if_data.ifi_metric;
1209 			if (ifp->if_data.ifi_baudrate > ULONG_MAX)
1210 				ifm->ifm_data.ifi_baudrate = ULONG_MAX;
1211 			else
1212 				ifm->ifm_data.ifi_baudrate =
1213 				    ifp->if_data.ifi_baudrate;
1214 
1215 			ifm->ifm_addrs = info.rti_addrs;
1216 			error = copyout(ifm, w->w_where, len);
1217 			if (error)
1218 				return (error);
1219 			w->w_where += len;
1220 		}
1221 #endif
1222 		ifpaddr = 0;
1223 		while ((ifa = TAILQ_NEXT(ifa, ifa_list)) !=
1224 		    TAILQ_END(&ifp->if_addrlist)) {
1225 			if (af && af != ifa->ifa_addr->sa_family)
1226 				continue;
1227 			ifaaddr = ifa->ifa_addr;
1228 			netmask = ifa->ifa_netmask;
1229 			brdaddr = ifa->ifa_dstaddr;
1230 			len = rt_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
1231 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1232 				struct ifa_msghdr *ifam;
1233 
1234 				ifam = (struct ifa_msghdr *)w->w_tmem;
1235 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1236 				ifam->ifam_flags = ifa->ifa_flags;
1237 				ifam->ifam_metric = ifa->ifa_metric;
1238 				ifam->ifam_addrs = info.rti_addrs;
1239 				error = copyout(w->w_tmem, w->w_where, len);
1240 				if (error)
1241 					return (error);
1242 				w->w_where += len;
1243 			}
1244 #ifndef SMALL_KERNEL
1245 			len = rt_msg2(RTM_NEWADDR, RTM_OVERSION, &info, 0, w);
1246 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1247 				struct ifa_omsghdr *ifam;
1248 
1249 				ifam = (struct ifa_omsghdr *)w->w_tmem;
1250 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1251 				ifam->ifam_flags = ifa->ifa_flags;
1252 				ifam->ifam_metric = ifa->ifa_metric;
1253 				ifam->ifam_addrs = info.rti_addrs;
1254 				error = copyout(w->w_tmem, w->w_where, len);
1255 				if (error)
1256 					return (error);
1257 				w->w_where += len;
1258 			}
1259 #endif
1260 		}
1261 		ifaaddr = netmask = brdaddr = 0;
1262 	}
1263 	return (0);
1264 }
1265 
1266 int
1267 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
1268     size_t newlen)
1269 {
1270 	struct radix_node_head	*rnh;
1271 	int			 i, s, error = EINVAL;
1272 	u_char  		 af;
1273 	struct walkarg		 w;
1274 	u_int			 tableid = 0;
1275 
1276 	if (new)
1277 		return (EPERM);
1278 	if (namelen < 3 || namelen > 4)
1279 		return (EINVAL);
1280 	af = name[0];
1281 	bzero(&w, sizeof(w));
1282 	w.w_where = where;
1283 	w.w_given = *given;
1284 	w.w_needed = 0 - w.w_given;
1285 	w.w_op = name[1];
1286 	w.w_arg = name[2];
1287 
1288 	if (namelen == 4) {
1289 		tableid = name[3];
1290 		if (!rtable_exists(tableid))
1291 			return (EINVAL);
1292 	}
1293 
1294 	s = splsoftnet();
1295 	switch (w.w_op) {
1296 
1297 	case NET_RT_DUMP:
1298 	case NET_RT_FLAGS:
1299 		for (i = 1; i <= AF_MAX; i++)
1300 			if ((rnh = rt_gettable(i, tableid)) != NULL &&
1301 			    (af == 0 || af == i) &&
1302 			    (error = (*rnh->rnh_walktree)(rnh,
1303 			    sysctl_dumpentry, &w)))
1304 				break;
1305 		break;
1306 
1307 	case NET_RT_IFLIST:
1308 		error = sysctl_iflist(af, &w);
1309 		break;
1310 
1311 	case NET_RT_STATS:
1312 		error = sysctl_rdstruct(where, given, new,
1313 		    &rtstat, sizeof(rtstat));
1314 		splx(s);
1315 		return (error);
1316 	}
1317 	splx(s);
1318 	if (w.w_tmem)
1319 		free(w.w_tmem, M_RTABLE);
1320 	w.w_needed += w.w_given;
1321 	if (where) {
1322 		*given = w.w_where - (caddr_t)where;
1323 		if (*given < w.w_needed)
1324 			return (ENOMEM);
1325 	} else
1326 		*given = (11 * w.w_needed) / 10;
1327 
1328 	return (error);
1329 }
1330 
1331 #ifndef SMALL_KERNEL
1332 struct rt_msghdr *
1333 rtmsg_3to4(struct mbuf *m, int *len)
1334 {
1335 	struct rt_msghdr *rtm;
1336 	struct rt_omsghdr *ortm;
1337 	int slen;
1338 
1339 	slen = *len - sizeof(struct rt_omsghdr);
1340 	*len = sizeof(struct rt_msghdr) + slen;
1341 	R_Malloc(rtm, struct rt_msghdr *, *len);
1342 	if (rtm == 0)
1343 		return (NULL);
1344 	bzero(rtm, sizeof(struct rt_msghdr));
1345 	ortm = mtod(m, struct rt_omsghdr *);
1346 	rtm->rtm_msglen = sizeof(struct rt_msghdr) + slen;
1347 	rtm->rtm_version = RTM_VERSION;
1348 	rtm->rtm_type = ortm->rtm_type;
1349 	rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
1350 	rtm->rtm_index = ortm->rtm_index;
1351 	rtm->rtm_tableid = 0; /* XXX we only care about the main table */
1352 	rtm->rtm_flags = ortm->rtm_flags;
1353 	rtm->rtm_addrs = ortm->rtm_addrs;
1354 	rtm->rtm_seq = ortm->rtm_seq;
1355 	rtm->rtm_fmask = ortm->rtm_fmask;
1356 	rtm->rtm_inits = ortm->rtm_inits;
1357 	/* copy just the interesting stuff ignore the rest */
1358 	rtm->rtm_rmx.rmx_locks = ortm->rtm_rmx.rmx_locks;
1359 	rtm->rtm_rmx.rmx_mtu = ortm->rtm_rmx.rmx_mtu;
1360 
1361 	m_copydata(m, sizeof(struct rt_omsghdr), slen,
1362 	    ((caddr_t)rtm + sizeof(struct rt_msghdr)));
1363 
1364 	return (rtm);
1365 }
1366 #endif
1367 
1368 /*
1369  * Definitions of protocols supported in the ROUTE domain.
1370  */
1371 
1372 extern	struct domain routedomain;		/* or at least forward */
1373 
1374 struct protosw routesw[] = {
1375 { SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
1376   route_input,	route_output,	raw_ctlinput,	route_ctloutput,
1377   route_usrreq,
1378   raw_init,	0,		0,		0,
1379   sysctl_rtable,
1380 }
1381 };
1382 
1383 struct domain routedomain =
1384     { PF_ROUTE, "route", route_init, 0, 0,
1385       routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
1386