1 /* $OpenBSD: rtsock.c,v 1.299 2020/06/24 22:03:42 cheloha Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/pool.h> 73 #include <sys/protosw.h> 74 #include <sys/srp.h> 75 76 #include <net/if.h> 77 #include <net/if_dl.h> 78 #include <net/if_var.h> 79 #include <net/route.h> 80 81 #include <netinet/in.h> 82 83 #ifdef MPLS 84 #include <netmpls/mpls.h> 85 #endif 86 #ifdef IPSEC 87 #include <netinet/ip_ipsp.h> 88 #include <net/if_enc.h> 89 #endif 90 #ifdef BFD 91 #include <net/bfd.h> 92 #endif 93 94 #include <sys/stdarg.h> 95 #include <sys/kernel.h> 96 #include <sys/timeout.h> 97 98 #define ROUTESNDQ 8192 99 #define ROUTERCVQ 8192 100 101 const struct sockaddr route_src = { 2, PF_ROUTE, }; 102 103 struct walkarg { 104 int w_op, w_arg, w_given, w_needed, w_tmemsize; 105 caddr_t w_where, w_tmem; 106 }; 107 108 void route_prinit(void); 109 void rcb_ref(void *, void *); 110 void rcb_unref(void *, void *); 111 int route_output(struct mbuf *, struct socket *, struct sockaddr *, 112 struct mbuf *); 113 int route_ctloutput(int, struct socket *, int, int, struct mbuf *); 114 int route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, 115 struct mbuf *, struct proc *); 116 void route_input(struct mbuf *m0, struct socket *, sa_family_t); 117 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 118 int route_cleargateway(struct rtentry *, void *, unsigned int); 119 void rtm_senddesync_timer(void *); 120 void rtm_senddesync(struct socket *); 121 int rtm_sendup(struct socket *, struct mbuf *, int); 122 123 int rtm_getifa(struct rt_addrinfo *, unsigned int); 124 int rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *, 125 uint8_t, unsigned int); 126 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int); 127 struct mbuf *rtm_msg1(int, struct rt_addrinfo *); 128 int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t, 129 struct walkarg *); 130 int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 131 int rtm_validate_proposal(struct rt_addrinfo *); 132 void rtm_setmetrics(u_long, const struct rt_metrics *, 133 struct rt_kmetrics *); 134 void rtm_getmetrics(const struct rt_kmetrics *, 135 struct rt_metrics *); 136 137 int sysctl_iflist(int, struct walkarg *); 138 int sysctl_ifnames(struct walkarg *); 139 int sysctl_rtable_rtstat(void *, size_t *, void *); 140 141 struct rtpcb { 142 struct socket *rop_socket; 143 144 SRPL_ENTRY(rtpcb) rop_list; 145 struct refcnt rop_refcnt; 146 struct timeout rop_timeout; 147 unsigned int rop_msgfilter; 148 unsigned int rop_flags; 149 u_int rop_rtableid; 150 unsigned short rop_proto; 151 u_char rop_priority; 152 }; 153 #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) 154 155 struct rtptable { 156 SRPL_HEAD(, rtpcb) rtp_list; 157 struct srpl_rc rtp_rc; 158 struct rwlock rtp_lk; 159 unsigned int rtp_count; 160 }; 161 162 struct pool rtpcb_pool; 163 struct rtptable rtptable; 164 165 /* 166 * These flags and timeout are used for indicating to userland (via a 167 * RTM_DESYNC msg) when the route socket has overflowed and messages 168 * have been lost. 169 */ 170 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 171 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 172 queueing more packets */ 173 174 #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ 175 176 void 177 route_prinit(void) 178 { 179 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); 180 rw_init(&rtptable.rtp_lk, "rtsock"); 181 SRPL_INIT(&rtptable.rtp_list); 182 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, 183 IPL_NONE, PR_WAITOK, "rtpcb", NULL); 184 } 185 186 void 187 rcb_ref(void *null, void *v) 188 { 189 struct rtpcb *rop = v; 190 191 refcnt_take(&rop->rop_refcnt); 192 } 193 194 void 195 rcb_unref(void *null, void *v) 196 { 197 struct rtpcb *rop = v; 198 199 refcnt_rele_wake(&rop->rop_refcnt); 200 } 201 202 int 203 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 204 struct mbuf *control, struct proc *p) 205 { 206 struct rtpcb *rop; 207 int error = 0; 208 209 if (req == PRU_CONTROL) 210 return (EOPNOTSUPP); 211 212 soassertlocked(so); 213 214 if (control && control->m_len) { 215 error = EOPNOTSUPP; 216 goto release; 217 } 218 219 rop = sotortpcb(so); 220 if (rop == NULL) { 221 error = EINVAL; 222 goto release; 223 } 224 225 switch (req) { 226 /* no connect, bind, accept. Socket is connected from the start */ 227 case PRU_CONNECT: 228 case PRU_BIND: 229 case PRU_CONNECT2: 230 case PRU_LISTEN: 231 case PRU_ACCEPT: 232 error = EOPNOTSUPP; 233 break; 234 235 case PRU_DISCONNECT: 236 case PRU_ABORT: 237 soisdisconnected(so); 238 break; 239 case PRU_SHUTDOWN: 240 socantsendmore(so); 241 break; 242 case PRU_SENSE: 243 /* stat: don't bother with a blocksize. */ 244 break; 245 246 /* minimal support, just implement a fake peer address */ 247 case PRU_SOCKADDR: 248 error = EINVAL; 249 break; 250 case PRU_PEERADDR: 251 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); 252 nam->m_len = route_src.sa_len; 253 break; 254 255 case PRU_RCVD: 256 /* 257 * If we are in a FLUSH state, check if the buffer is 258 * empty so that we can clear the flag. 259 */ 260 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 261 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == 262 rop->rop_socket->so_rcv.sb_hiwat))) 263 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 264 break; 265 266 case PRU_RCVOOB: 267 case PRU_SENDOOB: 268 error = EOPNOTSUPP; 269 break; 270 case PRU_SEND: 271 if (nam) { 272 error = EISCONN; 273 break; 274 } 275 error = (*so->so_proto->pr_output)(m, so, NULL, NULL); 276 m = NULL; 277 break; 278 default: 279 panic("route_usrreq"); 280 } 281 282 release: 283 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 284 m_freem(control); 285 m_freem(m); 286 } 287 return (error); 288 } 289 290 int 291 route_attach(struct socket *so, int proto) 292 { 293 struct rtpcb *rop; 294 int error; 295 296 /* 297 * use the rawcb but allocate a rtpcb, this 298 * code does not care about the additional fields 299 * and works directly on the raw socket. 300 */ 301 rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO); 302 so->so_pcb = rop; 303 /* Init the timeout structure */ 304 timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so); 305 refcnt_init(&rop->rop_refcnt); 306 307 if (curproc == NULL) 308 error = EACCES; 309 else 310 error = soreserve(so, ROUTESNDQ, ROUTERCVQ); 311 if (error) { 312 pool_put(&rtpcb_pool, rop); 313 return (error); 314 } 315 316 rop->rop_socket = so; 317 rop->rop_proto = proto; 318 319 rop->rop_rtableid = curproc->p_p->ps_rtableid; 320 321 soisconnected(so); 322 so->so_options |= SO_USELOOPBACK; 323 324 rw_enter(&rtptable.rtp_lk, RW_WRITE); 325 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, 326 rop_list); 327 rtptable.rtp_count++; 328 rw_exit(&rtptable.rtp_lk); 329 330 return (0); 331 } 332 333 int 334 route_detach(struct socket *so) 335 { 336 struct rtpcb *rop; 337 338 soassertlocked(so); 339 340 rop = sotortpcb(so); 341 if (rop == NULL) 342 return (EINVAL); 343 344 rw_enter(&rtptable.rtp_lk, RW_WRITE); 345 346 timeout_del(&rop->rop_timeout); 347 rtptable.rtp_count--; 348 349 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, 350 rop_list); 351 rw_exit(&rtptable.rtp_lk); 352 353 /* wait for all references to drop */ 354 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); 355 356 so->so_pcb = NULL; 357 KASSERT((so->so_state & SS_NOFDREF) == 0); 358 pool_put(&rtpcb_pool, rop); 359 360 return (0); 361 } 362 363 int 364 route_ctloutput(int op, struct socket *so, int level, int optname, 365 struct mbuf *m) 366 { 367 struct rtpcb *rop = sotortpcb(so); 368 int error = 0; 369 unsigned int tid, prio; 370 371 if (level != AF_ROUTE) 372 return (EINVAL); 373 374 switch (op) { 375 case PRCO_SETOPT: 376 switch (optname) { 377 case ROUTE_MSGFILTER: 378 if (m == NULL || m->m_len != sizeof(unsigned int)) 379 error = EINVAL; 380 else 381 rop->rop_msgfilter = *mtod(m, unsigned int *); 382 break; 383 case ROUTE_TABLEFILTER: 384 if (m == NULL || m->m_len != sizeof(unsigned int)) { 385 error = EINVAL; 386 break; 387 } 388 tid = *mtod(m, unsigned int *); 389 if (tid != RTABLE_ANY && !rtable_exists(tid)) 390 error = ENOENT; 391 else 392 rop->rop_rtableid = tid; 393 break; 394 case ROUTE_PRIOFILTER: 395 if (m == NULL || m->m_len != sizeof(unsigned int)) { 396 error = EINVAL; 397 break; 398 } 399 prio = *mtod(m, unsigned int *); 400 if (prio > RTP_MAX) 401 error = EINVAL; 402 else 403 rop->rop_priority = prio; 404 break; 405 default: 406 error = ENOPROTOOPT; 407 break; 408 } 409 break; 410 case PRCO_GETOPT: 411 switch (optname) { 412 case ROUTE_MSGFILTER: 413 m->m_len = sizeof(unsigned int); 414 *mtod(m, unsigned int *) = rop->rop_msgfilter; 415 break; 416 case ROUTE_TABLEFILTER: 417 m->m_len = sizeof(unsigned int); 418 *mtod(m, unsigned int *) = rop->rop_rtableid; 419 break; 420 case ROUTE_PRIOFILTER: 421 m->m_len = sizeof(unsigned int); 422 *mtod(m, unsigned int *) = rop->rop_priority; 423 break; 424 default: 425 error = ENOPROTOOPT; 426 break; 427 } 428 } 429 return (error); 430 } 431 432 void 433 rtm_senddesync_timer(void *xso) 434 { 435 struct socket *so = xso; 436 int s; 437 438 s = solock(so); 439 rtm_senddesync(so); 440 sounlock(so, s); 441 } 442 443 void 444 rtm_senddesync(struct socket *so) 445 { 446 struct rtpcb *rop = sotortpcb(so); 447 struct mbuf *desync_mbuf; 448 449 soassertlocked(so); 450 451 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 452 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 453 return; 454 455 /* 456 * If we fail to alloc memory or if sbappendaddr() 457 * fails, re-add timeout and try again. 458 */ 459 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 460 if (desync_mbuf != NULL) { 461 if (sbappendaddr(so, &so->so_rcv, &route_src, 462 desync_mbuf, NULL) != 0) { 463 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 464 sorwakeup(rop->rop_socket); 465 return; 466 } 467 m_freem(desync_mbuf); 468 } 469 /* Re-add timeout to try sending msg again */ 470 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 471 } 472 473 void 474 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 475 { 476 struct socket *so; 477 struct rtpcb *rop; 478 struct rt_msghdr *rtm; 479 struct mbuf *m = m0; 480 struct socket *last = NULL; 481 struct srp_ref sr; 482 int s; 483 484 /* ensure that we can access the rtm_type via mtod() */ 485 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 486 m_freem(m); 487 return; 488 } 489 490 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 491 /* 492 * If route socket is bound to an address family only send 493 * messages that match the address family. Address family 494 * agnostic messages are always sent. 495 */ 496 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 497 rop->rop_proto != sa_family) 498 continue; 499 500 501 so = rop->rop_socket; 502 s = solock(so); 503 504 /* 505 * Check to see if we don't want our own messages and 506 * if we can receive anything. 507 */ 508 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 509 !(so->so_state & SS_ISCONNECTED) || 510 (so->so_state & SS_CANTRCVMORE)) { 511 next: 512 sounlock(so, s); 513 continue; 514 } 515 516 /* filter messages that the process does not want */ 517 rtm = mtod(m, struct rt_msghdr *); 518 /* but RTM_DESYNC can't be filtered */ 519 if (rtm->rtm_type != RTM_DESYNC && rop->rop_msgfilter != 0 && 520 !(rop->rop_msgfilter & (1 << rtm->rtm_type))) 521 goto next; 522 switch (rtm->rtm_type) { 523 case RTM_IFANNOUNCE: 524 case RTM_DESYNC: 525 /* no tableid */ 526 break; 527 case RTM_RESOLVE: 528 case RTM_NEWADDR: 529 case RTM_DELADDR: 530 case RTM_IFINFO: 531 case RTM_80211INFO: 532 case RTM_BFD: 533 /* check against rdomain id */ 534 if (rop->rop_rtableid != RTABLE_ANY && 535 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 536 goto next; 537 break; 538 default: 539 if (rop->rop_priority != 0 && 540 rop->rop_priority < rtm->rtm_priority) 541 goto next; 542 /* check against rtable id */ 543 if (rop->rop_rtableid != RTABLE_ANY && 544 rop->rop_rtableid != rtm->rtm_tableid) 545 goto next; 546 break; 547 } 548 549 /* 550 * Check to see if the flush flag is set. If so, don't queue 551 * any more messages until the flag is cleared. 552 */ 553 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 554 goto next; 555 sounlock(so, s); 556 557 if (last) { 558 s = solock(last); 559 rtm_sendup(last, m, 1); 560 sounlock(last, s); 561 refcnt_rele_wake(&sotortpcb(last)->rop_refcnt); 562 } 563 /* keep a reference for last */ 564 refcnt_take(&rop->rop_refcnt); 565 last = rop->rop_socket; 566 } 567 SRPL_LEAVE(&sr); 568 569 if (last) { 570 s = solock(last); 571 rtm_sendup(last, m, 0); 572 sounlock(last, s); 573 refcnt_rele_wake(&sotortpcb(last)->rop_refcnt); 574 } else 575 m_freem(m); 576 } 577 578 int 579 rtm_sendup(struct socket *so, struct mbuf *m0, int more) 580 { 581 struct rtpcb *rop = sotortpcb(so); 582 struct mbuf *m; 583 584 soassertlocked(so); 585 586 if (more) { 587 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 588 if (m == NULL) 589 return (ENOMEM); 590 } else 591 m = m0; 592 593 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || 594 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { 595 /* Flag socket as desync'ed and flush required */ 596 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 597 rtm_senddesync(so); 598 m_freem(m); 599 return (ENOBUFS); 600 } 601 602 sorwakeup(so); 603 return (0); 604 } 605 606 struct rt_msghdr * 607 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 608 { 609 struct rt_msghdr *rtm; 610 struct rt_addrinfo info; 611 struct sockaddr_rtlabel sa_rl; 612 struct sockaddr_in6 sa_mask; 613 #ifdef BFD 614 struct sockaddr_bfd sa_bfd; 615 #endif 616 struct ifnet *ifp = NULL; 617 int len; 618 619 bzero(&info, sizeof(info)); 620 info.rti_info[RTAX_DST] = rt_key(rt); 621 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 622 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 623 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 624 #ifdef BFD 625 if (rt->rt_flags & RTF_BFD) 626 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 627 #endif 628 #ifdef MPLS 629 if (rt->rt_flags & RTF_MPLS) { 630 struct sockaddr_mpls sa_mpls; 631 632 bzero(&sa_mpls, sizeof(sa_mpls)); 633 sa_mpls.smpls_family = AF_MPLS; 634 sa_mpls.smpls_len = sizeof(sa_mpls); 635 sa_mpls.smpls_label = ((struct rt_mpls *) 636 rt->rt_llinfo)->mpls_label; 637 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 638 info.rti_mpls = ((struct rt_mpls *) 639 rt->rt_llinfo)->mpls_operation; 640 } 641 #endif 642 ifp = if_get(rt->rt_ifidx); 643 if (ifp != NULL) { 644 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 645 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 646 if (ifp->if_flags & IFF_POINTOPOINT) 647 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 648 } 649 if_put(ifp); 650 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 651 652 /* build new route message */ 653 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 654 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 655 656 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 657 rtm->rtm_type = type; 658 rtm->rtm_index = rt->rt_ifidx; 659 rtm->rtm_tableid = tableid; 660 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 661 rtm->rtm_flags = rt->rt_flags; 662 rtm->rtm_pid = curproc->p_p->ps_pid; 663 rtm->rtm_seq = seq; 664 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 665 rtm->rtm_addrs = info.rti_addrs; 666 #ifdef MPLS 667 rtm->rtm_mpls = info.rti_mpls; 668 #endif 669 return rtm; 670 } 671 672 int 673 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 674 struct mbuf *control) 675 { 676 struct rt_msghdr *rtm = NULL; 677 struct rtentry *rt = NULL; 678 struct rt_addrinfo info; 679 int len, seq, error = 0; 680 u_int tableid; 681 u_int8_t prio; 682 u_char vers, type; 683 684 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 685 (m = m_pullup(m, sizeof(int32_t))) == 0)) 686 return (ENOBUFS); 687 if ((m->m_flags & M_PKTHDR) == 0) 688 panic("route_output"); 689 len = m->m_pkthdr.len; 690 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 || 691 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 692 error = EINVAL; 693 goto fail; 694 } 695 vers = mtod(m, struct rt_msghdr *)->rtm_version; 696 switch (vers) { 697 case RTM_VERSION: 698 if (len < sizeof(struct rt_msghdr)) { 699 error = EINVAL; 700 goto fail; 701 } 702 if (len > RTM_MAXSIZE) { 703 error = EMSGSIZE; 704 goto fail; 705 } 706 rtm = malloc(len, M_RTABLE, M_WAITOK); 707 m_copydata(m, 0, len, (caddr_t)rtm); 708 break; 709 default: 710 error = EPROTONOSUPPORT; 711 goto fail; 712 } 713 714 /* Verify that the caller is sending an appropriate message early */ 715 switch (rtm->rtm_type) { 716 case RTM_ADD: 717 case RTM_DELETE: 718 case RTM_GET: 719 case RTM_CHANGE: 720 case RTM_PROPOSAL: 721 break; 722 default: 723 error = EOPNOTSUPP; 724 goto fail; 725 } 726 /* 727 * Verify that the header length is valid. 728 * All messages from userland start with a struct rt_msghdr. 729 */ 730 if (rtm->rtm_hdrlen == 0) /* old client */ 731 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 732 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 733 len < rtm->rtm_hdrlen) { 734 error = EINVAL; 735 goto fail; 736 } 737 738 rtm->rtm_pid = curproc->p_p->ps_pid; 739 740 /* 741 * Verify that the caller has the appropriate privilege; RTM_GET 742 * is the only operation the non-superuser is allowed. 743 */ 744 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 745 error = EACCES; 746 goto fail; 747 } 748 tableid = rtm->rtm_tableid; 749 if (!rtable_exists(tableid)) { 750 if (rtm->rtm_type == RTM_ADD) { 751 if ((error = rtable_add(tableid)) != 0) 752 goto fail; 753 } else { 754 error = EINVAL; 755 goto fail; 756 } 757 } 758 759 760 /* Do not let userland play with kernel-only flags. */ 761 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 762 error = EINVAL; 763 goto fail; 764 } 765 766 /* make sure that kernel-only bits are not set */ 767 rtm->rtm_priority &= RTP_MASK; 768 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 769 rtm->rtm_fmask &= RTF_FMASK; 770 771 if (rtm->rtm_priority != 0) { 772 if (rtm->rtm_priority > RTP_MAX || 773 rtm->rtm_priority == RTP_LOCAL) { 774 error = EINVAL; 775 goto fail; 776 } 777 prio = rtm->rtm_priority; 778 } else if (rtm->rtm_type != RTM_ADD) 779 prio = RTP_ANY; 780 else if (rtm->rtm_flags & RTF_STATIC) 781 prio = 0; 782 else 783 prio = RTP_DEFAULT; 784 785 bzero(&info, sizeof(info)); 786 info.rti_addrs = rtm->rtm_addrs; 787 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 788 len + (caddr_t)rtm, &info)) != 0) 789 goto fail; 790 info.rti_flags = rtm->rtm_flags; 791 if (rtm->rtm_type != RTM_PROPOSAL && 792 (info.rti_info[RTAX_DST] == NULL || 793 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 794 (info.rti_info[RTAX_GATEWAY] != NULL && 795 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 796 info.rti_info[RTAX_GENMASK] != NULL)) { 797 error = EINVAL; 798 goto fail; 799 } 800 #ifdef MPLS 801 info.rti_mpls = rtm->rtm_mpls; 802 #endif 803 804 if (info.rti_info[RTAX_GATEWAY] != NULL && 805 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 806 (info.rti_flags & RTF_CLONING) == 0) { 807 info.rti_flags |= RTF_LLINFO; 808 } 809 810 /* 811 * Validate RTM_PROPOSAL and pass it along or error out. 812 */ 813 if (rtm->rtm_type == RTM_PROPOSAL) { 814 if (rtm_validate_proposal(&info) == -1) { 815 error = EINVAL; 816 goto fail; 817 } 818 /* 819 * If this is a solicitation proposal forward request to 820 * all interfaces. Most handlers will ignore it but at least 821 * umb(4) will send a response to this event. 822 */ 823 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 824 struct ifnet *ifp; 825 NET_LOCK(); 826 TAILQ_FOREACH(ifp, &ifnet, if_list) { 827 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 828 } 829 NET_UNLOCK(); 830 } 831 } else { 832 error = rtm_output(rtm, &rt, &info, prio, tableid); 833 if (!error) { 834 type = rtm->rtm_type; 835 seq = rtm->rtm_seq; 836 free(rtm, M_RTABLE, len); 837 rtm = rtm_report(rt, type, seq, tableid); 838 len = rtm->rtm_msglen; 839 } 840 } 841 842 rtfree(rt); 843 if (error) { 844 rtm->rtm_errno = error; 845 } else { 846 rtm->rtm_flags |= RTF_DONE; 847 } 848 849 /* 850 * Check to see if we don't want our own messages. 851 */ 852 if (!(so->so_options & SO_USELOOPBACK)) { 853 if (rtptable.rtp_count <= 1) { 854 /* no other listener and no loopback of messages */ 855 fail: 856 free(rtm, M_RTABLE, len); 857 m_freem(m); 858 return (error); 859 } 860 } 861 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 862 m_freem(m); 863 m = NULL; 864 } else if (m->m_pkthdr.len > len) 865 m_adj(m, len - m->m_pkthdr.len); 866 free(rtm, M_RTABLE, len); 867 if (m) 868 route_input(m, so, info.rti_info[RTAX_DST] ? 869 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 870 871 return (error); 872 } 873 874 int 875 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 876 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 877 { 878 struct rtentry *rt = *prt; 879 struct ifnet *ifp = NULL; 880 int plen, newgate = 0, error = 0; 881 882 switch (rtm->rtm_type) { 883 case RTM_ADD: 884 if (info->rti_info[RTAX_GATEWAY] == NULL) { 885 error = EINVAL; 886 break; 887 } 888 889 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 890 if ((error = route_arp_conflict(rt, info))) { 891 rtfree(rt); 892 rt = NULL; 893 break; 894 } 895 896 /* 897 * We cannot go through a delete/create/insert cycle for 898 * cached route because this can lead to races in the 899 * receive path. Instead we update the L2 cache. 900 */ 901 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 902 goto change; 903 904 rtfree(rt); 905 rt = NULL; 906 907 NET_LOCK(); 908 if ((error = rtm_getifa(info, tableid)) != 0) { 909 NET_UNLOCK(); 910 break; 911 } 912 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 913 NET_UNLOCK(); 914 if (error == 0) 915 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 916 &rt->rt_rmx); 917 break; 918 case RTM_DELETE: 919 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 920 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 921 prio); 922 if (rt == NULL) { 923 error = ESRCH; 924 break; 925 } 926 927 /* 928 * If we got multipath routes, we require users to specify 929 * a matching gateway. 930 */ 931 if (ISSET(rt->rt_flags, RTF_MPATH) && 932 info->rti_info[RTAX_GATEWAY] == NULL) { 933 error = ESRCH; 934 break; 935 } 936 937 /* Detaching an interface requires the KERNEL_LOCK(). */ 938 ifp = if_get(rt->rt_ifidx); 939 KASSERT(ifp != NULL); 940 941 /* 942 * Invalidate the cache of automagically created and 943 * referenced L2 entries to make sure that ``rt_gwroute'' 944 * pointer stays valid for other CPUs. 945 */ 946 if ((ISSET(rt->rt_flags, RTF_CACHED))) { 947 NET_LOCK(); 948 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 949 /* Reset the MTU of the gateway route. */ 950 rtable_walk(tableid, rt_key(rt)->sa_family, NULL, 951 route_cleargateway, rt); 952 NET_UNLOCK(); 953 if_put(ifp); 954 break; 955 } 956 957 /* 958 * Make sure that local routes are only modified by the 959 * kernel. 960 */ 961 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 962 if_put(ifp); 963 error = EINVAL; 964 break; 965 } 966 967 rtfree(rt); 968 rt = NULL; 969 970 NET_LOCK(); 971 error = rtrequest_delete(info, prio, ifp, &rt, tableid); 972 NET_UNLOCK(); 973 if_put(ifp); 974 break; 975 case RTM_CHANGE: 976 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 977 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 978 prio); 979 /* 980 * If we got multipath routes, we require users to specify 981 * a matching gateway. 982 */ 983 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 984 (info->rti_info[RTAX_GATEWAY] == NULL)) { 985 rtfree(rt); 986 rt = NULL; 987 } 988 /* 989 * If RTAX_GATEWAY is the argument we're trying to 990 * change, try to find a compatible route. 991 */ 992 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) { 993 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 994 info->rti_info[RTAX_NETMASK], NULL, prio); 995 /* Ensure we don't pick a multipath one. */ 996 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 997 rtfree(rt); 998 rt = NULL; 999 } 1000 } 1001 1002 if (rt == NULL) { 1003 error = ESRCH; 1004 break; 1005 } 1006 1007 /* 1008 * Make sure that local routes are only modified by the 1009 * kernel. 1010 */ 1011 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1012 error = EINVAL; 1013 break; 1014 } 1015 1016 /* 1017 * RTM_CHANGE needs a perfect match. 1018 */ 1019 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family, 1020 info->rti_info[RTAX_NETMASK]); 1021 if (rt_plen(rt) != plen) { 1022 error = ESRCH; 1023 break; 1024 } 1025 1026 if (info->rti_info[RTAX_GATEWAY] != NULL) 1027 if (rt->rt_gateway == NULL || 1028 bcmp(rt->rt_gateway, 1029 info->rti_info[RTAX_GATEWAY], 1030 info->rti_info[RTAX_GATEWAY]->sa_len)) { 1031 newgate = 1; 1032 } 1033 /* 1034 * Check reachable gateway before changing the route. 1035 * New gateway could require new ifaddr, ifp; 1036 * flags may also be different; ifp may be specified 1037 * by ll sockaddr when protocol address is ambiguous. 1038 */ 1039 if (newgate || info->rti_info[RTAX_IFP] != NULL || 1040 info->rti_info[RTAX_IFA] != NULL) { 1041 struct ifaddr *ifa = NULL; 1042 1043 NET_LOCK(); 1044 if ((error = rtm_getifa(info, tableid)) != 0) { 1045 NET_UNLOCK(); 1046 break; 1047 } 1048 ifa = info->rti_ifa; 1049 if (rt->rt_ifa != ifa) { 1050 ifp = if_get(rt->rt_ifidx); 1051 KASSERT(ifp != NULL); 1052 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 1053 ifafree(rt->rt_ifa); 1054 if_put(ifp); 1055 1056 ifa->ifa_refcnt++; 1057 rt->rt_ifa = ifa; 1058 rt->rt_ifidx = ifa->ifa_ifp->if_index; 1059 /* recheck link state after ifp change */ 1060 rt_if_linkstate_change(rt, ifa->ifa_ifp, 1061 tableid); 1062 } 1063 NET_UNLOCK(); 1064 } 1065 change: 1066 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1067 /* When updating the gateway, make sure it is valid. */ 1068 if (!newgate && rt->rt_gateway->sa_family != 1069 info->rti_info[RTAX_GATEWAY]->sa_family) { 1070 error = EINVAL; 1071 break; 1072 } 1073 1074 NET_LOCK(); 1075 error = rt_setgate(rt, 1076 info->rti_info[RTAX_GATEWAY], tableid); 1077 NET_UNLOCK(); 1078 if (error) 1079 break; 1080 } 1081 #ifdef MPLS 1082 if (rtm->rtm_flags & RTF_MPLS) { 1083 NET_LOCK(); 1084 error = rt_mpls_set(rt, 1085 info->rti_info[RTAX_SRC], info->rti_mpls); 1086 NET_UNLOCK(); 1087 if (error) 1088 break; 1089 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) { 1090 NET_LOCK(); 1091 /* if gateway changed remove MPLS information */ 1092 rt_mpls_clear(rt); 1093 NET_UNLOCK(); 1094 } 1095 #endif 1096 1097 #ifdef BFD 1098 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 1099 if ((error = bfdset(rt))) 1100 break; 1101 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 1102 ISSET(rtm->rtm_fmask, RTF_BFD)) { 1103 bfdclear(rt); 1104 } 1105 #endif 1106 1107 NET_LOCK(); 1108 /* Hack to allow some flags to be toggled */ 1109 if (rtm->rtm_fmask) { 1110 /* MPLS flag it is set by rt_mpls_set() */ 1111 rtm->rtm_fmask &= ~RTF_MPLS; 1112 rtm->rtm_flags &= ~RTF_MPLS; 1113 rt->rt_flags = 1114 (rt->rt_flags & ~rtm->rtm_fmask) | 1115 (rtm->rtm_flags & rtm->rtm_fmask); 1116 } 1117 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); 1118 1119 ifp = if_get(rt->rt_ifidx); 1120 KASSERT(ifp != NULL); 1121 ifp->if_rtrequest(ifp, RTM_ADD, rt); 1122 if_put(ifp); 1123 1124 if (info->rti_info[RTAX_LABEL] != NULL) { 1125 char *rtlabel = ((struct sockaddr_rtlabel *) 1126 info->rti_info[RTAX_LABEL])->sr_label; 1127 rtlabel_unref(rt->rt_labelid); 1128 rt->rt_labelid = rtlabel_name2id(rtlabel); 1129 } 1130 if_group_routechange(info->rti_info[RTAX_DST], 1131 info->rti_info[RTAX_NETMASK]); 1132 rt->rt_locks &= ~(rtm->rtm_inits); 1133 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 1134 NET_UNLOCK(); 1135 break; 1136 case RTM_GET: 1137 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1138 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1139 prio); 1140 if (rt == NULL) 1141 error = ESRCH; 1142 break; 1143 } 1144 1145 *prt = rt; 1146 return (error); 1147 } 1148 1149 struct ifaddr * 1150 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway, 1151 unsigned int rtableid) 1152 { 1153 struct ifaddr *ifa; 1154 1155 if ((flags & RTF_GATEWAY) == 0) { 1156 /* 1157 * If we are adding a route to an interface, 1158 * and the interface is a pt to pt link 1159 * we should search for the destination 1160 * as our clue to the interface. Otherwise 1161 * we can use the local address. 1162 */ 1163 ifa = NULL; 1164 if (flags & RTF_HOST) 1165 ifa = ifa_ifwithdstaddr(dst, rtableid); 1166 if (ifa == NULL) 1167 ifa = ifa_ifwithaddr(gateway, rtableid); 1168 } else { 1169 /* 1170 * If we are adding a route to a remote net 1171 * or host, the gateway may still be on the 1172 * other end of a pt to pt link. 1173 */ 1174 ifa = ifa_ifwithdstaddr(gateway, rtableid); 1175 } 1176 if (ifa == NULL) { 1177 if (gateway->sa_family == AF_LINK) { 1178 struct sockaddr_dl *sdl = satosdl(gateway); 1179 struct ifnet *ifp = if_get(sdl->sdl_index); 1180 1181 if (ifp != NULL) 1182 ifa = ifaof_ifpforaddr(dst, ifp); 1183 if_put(ifp); 1184 } else { 1185 struct rtentry *rt; 1186 1187 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid)); 1188 if (rt != NULL) 1189 ifa = rt->rt_ifa; 1190 rtfree(rt); 1191 } 1192 } 1193 if (ifa == NULL) 1194 return (NULL); 1195 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1196 struct ifaddr *oifa = ifa; 1197 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1198 if (ifa == NULL) 1199 ifa = oifa; 1200 } 1201 return (ifa); 1202 } 1203 1204 int 1205 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid) 1206 { 1207 struct ifnet *ifp = NULL; 1208 1209 /* 1210 * The "returned" `ifa' is guaranteed to be alive only if 1211 * the NET_LOCK() is held. 1212 */ 1213 NET_ASSERT_LOCKED(); 1214 1215 /* 1216 * ifp may be specified by sockaddr_dl when protocol address 1217 * is ambiguous 1218 */ 1219 if (info->rti_info[RTAX_IFP] != NULL) { 1220 struct sockaddr_dl *sdl; 1221 1222 sdl = satosdl(info->rti_info[RTAX_IFP]); 1223 ifp = if_get(sdl->sdl_index); 1224 } 1225 1226 #ifdef IPSEC 1227 /* 1228 * If the destination is a PF_KEY address, we'll look 1229 * for the existence of a encap interface number or address 1230 * in the options list of the gateway. By default, we'll return 1231 * enc0. 1232 */ 1233 if (info->rti_info[RTAX_DST] && 1234 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1235 info->rti_ifa = enc_getifa(rtid, 0); 1236 #endif 1237 1238 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1239 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1240 1241 if (info->rti_ifa == NULL) { 1242 struct sockaddr *sa; 1243 1244 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1245 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1246 sa = info->rti_info[RTAX_DST]; 1247 1248 if (sa != NULL && ifp != NULL) 1249 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1250 else if (info->rti_info[RTAX_DST] != NULL && 1251 info->rti_info[RTAX_GATEWAY] != NULL) 1252 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1253 info->rti_info[RTAX_DST], 1254 info->rti_info[RTAX_GATEWAY], 1255 rtid); 1256 else if (sa != NULL) 1257 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1258 sa, sa, rtid); 1259 } 1260 1261 if_put(ifp); 1262 1263 if (info->rti_ifa == NULL) 1264 return (ENETUNREACH); 1265 1266 return (0); 1267 } 1268 1269 int 1270 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1271 { 1272 struct rtentry *nhrt = arg; 1273 1274 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1275 !ISSET(rt->rt_locks, RTV_MTU)) 1276 rt->rt_mtu = 0; 1277 1278 return (0); 1279 } 1280 1281 /* 1282 * Check if the user request to insert an ARP entry does not conflict 1283 * with existing ones. 1284 * 1285 * Only two entries are allowed for a given IP address: a private one 1286 * (priv) and a public one (pub). 1287 */ 1288 int 1289 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1290 { 1291 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1292 1293 if ((info->rti_flags & RTF_LLINFO) == 0 || 1294 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1295 return (0); 1296 1297 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1298 return (0); 1299 1300 /* If the entry is cached, it can be updated. */ 1301 if (ISSET(rt->rt_flags, RTF_CACHED)) 1302 return (0); 1303 1304 /* 1305 * Same destination, not cached and both "priv" or "pub" conflict. 1306 * If a second entry exists, it always conflict. 1307 */ 1308 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1309 ISSET(rt->rt_flags, RTF_MPATH)) 1310 return (EEXIST); 1311 1312 /* No conflict but an entry exist so we need to force mpath. */ 1313 info->rti_flags |= RTF_MPATH; 1314 return (0); 1315 } 1316 1317 void 1318 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1319 struct rt_kmetrics *out) 1320 { 1321 int64_t expire; 1322 1323 if (which & RTV_MTU) 1324 out->rmx_mtu = in->rmx_mtu; 1325 if (which & RTV_EXPIRE) { 1326 expire = in->rmx_expire; 1327 if (expire != 0) { 1328 expire -= gettime(); 1329 expire += getuptime(); 1330 } 1331 1332 out->rmx_expire = expire; 1333 } 1334 } 1335 1336 void 1337 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1338 { 1339 int64_t expire; 1340 1341 expire = in->rmx_expire; 1342 if (expire != 0) { 1343 expire -= getuptime(); 1344 expire += gettime(); 1345 } 1346 1347 bzero(out, sizeof(*out)); 1348 out->rmx_locks = in->rmx_locks; 1349 out->rmx_mtu = in->rmx_mtu; 1350 out->rmx_expire = expire; 1351 out->rmx_pksent = in->rmx_pksent; 1352 } 1353 1354 #define ROUNDUP(a) \ 1355 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1356 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1357 1358 int 1359 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1360 { 1361 struct sockaddr *sa; 1362 int i; 1363 1364 /* 1365 * Parse address bits, split address storage in chunks, and 1366 * set info pointers. Use sa_len for traversing the memory 1367 * and check that we stay within in the limit. 1368 */ 1369 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1370 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1371 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1372 continue; 1373 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1374 return (EINVAL); 1375 sa = (struct sockaddr *)cp; 1376 if (cp + sa->sa_len > cplim) 1377 return (EINVAL); 1378 rtinfo->rti_info[i] = sa; 1379 ADVANCE(cp, sa); 1380 } 1381 /* 1382 * Check that the address family is suitable for the route address 1383 * type. Check that each address has a size that fits its family 1384 * and its length is within the size. Strings within addresses must 1385 * be NUL terminated. 1386 */ 1387 for (i = 0; i < RTAX_MAX; i++) { 1388 size_t len, maxlen, size; 1389 1390 sa = rtinfo->rti_info[i]; 1391 if (sa == NULL) 1392 continue; 1393 maxlen = size = 0; 1394 switch (i) { 1395 case RTAX_DST: 1396 case RTAX_GATEWAY: 1397 case RTAX_SRC: 1398 switch (sa->sa_family) { 1399 case AF_INET: 1400 size = sizeof(struct sockaddr_in); 1401 break; 1402 case AF_LINK: 1403 size = sizeof(struct sockaddr_dl); 1404 break; 1405 #ifdef INET6 1406 case AF_INET6: 1407 size = sizeof(struct sockaddr_in6); 1408 break; 1409 #endif 1410 #ifdef MPLS 1411 case AF_MPLS: 1412 size = sizeof(struct sockaddr_mpls); 1413 break; 1414 #endif 1415 } 1416 break; 1417 case RTAX_IFP: 1418 if (sa->sa_family != AF_LINK) 1419 return (EAFNOSUPPORT); 1420 /* 1421 * XXX Should be sizeof(struct sockaddr_dl), but 1422 * route(8) has a bug and provides less memory. 1423 * arp(8) has another bug and uses sizeof pointer. 1424 */ 1425 size = 4; 1426 break; 1427 case RTAX_IFA: 1428 switch (sa->sa_family) { 1429 case AF_INET: 1430 size = sizeof(struct sockaddr_in); 1431 break; 1432 #ifdef INET6 1433 case AF_INET6: 1434 size = sizeof(struct sockaddr_in6); 1435 break; 1436 #endif 1437 default: 1438 return (EAFNOSUPPORT); 1439 } 1440 break; 1441 case RTAX_LABEL: 1442 sa->sa_family = AF_UNSPEC; 1443 maxlen = RTLABEL_LEN; 1444 size = sizeof(struct sockaddr_rtlabel); 1445 break; 1446 #ifdef BFD 1447 case RTAX_BFD: 1448 sa->sa_family = AF_UNSPEC; 1449 size = sizeof(struct sockaddr_bfd); 1450 break; 1451 #endif 1452 case RTAX_DNS: 1453 /* more validation in rtm_validate_proposal */ 1454 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1455 return (EINVAL); 1456 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1457 sr_dns)) 1458 return (EINVAL); 1459 switch (sa->sa_family) { 1460 case AF_INET: 1461 #ifdef INET6 1462 case AF_INET6: 1463 #endif 1464 break; 1465 default: 1466 return (EAFNOSUPPORT); 1467 } 1468 break; 1469 case RTAX_STATIC: 1470 sa->sa_family = AF_UNSPEC; 1471 maxlen = RTSTATIC_LEN; 1472 size = sizeof(struct sockaddr_rtstatic); 1473 break; 1474 case RTAX_SEARCH: 1475 sa->sa_family = AF_UNSPEC; 1476 maxlen = RTSEARCH_LEN; 1477 size = sizeof(struct sockaddr_rtsearch); 1478 break; 1479 } 1480 if (size) { 1481 /* memory for the full struct must be provided */ 1482 if (sa->sa_len < size) 1483 return (EINVAL); 1484 } 1485 if (maxlen) { 1486 /* this should not happen */ 1487 if (2 + maxlen > size) 1488 return (EINVAL); 1489 /* strings must be NUL terminated within the struct */ 1490 len = strnlen(sa->sa_data, maxlen); 1491 if (len >= maxlen || 2 + len >= sa->sa_len) 1492 return (EINVAL); 1493 break; 1494 } 1495 } 1496 return (0); 1497 } 1498 1499 struct mbuf * 1500 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1501 { 1502 struct rt_msghdr *rtm; 1503 struct mbuf *m; 1504 int i; 1505 struct sockaddr *sa; 1506 int len, dlen, hlen; 1507 1508 switch (type) { 1509 case RTM_DELADDR: 1510 case RTM_NEWADDR: 1511 len = sizeof(struct ifa_msghdr); 1512 break; 1513 case RTM_IFINFO: 1514 len = sizeof(struct if_msghdr); 1515 break; 1516 case RTM_IFANNOUNCE: 1517 len = sizeof(struct if_announcemsghdr); 1518 break; 1519 #ifdef BFD 1520 case RTM_BFD: 1521 len = sizeof(struct bfd_msghdr); 1522 break; 1523 #endif 1524 case RTM_80211INFO: 1525 len = sizeof(struct if_ieee80211_msghdr); 1526 break; 1527 default: 1528 len = sizeof(struct rt_msghdr); 1529 break; 1530 } 1531 if (len > MCLBYTES) 1532 panic("rtm_msg1"); 1533 m = m_gethdr(M_DONTWAIT, MT_DATA); 1534 if (m && len > MHLEN) { 1535 MCLGET(m, M_DONTWAIT); 1536 if ((m->m_flags & M_EXT) == 0) { 1537 m_free(m); 1538 m = NULL; 1539 } 1540 } 1541 if (m == NULL) 1542 return (m); 1543 m->m_pkthdr.len = m->m_len = hlen = len; 1544 m->m_pkthdr.ph_ifidx = 0; 1545 rtm = mtod(m, struct rt_msghdr *); 1546 bzero(rtm, len); 1547 for (i = 0; i < RTAX_MAX; i++) { 1548 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1549 continue; 1550 rtinfo->rti_addrs |= (1 << i); 1551 dlen = ROUNDUP(sa->sa_len); 1552 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1553 m_freem(m); 1554 return (NULL); 1555 } 1556 len += dlen; 1557 } 1558 rtm->rtm_msglen = len; 1559 rtm->rtm_hdrlen = hlen; 1560 rtm->rtm_version = RTM_VERSION; 1561 rtm->rtm_type = type; 1562 return (m); 1563 } 1564 1565 int 1566 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1567 struct walkarg *w) 1568 { 1569 int i; 1570 int len, dlen, hlen, second_time = 0; 1571 caddr_t cp0; 1572 1573 rtinfo->rti_addrs = 0; 1574 again: 1575 switch (type) { 1576 case RTM_DELADDR: 1577 case RTM_NEWADDR: 1578 len = sizeof(struct ifa_msghdr); 1579 break; 1580 case RTM_IFINFO: 1581 len = sizeof(struct if_msghdr); 1582 break; 1583 default: 1584 len = sizeof(struct rt_msghdr); 1585 break; 1586 } 1587 hlen = len; 1588 if ((cp0 = cp) != NULL) 1589 cp += len; 1590 for (i = 0; i < RTAX_MAX; i++) { 1591 struct sockaddr *sa; 1592 1593 if ((sa = rtinfo->rti_info[i]) == NULL) 1594 continue; 1595 rtinfo->rti_addrs |= (1 << i); 1596 dlen = ROUNDUP(sa->sa_len); 1597 if (cp) { 1598 bcopy(sa, cp, (size_t)dlen); 1599 cp += dlen; 1600 } 1601 len += dlen; 1602 } 1603 /* align message length to the next natural boundary */ 1604 len = ALIGN(len); 1605 if (cp == 0 && w != NULL && !second_time) { 1606 w->w_needed += len; 1607 if (w->w_needed <= 0 && w->w_where) { 1608 if (w->w_tmemsize < len) { 1609 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1610 w->w_tmem = malloc(len, M_RTABLE, 1611 M_NOWAIT | M_ZERO); 1612 if (w->w_tmem) 1613 w->w_tmemsize = len; 1614 } 1615 if (w->w_tmem) { 1616 cp = w->w_tmem; 1617 second_time = 1; 1618 goto again; 1619 } else 1620 w->w_where = 0; 1621 } 1622 } 1623 if (cp && w) /* clear the message header */ 1624 bzero(cp0, hlen); 1625 1626 if (cp) { 1627 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1628 1629 rtm->rtm_version = RTM_VERSION; 1630 rtm->rtm_type = type; 1631 rtm->rtm_msglen = len; 1632 rtm->rtm_hdrlen = hlen; 1633 } 1634 return (len); 1635 } 1636 1637 void 1638 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1639 { 1640 struct rt_addrinfo info; 1641 struct ifnet *ifp; 1642 struct sockaddr_rtlabel sa_rl; 1643 struct sockaddr_in6 sa_mask; 1644 1645 memset(&info, 0, sizeof(info)); 1646 info.rti_info[RTAX_DST] = rt_key(rt); 1647 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1648 if (!ISSET(rt->rt_flags, RTF_HOST)) 1649 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1650 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1651 ifp = if_get(rt->rt_ifidx); 1652 if (ifp != NULL) { 1653 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1654 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1655 } 1656 1657 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1658 rtableid); 1659 if_put(ifp); 1660 } 1661 1662 /* 1663 * This routine is called to generate a message from the routing 1664 * socket indicating that a redirect has occurred, a routing lookup 1665 * has failed, or that a protocol has detected timeouts to a particular 1666 * destination. 1667 */ 1668 void 1669 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1670 u_int ifidx, int error, u_int tableid) 1671 { 1672 struct rt_msghdr *rtm; 1673 struct mbuf *m; 1674 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1675 1676 if (rtptable.rtp_count == 0) 1677 return; 1678 m = rtm_msg1(type, rtinfo); 1679 if (m == NULL) 1680 return; 1681 rtm = mtod(m, struct rt_msghdr *); 1682 rtm->rtm_flags = RTF_DONE | flags; 1683 rtm->rtm_priority = prio; 1684 rtm->rtm_errno = error; 1685 rtm->rtm_tableid = tableid; 1686 rtm->rtm_addrs = rtinfo->rti_addrs; 1687 rtm->rtm_index = ifidx; 1688 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1689 } 1690 1691 /* 1692 * This routine is called to generate a message from the routing 1693 * socket indicating that the status of a network interface has changed. 1694 */ 1695 void 1696 rtm_ifchg(struct ifnet *ifp) 1697 { 1698 struct if_msghdr *ifm; 1699 struct mbuf *m; 1700 1701 if (rtptable.rtp_count == 0) 1702 return; 1703 m = rtm_msg1(RTM_IFINFO, NULL); 1704 if (m == NULL) 1705 return; 1706 ifm = mtod(m, struct if_msghdr *); 1707 ifm->ifm_index = ifp->if_index; 1708 ifm->ifm_tableid = ifp->if_rdomain; 1709 ifm->ifm_flags = ifp->if_flags; 1710 ifm->ifm_xflags = ifp->if_xflags; 1711 if_getdata(ifp, &ifm->ifm_data); 1712 ifm->ifm_addrs = 0; 1713 route_input(m, NULL, AF_UNSPEC); 1714 } 1715 1716 /* 1717 * This is called to generate messages from the routing socket 1718 * indicating a network interface has had addresses associated with it. 1719 * if we ever reverse the logic and replace messages TO the routing 1720 * socket indicate a request to configure interfaces, then it will 1721 * be unnecessary as the routing socket will automatically generate 1722 * copies of it. 1723 */ 1724 void 1725 rtm_addr(int cmd, struct ifaddr *ifa) 1726 { 1727 struct ifnet *ifp = ifa->ifa_ifp; 1728 struct mbuf *m; 1729 struct rt_addrinfo info; 1730 struct ifa_msghdr *ifam; 1731 1732 if (rtptable.rtp_count == 0) 1733 return; 1734 1735 memset(&info, 0, sizeof(info)); 1736 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1737 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1738 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1739 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1740 if ((m = rtm_msg1(cmd, &info)) == NULL) 1741 return; 1742 ifam = mtod(m, struct ifa_msghdr *); 1743 ifam->ifam_index = ifp->if_index; 1744 ifam->ifam_metric = ifa->ifa_metric; 1745 ifam->ifam_flags = ifa->ifa_flags; 1746 ifam->ifam_addrs = info.rti_addrs; 1747 ifam->ifam_tableid = ifp->if_rdomain; 1748 1749 route_input(m, NULL, 1750 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1751 } 1752 1753 /* 1754 * This is called to generate routing socket messages indicating 1755 * network interface arrival and departure. 1756 */ 1757 void 1758 rtm_ifannounce(struct ifnet *ifp, int what) 1759 { 1760 struct if_announcemsghdr *ifan; 1761 struct mbuf *m; 1762 1763 if (rtptable.rtp_count == 0) 1764 return; 1765 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1766 if (m == NULL) 1767 return; 1768 ifan = mtod(m, struct if_announcemsghdr *); 1769 ifan->ifan_index = ifp->if_index; 1770 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1771 ifan->ifan_what = what; 1772 route_input(m, NULL, AF_UNSPEC); 1773 } 1774 1775 #ifdef BFD 1776 /* 1777 * This is used to generate routing socket messages indicating 1778 * the state of a BFD session. 1779 */ 1780 void 1781 rtm_bfd(struct bfd_config *bfd) 1782 { 1783 struct bfd_msghdr *bfdm; 1784 struct sockaddr_bfd sa_bfd; 1785 struct mbuf *m; 1786 struct rt_addrinfo info; 1787 1788 if (rtptable.rtp_count == 0) 1789 return; 1790 memset(&info, 0, sizeof(info)); 1791 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1792 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1793 1794 m = rtm_msg1(RTM_BFD, &info); 1795 if (m == NULL) 1796 return; 1797 bfdm = mtod(m, struct bfd_msghdr *); 1798 bfdm->bm_addrs = info.rti_addrs; 1799 1800 bfd2sa(bfd->bc_rt, &sa_bfd); 1801 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1802 1803 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1804 } 1805 #endif /* BFD */ 1806 1807 /* 1808 * This is used to generate routing socket messages indicating 1809 * the state of an ieee80211 interface. 1810 */ 1811 void 1812 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1813 { 1814 struct if_ieee80211_msghdr *ifim; 1815 struct mbuf *m; 1816 1817 if (rtptable.rtp_count == 0) 1818 return; 1819 m = rtm_msg1(RTM_80211INFO, NULL); 1820 if (m == NULL) 1821 return; 1822 ifim = mtod(m, struct if_ieee80211_msghdr *); 1823 ifim->ifim_index = ifp->if_index; 1824 ifim->ifim_tableid = ifp->if_rdomain; 1825 1826 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1827 route_input(m, NULL, AF_UNSPEC); 1828 } 1829 1830 /* 1831 * This is used to generate routing socket messages indicating 1832 * the address selection proposal from an interface. 1833 */ 1834 void 1835 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1836 uint8_t prio) 1837 { 1838 struct rt_msghdr *rtm; 1839 struct mbuf *m; 1840 1841 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1842 if (m == NULL) 1843 return; 1844 rtm = mtod(m, struct rt_msghdr *); 1845 rtm->rtm_flags = RTF_DONE | flags; 1846 rtm->rtm_priority = prio; 1847 rtm->rtm_tableid = ifp->if_rdomain; 1848 rtm->rtm_index = ifp->if_index; 1849 rtm->rtm_addrs = rtinfo->rti_addrs; 1850 1851 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1852 } 1853 1854 /* 1855 * This is used in dumping the kernel table via sysctl(). 1856 */ 1857 int 1858 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1859 { 1860 struct walkarg *w = v; 1861 int error = 0, size; 1862 struct rt_addrinfo info; 1863 struct ifnet *ifp; 1864 #ifdef BFD 1865 struct sockaddr_bfd sa_bfd; 1866 #endif 1867 struct sockaddr_rtlabel sa_rl; 1868 struct sockaddr_in6 sa_mask; 1869 1870 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1871 return 0; 1872 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1873 u_int8_t prio = w->w_arg & RTP_MASK; 1874 if (w->w_arg < 0) { 1875 prio = (-w->w_arg) & RTP_MASK; 1876 /* Show all routes that are not this priority */ 1877 if (prio == (rt->rt_priority & RTP_MASK)) 1878 return 0; 1879 } else { 1880 if (prio != (rt->rt_priority & RTP_MASK) && 1881 prio != RTP_ANY) 1882 return 0; 1883 } 1884 } 1885 bzero(&info, sizeof(info)); 1886 info.rti_info[RTAX_DST] = rt_key(rt); 1887 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1888 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1889 ifp = if_get(rt->rt_ifidx); 1890 if (ifp != NULL) { 1891 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1892 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1893 if (ifp->if_flags & IFF_POINTOPOINT) 1894 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1895 } 1896 if_put(ifp); 1897 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1898 #ifdef BFD 1899 if (rt->rt_flags & RTF_BFD) 1900 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1901 #endif 1902 #ifdef MPLS 1903 if (rt->rt_flags & RTF_MPLS) { 1904 struct sockaddr_mpls sa_mpls; 1905 1906 bzero(&sa_mpls, sizeof(sa_mpls)); 1907 sa_mpls.smpls_family = AF_MPLS; 1908 sa_mpls.smpls_len = sizeof(sa_mpls); 1909 sa_mpls.smpls_label = ((struct rt_mpls *) 1910 rt->rt_llinfo)->mpls_label; 1911 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1912 info.rti_mpls = ((struct rt_mpls *) 1913 rt->rt_llinfo)->mpls_operation; 1914 } 1915 #endif 1916 1917 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1918 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1919 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1920 1921 rtm->rtm_pid = curproc->p_p->ps_pid; 1922 rtm->rtm_flags = rt->rt_flags; 1923 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1924 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1925 /* Do not account the routing table's reference. */ 1926 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1927 rtm->rtm_index = rt->rt_ifidx; 1928 rtm->rtm_addrs = info.rti_addrs; 1929 rtm->rtm_tableid = id; 1930 #ifdef MPLS 1931 rtm->rtm_mpls = info.rti_mpls; 1932 #endif 1933 if ((error = copyout(rtm, w->w_where, size)) != 0) 1934 w->w_where = NULL; 1935 else 1936 w->w_where += size; 1937 } 1938 return (error); 1939 } 1940 1941 int 1942 sysctl_iflist(int af, struct walkarg *w) 1943 { 1944 struct ifnet *ifp; 1945 struct ifaddr *ifa; 1946 struct rt_addrinfo info; 1947 int len, error = 0; 1948 1949 bzero(&info, sizeof(info)); 1950 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1951 if (w->w_arg && w->w_arg != ifp->if_index) 1952 continue; 1953 /* Copy the link-layer address first */ 1954 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1955 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 1956 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1957 struct if_msghdr *ifm; 1958 1959 ifm = (struct if_msghdr *)w->w_tmem; 1960 ifm->ifm_index = ifp->if_index; 1961 ifm->ifm_tableid = ifp->if_rdomain; 1962 ifm->ifm_flags = ifp->if_flags; 1963 if_getdata(ifp, &ifm->ifm_data); 1964 ifm->ifm_addrs = info.rti_addrs; 1965 error = copyout(ifm, w->w_where, len); 1966 if (error) 1967 return (error); 1968 w->w_where += len; 1969 } 1970 info.rti_info[RTAX_IFP] = NULL; 1971 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1972 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 1973 if (af && af != ifa->ifa_addr->sa_family) 1974 continue; 1975 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1976 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1977 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1978 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 1979 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1980 struct ifa_msghdr *ifam; 1981 1982 ifam = (struct ifa_msghdr *)w->w_tmem; 1983 ifam->ifam_index = ifa->ifa_ifp->if_index; 1984 ifam->ifam_flags = ifa->ifa_flags; 1985 ifam->ifam_metric = ifa->ifa_metric; 1986 ifam->ifam_addrs = info.rti_addrs; 1987 error = copyout(w->w_tmem, w->w_where, len); 1988 if (error) 1989 return (error); 1990 w->w_where += len; 1991 } 1992 } 1993 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 1994 info.rti_info[RTAX_BRD] = NULL; 1995 } 1996 return (0); 1997 } 1998 1999 int 2000 sysctl_ifnames(struct walkarg *w) 2001 { 2002 struct if_nameindex_msg ifn; 2003 struct ifnet *ifp; 2004 int error = 0; 2005 2006 /* XXX ignore tableid for now */ 2007 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2008 if (w->w_arg && w->w_arg != ifp->if_index) 2009 continue; 2010 w->w_needed += sizeof(ifn); 2011 if (w->w_where && w->w_needed <= 0) { 2012 2013 memset(&ifn, 0, sizeof(ifn)); 2014 ifn.if_index = ifp->if_index; 2015 strlcpy(ifn.if_name, ifp->if_xname, 2016 sizeof(ifn.if_name)); 2017 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2018 if (error) 2019 return (error); 2020 w->w_where += sizeof(ifn); 2021 } 2022 } 2023 2024 return (0); 2025 } 2026 2027 int 2028 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2029 size_t newlen) 2030 { 2031 int i, error = EINVAL; 2032 u_char af; 2033 struct walkarg w; 2034 struct rt_tableinfo tableinfo; 2035 u_int tableid = 0; 2036 2037 if (new) 2038 return (EPERM); 2039 if (namelen < 3 || namelen > 4) 2040 return (EINVAL); 2041 af = name[0]; 2042 bzero(&w, sizeof(w)); 2043 w.w_where = where; 2044 w.w_given = *given; 2045 w.w_needed = 0 - w.w_given; 2046 w.w_op = name[1]; 2047 w.w_arg = name[2]; 2048 2049 if (namelen == 4) { 2050 tableid = name[3]; 2051 if (!rtable_exists(tableid)) 2052 return (ENOENT); 2053 } else 2054 tableid = curproc->p_p->ps_rtableid; 2055 2056 switch (w.w_op) { 2057 case NET_RT_DUMP: 2058 case NET_RT_FLAGS: 2059 NET_LOCK(); 2060 for (i = 1; i <= AF_MAX; i++) { 2061 if (af != 0 && af != i) 2062 continue; 2063 2064 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2065 &w); 2066 if (error == EAFNOSUPPORT) 2067 error = 0; 2068 if (error) 2069 break; 2070 } 2071 NET_UNLOCK(); 2072 break; 2073 2074 case NET_RT_IFLIST: 2075 NET_LOCK(); 2076 error = sysctl_iflist(af, &w); 2077 NET_UNLOCK(); 2078 break; 2079 2080 case NET_RT_STATS: 2081 return (sysctl_rtable_rtstat(where, given, new)); 2082 case NET_RT_TABLE: 2083 tableid = w.w_arg; 2084 if (!rtable_exists(tableid)) 2085 return (ENOENT); 2086 memset(&tableinfo, 0, sizeof tableinfo); 2087 tableinfo.rti_tableid = tableid; 2088 tableinfo.rti_domainid = rtable_l2(tableid); 2089 error = sysctl_rdstruct(where, given, new, 2090 &tableinfo, sizeof(tableinfo)); 2091 return (error); 2092 case NET_RT_IFNAMES: 2093 NET_LOCK(); 2094 error = sysctl_ifnames(&w); 2095 NET_UNLOCK(); 2096 break; 2097 } 2098 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2099 w.w_needed += w.w_given; 2100 if (where) { 2101 *given = w.w_where - (caddr_t)where; 2102 if (*given < w.w_needed) 2103 return (ENOMEM); 2104 } else 2105 *given = (11 * w.w_needed) / 10; 2106 2107 return (error); 2108 } 2109 2110 int 2111 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2112 { 2113 extern struct cpumem *rtcounters; 2114 uint64_t counters[rts_ncounters]; 2115 struct rtstat rtstat; 2116 uint32_t *words = (uint32_t *)&rtstat; 2117 int i; 2118 2119 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2120 memset(&rtstat, 0, sizeof rtstat); 2121 counters_read(rtcounters, counters, nitems(counters)); 2122 2123 for (i = 0; i < nitems(counters); i++) 2124 words[i] = (uint32_t)counters[i]; 2125 2126 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2127 } 2128 2129 int 2130 rtm_validate_proposal(struct rt_addrinfo *info) 2131 { 2132 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2133 RTA_SEARCH)) { 2134 return -1; 2135 } 2136 2137 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2138 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2139 if (sa == NULL) 2140 return -1; 2141 switch (sa->sa_family) { 2142 case AF_INET: 2143 if (sa->sa_len != sizeof(struct sockaddr_in)) 2144 return -1; 2145 break; 2146 case AF_INET6: 2147 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2148 return -1; 2149 break; 2150 default: 2151 return -1; 2152 } 2153 } 2154 2155 if (ISSET(info->rti_addrs, RTA_IFA)) { 2156 struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2157 if (sa == NULL) 2158 return -1; 2159 switch (sa->sa_family) { 2160 case AF_INET: 2161 if (sa->sa_len != sizeof(struct sockaddr_in)) 2162 return -1; 2163 break; 2164 case AF_INET6: 2165 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2166 return -1; 2167 break; 2168 default: 2169 return -1; 2170 } 2171 } 2172 2173 if (ISSET(info->rti_addrs, RTA_DNS)) { 2174 struct sockaddr_rtdns *rtdns = 2175 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2176 if (rtdns == NULL) 2177 return -1; 2178 if (rtdns->sr_len > sizeof(*rtdns)) 2179 return -1; 2180 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2181 return -1; 2182 switch (rtdns->sr_family) { 2183 case AF_INET: 2184 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2185 sr_dns)) % sizeof(struct in_addr) != 0) 2186 return -1; 2187 break; 2188 #ifdef INET6 2189 case AF_INET6: 2190 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2191 sr_dns)) % sizeof(struct in6_addr) != 0) 2192 return -1; 2193 break; 2194 #endif 2195 default: 2196 return -1; 2197 } 2198 } 2199 2200 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2201 struct sockaddr_rtstatic *rtstatic = 2202 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2203 if (rtstatic == NULL) 2204 return -1; 2205 if (rtstatic->sr_len > sizeof(*rtstatic)) 2206 return -1; 2207 if (rtstatic->sr_len <= 2208 offsetof(struct sockaddr_rtstatic, sr_static)) 2209 return -1; 2210 } 2211 2212 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2213 struct sockaddr_rtsearch *rtsearch = 2214 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2215 if (rtsearch == NULL) 2216 return -1; 2217 if (rtsearch->sr_len > sizeof(*rtsearch)) 2218 return -1; 2219 if (rtsearch->sr_len <= 2220 offsetof(struct sockaddr_rtsearch, sr_search)) 2221 return -1; 2222 } 2223 2224 return 0; 2225 } 2226 2227 /* 2228 * Definitions of protocols supported in the ROUTE domain. 2229 */ 2230 2231 extern struct domain routedomain; /* or at least forward */ 2232 2233 struct protosw routesw[] = { 2234 { 2235 .pr_type = SOCK_RAW, 2236 .pr_domain = &routedomain, 2237 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2238 .pr_output = route_output, 2239 .pr_ctloutput = route_ctloutput, 2240 .pr_usrreq = route_usrreq, 2241 .pr_attach = route_attach, 2242 .pr_detach = route_detach, 2243 .pr_init = route_prinit, 2244 .pr_sysctl = sysctl_rtable 2245 } 2246 }; 2247 2248 struct domain routedomain = { 2249 .dom_family = PF_ROUTE, 2250 .dom_name = "route", 2251 .dom_init = route_init, 2252 .dom_protosw = routesw, 2253 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2254 }; 2255