1 /* $OpenBSD: rtsock.c,v 1.311 2021/04/26 08:21:36 claudio Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
*
 *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/domain.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/srp.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>

#include <netinet/in.h>

#ifdef MPLS
#include <netmpls/mpls.h>
#endif
#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#include <net/if_enc.h>
#endif
#ifdef BFD
#include <net/bfd.h>
#endif

#include <sys/stdarg.h>
#include <sys/kernel.h>
#include <sys/timeout.h>

/* Send/receive buffer sizes handed to soreserve() by route_attach(). */
#define ROUTESNDQ	8192
#define ROUTERCVQ	8192

/*
 * Fixed address reported as the peer of every routing socket
 * (PRU_PEERADDR) and used as the source when messages are appended
 * to a receive buffer.
 */
const struct sockaddr route_src = { 2, PF_ROUTE, };

/* Argument block passed to rtm_msg2() and the sysctl_*() helpers. */
struct walkarg {
	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
	caddr_t	w_where, w_tmem;
};

void	route_prinit(void);
void	rcb_ref(void *, void *);
void	rcb_unref(void *, void *);
int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
	    struct mbuf *);
int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
	    struct mbuf *, struct proc *);
void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int	route_cleargateway(struct rtentry *, void *, unsigned int);
void	rtm_senddesync_timer(void *);
void	rtm_senddesync(struct socket *);
int	rtm_sendup(struct socket *, struct mbuf *);

int	rtm_getifa(struct rt_addrinfo *, unsigned int);
int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
	    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
		     struct walkarg *);
int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
int		 rtm_validate_proposal(struct rt_addrinfo *);
void		 rtm_setmetrics(u_long, const struct rt_metrics *,
		     struct rt_kmetrics *);
void		 rtm_getmetrics(const struct rt_kmetrics *,
		     struct rt_metrics *);

int		 sysctl_iflist(int, struct walkarg *);
int		 sysctl_ifnames(struct walkarg *);
int		 sysctl_rtable_rtstat(void *, size_t *, void *);

int		 rt_setsource(unsigned int, struct sockaddr *);

/*
 * Per-socket control block of a routing socket.
 *
 * Locks used to protect struct members
 *       I       immutable after creation
 *       sK      solock (kernel lock)
 */
struct rtpcb {
	struct socket		*rop_socket;		/* [I] owning socket */

	SRPL_ENTRY(rtpcb)	rop_list;	/* linkage on rtptable.rtp_list */
	struct refcnt		rop_refcnt;	/* taken/released by rcb_ref/rcb_unref */
	struct timeout		rop_timeout;	/* runs rtm_senddesync_timer() */
	unsigned int		rop_msgfilter;		/* [sK] ROUTE_MSGFILTER */
	unsigned int		rop_flagfilter;		/* [sK] ROUTE_FLAGFILTER */
	unsigned int		rop_flags;		/* [sK] ROUTECB_FLAG_* */
	u_int			rop_rtableid;		/* [sK] ROUTE_TABLEFILTER */
	unsigned short		rop_proto;		/* [I] protocol given at attach */
	u_char			rop_priority;		/* [sK] ROUTE_PRIOFILTER */
};
/* Fetch the routing control block stored in a socket's so_pcb. */
#define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)

/* Global list of all attached routing sockets. */
struct rtptable {
	SRPL_HEAD(, rtpcb)	rtp_list;	/* attached control blocks */
	struct srpl_rc		rtp_rc;		/* ref callbacks for the list */
	struct rwlock		rtp_lk;		/* held for list insert/remove */
	unsigned int		rtp_count;	/* number of attached sockets */
};

struct pool rtpcb_pool;
struct rtptable rtptable;

/*
 * These flags and timeout are used for indicating to userland (via a
 * RTM_DESYNC msg) when the route socket has overflowed and messages
 * have been lost.
177 */ 178 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 179 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 180 queueing more packets */ 181 182 #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ 183 184 void 185 route_prinit(void) 186 { 187 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); 188 rw_init(&rtptable.rtp_lk, "rtsock"); 189 SRPL_INIT(&rtptable.rtp_list); 190 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, 191 IPL_NONE, PR_WAITOK, "rtpcb", NULL); 192 } 193 194 void 195 rcb_ref(void *null, void *v) 196 { 197 struct rtpcb *rop = v; 198 199 refcnt_take(&rop->rop_refcnt); 200 } 201 202 void 203 rcb_unref(void *null, void *v) 204 { 205 struct rtpcb *rop = v; 206 207 refcnt_rele_wake(&rop->rop_refcnt); 208 } 209 210 int 211 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 212 struct mbuf *control, struct proc *p) 213 { 214 struct rtpcb *rop; 215 int error = 0; 216 217 if (req == PRU_CONTROL) 218 return (EOPNOTSUPP); 219 220 soassertlocked(so); 221 222 if (control && control->m_len) { 223 error = EOPNOTSUPP; 224 goto release; 225 } 226 227 rop = sotortpcb(so); 228 if (rop == NULL) { 229 error = EINVAL; 230 goto release; 231 } 232 233 switch (req) { 234 /* no connect, bind, accept. Socket is connected from the start */ 235 case PRU_CONNECT: 236 case PRU_BIND: 237 case PRU_CONNECT2: 238 case PRU_LISTEN: 239 case PRU_ACCEPT: 240 error = EOPNOTSUPP; 241 break; 242 243 case PRU_DISCONNECT: 244 case PRU_ABORT: 245 soisdisconnected(so); 246 break; 247 case PRU_SHUTDOWN: 248 socantsendmore(so); 249 break; 250 case PRU_SENSE: 251 /* stat: don't bother with a blocksize. 
*/ 252 break; 253 254 /* minimal support, just implement a fake peer address */ 255 case PRU_SOCKADDR: 256 error = EINVAL; 257 break; 258 case PRU_PEERADDR: 259 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); 260 nam->m_len = route_src.sa_len; 261 break; 262 263 case PRU_RCVD: 264 /* 265 * If we are in a FLUSH state, check if the buffer is 266 * empty so that we can clear the flag. 267 */ 268 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 269 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == 270 rop->rop_socket->so_rcv.sb_hiwat))) 271 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 272 break; 273 274 case PRU_RCVOOB: 275 case PRU_SENDOOB: 276 error = EOPNOTSUPP; 277 break; 278 case PRU_SEND: 279 if (nam) { 280 error = EISCONN; 281 break; 282 } 283 error = (*so->so_proto->pr_output)(m, so, NULL, NULL); 284 m = NULL; 285 break; 286 default: 287 panic("route_usrreq"); 288 } 289 290 release: 291 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 292 m_freem(control); 293 m_freem(m); 294 } 295 return (error); 296 } 297 298 int 299 route_attach(struct socket *so, int proto) 300 { 301 struct rtpcb *rop; 302 int error; 303 304 error = soreserve(so, ROUTESNDQ, ROUTERCVQ); 305 if (error) 306 return (error); 307 /* 308 * use the rawcb but allocate a rtpcb, this 309 * code does not care about the additional fields 310 * and works directly on the raw socket. 
311 */ 312 rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO); 313 so->so_pcb = rop; 314 /* Init the timeout structure */ 315 timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so); 316 refcnt_init(&rop->rop_refcnt); 317 318 rop->rop_socket = so; 319 rop->rop_proto = proto; 320 321 rop->rop_rtableid = curproc->p_p->ps_rtableid; 322 323 soisconnected(so); 324 so->so_options |= SO_USELOOPBACK; 325 326 rw_enter(&rtptable.rtp_lk, RW_WRITE); 327 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, 328 rop_list); 329 rtptable.rtp_count++; 330 rw_exit(&rtptable.rtp_lk); 331 332 return (0); 333 } 334 335 int 336 route_detach(struct socket *so) 337 { 338 struct rtpcb *rop; 339 340 soassertlocked(so); 341 342 rop = sotortpcb(so); 343 if (rop == NULL) 344 return (EINVAL); 345 346 rw_enter(&rtptable.rtp_lk, RW_WRITE); 347 348 timeout_del(&rop->rop_timeout); 349 rtptable.rtp_count--; 350 351 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, 352 rop_list); 353 rw_exit(&rtptable.rtp_lk); 354 355 /* wait for all references to drop */ 356 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); 357 358 so->so_pcb = NULL; 359 KASSERT((so->so_state & SS_NOFDREF) == 0); 360 pool_put(&rtpcb_pool, rop); 361 362 return (0); 363 } 364 365 int 366 route_ctloutput(int op, struct socket *so, int level, int optname, 367 struct mbuf *m) 368 { 369 struct rtpcb *rop = sotortpcb(so); 370 int error = 0; 371 unsigned int tid, prio; 372 373 if (level != AF_ROUTE) 374 return (EINVAL); 375 376 switch (op) { 377 case PRCO_SETOPT: 378 switch (optname) { 379 case ROUTE_MSGFILTER: 380 if (m == NULL || m->m_len != sizeof(unsigned int)) 381 error = EINVAL; 382 else 383 rop->rop_msgfilter = *mtod(m, unsigned int *); 384 break; 385 case ROUTE_TABLEFILTER: 386 if (m == NULL || m->m_len != sizeof(unsigned int)) { 387 error = EINVAL; 388 break; 389 } 390 tid = *mtod(m, unsigned int *); 391 if (tid != RTABLE_ANY && !rtable_exists(tid)) 392 error = ENOENT; 393 else 394 rop->rop_rtableid = 
tid; 395 break; 396 case ROUTE_PRIOFILTER: 397 if (m == NULL || m->m_len != sizeof(unsigned int)) { 398 error = EINVAL; 399 break; 400 } 401 prio = *mtod(m, unsigned int *); 402 if (prio > RTP_MAX) 403 error = EINVAL; 404 else 405 rop->rop_priority = prio; 406 break; 407 case ROUTE_FLAGFILTER: 408 if (m == NULL || m->m_len != sizeof(unsigned int)) 409 error = EINVAL; 410 else 411 rop->rop_flagfilter = *mtod(m, unsigned int *); 412 break; 413 default: 414 error = ENOPROTOOPT; 415 break; 416 } 417 break; 418 case PRCO_GETOPT: 419 switch (optname) { 420 case ROUTE_MSGFILTER: 421 m->m_len = sizeof(unsigned int); 422 *mtod(m, unsigned int *) = rop->rop_msgfilter; 423 break; 424 case ROUTE_TABLEFILTER: 425 m->m_len = sizeof(unsigned int); 426 *mtod(m, unsigned int *) = rop->rop_rtableid; 427 break; 428 case ROUTE_PRIOFILTER: 429 m->m_len = sizeof(unsigned int); 430 *mtod(m, unsigned int *) = rop->rop_priority; 431 break; 432 case ROUTE_FLAGFILTER: 433 m->m_len = sizeof(unsigned int); 434 *mtod(m, unsigned int *) = rop->rop_flagfilter; 435 break; 436 default: 437 error = ENOPROTOOPT; 438 break; 439 } 440 } 441 return (error); 442 } 443 444 void 445 rtm_senddesync_timer(void *xso) 446 { 447 struct socket *so = xso; 448 int s; 449 450 s = solock(so); 451 rtm_senddesync(so); 452 sounlock(so, s); 453 } 454 455 void 456 rtm_senddesync(struct socket *so) 457 { 458 struct rtpcb *rop = sotortpcb(so); 459 struct mbuf *desync_mbuf; 460 461 soassertlocked(so); 462 463 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 464 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 465 return; 466 467 /* 468 * If we fail to alloc memory or if sbappendaddr() 469 * fails, re-add timeout and try again. 
470 */ 471 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 472 if (desync_mbuf != NULL) { 473 if (sbappendaddr(so, &so->so_rcv, &route_src, 474 desync_mbuf, NULL) != 0) { 475 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 476 sorwakeup(rop->rop_socket); 477 return; 478 } 479 m_freem(desync_mbuf); 480 } 481 /* Re-add timeout to try sending msg again */ 482 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 483 } 484 485 void 486 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 487 { 488 struct socket *so; 489 struct rtpcb *rop; 490 struct rt_msghdr *rtm; 491 struct mbuf *m = m0; 492 struct srp_ref sr; 493 int s; 494 495 /* ensure that we can access the rtm_type via mtod() */ 496 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 497 m_freem(m); 498 return; 499 } 500 501 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 502 /* 503 * If route socket is bound to an address family only send 504 * messages that match the address family. Address family 505 * agnostic messages are always sent. 506 */ 507 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 508 rop->rop_proto != sa_family) 509 continue; 510 511 512 so = rop->rop_socket; 513 s = solock(so); 514 515 /* 516 * Check to see if we don't want our own messages and 517 * if we can receive anything. 
518 */ 519 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 520 !(so->so_state & SS_ISCONNECTED) || 521 (so->so_state & SS_CANTRCVMORE)) 522 goto next; 523 524 /* filter messages that the process does not want */ 525 rtm = mtod(m, struct rt_msghdr *); 526 /* but RTM_DESYNC can't be filtered */ 527 if (rtm->rtm_type != RTM_DESYNC) { 528 if (rop->rop_msgfilter != 0 && 529 !(rop->rop_msgfilter & (1 << rtm->rtm_type))) 530 goto next; 531 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags)) 532 goto next; 533 } 534 switch (rtm->rtm_type) { 535 case RTM_IFANNOUNCE: 536 case RTM_DESYNC: 537 /* no tableid */ 538 break; 539 case RTM_RESOLVE: 540 case RTM_NEWADDR: 541 case RTM_DELADDR: 542 case RTM_IFINFO: 543 case RTM_80211INFO: 544 case RTM_BFD: 545 /* check against rdomain id */ 546 if (rop->rop_rtableid != RTABLE_ANY && 547 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 548 goto next; 549 break; 550 default: 551 if (rop->rop_priority != 0 && 552 rop->rop_priority < rtm->rtm_priority) 553 goto next; 554 /* check against rtable id */ 555 if (rop->rop_rtableid != RTABLE_ANY && 556 rop->rop_rtableid != rtm->rtm_tableid) 557 goto next; 558 break; 559 } 560 561 /* 562 * Check to see if the flush flag is set. If so, don't queue 563 * any more messages until the flag is cleared. 
564 */ 565 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 566 goto next; 567 568 rtm_sendup(so, m); 569 next: 570 sounlock(so, s); 571 } 572 SRPL_LEAVE(&sr); 573 574 m_freem(m); 575 } 576 577 int 578 rtm_sendup(struct socket *so, struct mbuf *m0) 579 { 580 struct rtpcb *rop = sotortpcb(so); 581 struct mbuf *m; 582 583 soassertlocked(so); 584 585 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 586 if (m == NULL) 587 return (ENOMEM); 588 589 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || 590 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { 591 /* Flag socket as desync'ed and flush required */ 592 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 593 rtm_senddesync(so); 594 m_freem(m); 595 return (ENOBUFS); 596 } 597 598 sorwakeup(so); 599 return (0); 600 } 601 602 struct rt_msghdr * 603 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 604 { 605 struct rt_msghdr *rtm; 606 struct rt_addrinfo info; 607 struct sockaddr_rtlabel sa_rl; 608 struct sockaddr_in6 sa_mask; 609 #ifdef BFD 610 struct sockaddr_bfd sa_bfd; 611 #endif 612 struct ifnet *ifp = NULL; 613 int len; 614 615 bzero(&info, sizeof(info)); 616 info.rti_info[RTAX_DST] = rt_key(rt); 617 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 618 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 619 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 620 #ifdef BFD 621 if (rt->rt_flags & RTF_BFD) 622 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 623 #endif 624 #ifdef MPLS 625 if (rt->rt_flags & RTF_MPLS) { 626 struct sockaddr_mpls sa_mpls; 627 628 bzero(&sa_mpls, sizeof(sa_mpls)); 629 sa_mpls.smpls_family = AF_MPLS; 630 sa_mpls.smpls_len = sizeof(sa_mpls); 631 sa_mpls.smpls_label = ((struct rt_mpls *) 632 rt->rt_llinfo)->mpls_label; 633 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 634 info.rti_mpls = ((struct rt_mpls *) 635 rt->rt_llinfo)->mpls_operation; 636 } 637 #endif 638 ifp = if_get(rt->rt_ifidx); 639 if (ifp != NULL) { 640 info.rti_info[RTAX_IFP] = 
sdltosa(ifp->if_sadl); 641 info.rti_info[RTAX_IFA] = 642 rtable_getsource(tableid, info.rti_info[RTAX_DST]->sa_family); 643 if (info.rti_info[RTAX_IFA] == NULL) 644 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 645 if (ifp->if_flags & IFF_POINTOPOINT) 646 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 647 } 648 if_put(ifp); 649 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 650 651 /* build new route message */ 652 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 653 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 654 655 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 656 rtm->rtm_type = type; 657 rtm->rtm_index = rt->rt_ifidx; 658 rtm->rtm_tableid = tableid; 659 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 660 rtm->rtm_flags = rt->rt_flags; 661 rtm->rtm_pid = curproc->p_p->ps_pid; 662 rtm->rtm_seq = seq; 663 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 664 rtm->rtm_addrs = info.rti_addrs; 665 #ifdef MPLS 666 rtm->rtm_mpls = info.rti_mpls; 667 #endif 668 return rtm; 669 } 670 671 int 672 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 673 struct mbuf *control) 674 { 675 struct rt_msghdr *rtm = NULL; 676 struct rtentry *rt = NULL; 677 struct rt_addrinfo info; 678 struct ifnet *ifp; 679 int len, seq, error = 0; 680 u_int tableid; 681 u_int8_t prio; 682 u_char vers, type; 683 684 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 685 (m = m_pullup(m, sizeof(int32_t))) == 0)) 686 return (ENOBUFS); 687 if ((m->m_flags & M_PKTHDR) == 0) 688 panic("route_output"); 689 len = m->m_pkthdr.len; 690 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 || 691 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 692 error = EINVAL; 693 goto fail; 694 } 695 vers = mtod(m, struct rt_msghdr *)->rtm_version; 696 switch (vers) { 697 case RTM_VERSION: 698 if (len < sizeof(struct rt_msghdr)) { 699 error = EINVAL; 700 goto fail; 701 } 702 if (len > RTM_MAXSIZE) { 703 error = EMSGSIZE; 704 goto fail; 705 } 706 rtm = malloc(len, M_RTABLE, 
M_WAITOK); 707 m_copydata(m, 0, len, rtm); 708 break; 709 default: 710 error = EPROTONOSUPPORT; 711 goto fail; 712 } 713 714 /* Verify that the caller is sending an appropriate message early */ 715 switch (rtm->rtm_type) { 716 case RTM_ADD: 717 case RTM_DELETE: 718 case RTM_GET: 719 case RTM_CHANGE: 720 case RTM_PROPOSAL: 721 case RTM_SOURCE: 722 break; 723 default: 724 error = EOPNOTSUPP; 725 goto fail; 726 } 727 /* 728 * Verify that the header length is valid. 729 * All messages from userland start with a struct rt_msghdr. 730 */ 731 if (rtm->rtm_hdrlen == 0) /* old client */ 732 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 733 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 734 len < rtm->rtm_hdrlen) { 735 error = EINVAL; 736 goto fail; 737 } 738 739 rtm->rtm_pid = curproc->p_p->ps_pid; 740 741 /* 742 * Verify that the caller has the appropriate privilege; RTM_GET 743 * is the only operation the non-superuser is allowed. 744 */ 745 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 746 error = EACCES; 747 goto fail; 748 } 749 tableid = rtm->rtm_tableid; 750 if (!rtable_exists(tableid)) { 751 if (rtm->rtm_type == RTM_ADD) { 752 if ((error = rtable_add(tableid)) != 0) 753 goto fail; 754 } else { 755 error = EINVAL; 756 goto fail; 757 } 758 } 759 760 /* Do not let userland play with kernel-only flags. 
*/ 761 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 762 error = EINVAL; 763 goto fail; 764 } 765 766 /* make sure that kernel-only bits are not set */ 767 rtm->rtm_priority &= RTP_MASK; 768 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 769 rtm->rtm_fmask &= RTF_FMASK; 770 771 if (rtm->rtm_priority != 0) { 772 if (rtm->rtm_priority > RTP_MAX || 773 rtm->rtm_priority == RTP_LOCAL) { 774 error = EINVAL; 775 goto fail; 776 } 777 prio = rtm->rtm_priority; 778 } else if (rtm->rtm_type != RTM_ADD) 779 prio = RTP_ANY; 780 else if (rtm->rtm_flags & RTF_STATIC) 781 prio = 0; 782 else 783 prio = RTP_DEFAULT; 784 785 bzero(&info, sizeof(info)); 786 info.rti_addrs = rtm->rtm_addrs; 787 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 788 len + (caddr_t)rtm, &info)) != 0) 789 goto fail; 790 791 info.rti_flags = rtm->rtm_flags; 792 793 if (rtm->rtm_type != RTM_SOURCE && 794 rtm->rtm_type != RTM_PROPOSAL && 795 (info.rti_info[RTAX_DST] == NULL || 796 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 797 (info.rti_info[RTAX_GATEWAY] != NULL && 798 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 799 info.rti_info[RTAX_GENMASK] != NULL)) { 800 error = EINVAL; 801 goto fail; 802 } 803 #ifdef MPLS 804 info.rti_mpls = rtm->rtm_mpls; 805 #endif 806 807 if (info.rti_info[RTAX_GATEWAY] != NULL && 808 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 809 (info.rti_flags & RTF_CLONING) == 0) { 810 info.rti_flags |= RTF_LLINFO; 811 } 812 813 /* 814 * Validate RTM_PROPOSAL and pass it along or error out. 815 */ 816 if (rtm->rtm_type == RTM_PROPOSAL) { 817 if (rtm_validate_proposal(&info) == -1) { 818 error = EINVAL; 819 goto fail; 820 } 821 /* 822 * If this is a solicitation proposal forward request to 823 * all interfaces. Most handlers will ignore it but at least 824 * umb(4) will send a response to this event. 
825 */ 826 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 827 NET_LOCK(); 828 TAILQ_FOREACH(ifp, &ifnet, if_list) { 829 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 830 } 831 NET_UNLOCK(); 832 } 833 } else if (rtm->rtm_type == RTM_SOURCE) { 834 if (info.rti_info[RTAX_IFA] == NULL) { 835 error = EINVAL; 836 goto fail; 837 } 838 if ((error = 839 rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0) 840 goto fail; 841 } else { 842 error = rtm_output(rtm, &rt, &info, prio, tableid); 843 if (!error) { 844 type = rtm->rtm_type; 845 seq = rtm->rtm_seq; 846 free(rtm, M_RTABLE, len); 847 rtm = rtm_report(rt, type, seq, tableid); 848 len = rtm->rtm_msglen; 849 } 850 } 851 852 rtfree(rt); 853 if (error) { 854 rtm->rtm_errno = error; 855 } else { 856 rtm->rtm_flags |= RTF_DONE; 857 } 858 859 /* 860 * Check to see if we don't want our own messages. 861 */ 862 if (!(so->so_options & SO_USELOOPBACK)) { 863 if (rtptable.rtp_count <= 1) { 864 /* no other listener and no loopback of messages */ 865 fail: 866 free(rtm, M_RTABLE, len); 867 m_freem(m); 868 return (error); 869 } 870 } 871 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 872 m_freem(m); 873 m = NULL; 874 } else if (m->m_pkthdr.len > len) 875 m_adj(m, len - m->m_pkthdr.len); 876 free(rtm, M_RTABLE, len); 877 if (m) 878 route_input(m, so, info.rti_info[RTAX_DST] ? 
879 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 880 881 return (error); 882 } 883 884 int 885 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 886 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 887 { 888 struct rtentry *rt = *prt; 889 struct ifnet *ifp = NULL; 890 int plen, newgate = 0, error = 0; 891 892 switch (rtm->rtm_type) { 893 case RTM_ADD: 894 if (info->rti_info[RTAX_GATEWAY] == NULL) { 895 error = EINVAL; 896 break; 897 } 898 899 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 900 if ((error = route_arp_conflict(rt, info))) { 901 rtfree(rt); 902 rt = NULL; 903 break; 904 } 905 906 /* 907 * We cannot go through a delete/create/insert cycle for 908 * cached route because this can lead to races in the 909 * receive path. Instead we update the L2 cache. 910 */ 911 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 912 goto change; 913 914 rtfree(rt); 915 rt = NULL; 916 917 NET_LOCK(); 918 if ((error = rtm_getifa(info, tableid)) != 0) { 919 NET_UNLOCK(); 920 break; 921 } 922 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 923 NET_UNLOCK(); 924 if (error == 0) 925 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 926 &rt->rt_rmx); 927 break; 928 case RTM_DELETE: 929 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 930 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 931 prio); 932 if (rt == NULL) { 933 error = ESRCH; 934 break; 935 } 936 937 /* 938 * If we got multipath routes, we require users to specify 939 * a matching gateway. 940 */ 941 if (ISSET(rt->rt_flags, RTF_MPATH) && 942 info->rti_info[RTAX_GATEWAY] == NULL) { 943 error = ESRCH; 944 break; 945 } 946 947 /* Detaching an interface requires the KERNEL_LOCK(). */ 948 ifp = if_get(rt->rt_ifidx); 949 KASSERT(ifp != NULL); 950 951 /* 952 * Invalidate the cache of automagically created and 953 * referenced L2 entries to make sure that ``rt_gwroute'' 954 * pointer stays valid for other CPUs. 
*/
		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
			NET_LOCK();
			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
			/* Reset the MTU of the gateway route. */
			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
			    route_cleargateway, rt);
			NET_UNLOCK();
			if_put(ifp);
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			if_put(ifp);
			error = EINVAL;
			break;
		}

		/* Drop the lookup reference before the route is deleted. */
		rtfree(rt);
		rt = NULL;

		NET_LOCK();
		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
		NET_UNLOCK();
		if_put(ifp);
		break;
	case RTM_CHANGE:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
			rtfree(rt);
			rt = NULL;
		}
		/*
		 * If RTAX_GATEWAY is the argument we're trying to
		 * change, try to find a compatible route.
		 */
		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], NULL, prio);
			/* Ensure we don't pick a multipath one. */
			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
				rtfree(rt);
				rt = NULL;
			}
		}

		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			error = EINVAL;
			break;
		}

		/*
		 * RTM_CHANGE needs a perfect match.
		 */
		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
		    info->rti_info[RTAX_NETMASK]);
		if (rt_plen(rt) != plen) {
			error = ESRCH;
			break;
		}

		/* A gateway differing from the current one is a new gateway. */
		if (info->rti_info[RTAX_GATEWAY] != NULL)
			if (rt->rt_gateway == NULL ||
			    bcmp(rt->rt_gateway,
			    info->rti_info[RTAX_GATEWAY],
			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
				newgate = 1;
			}
		/*
		 * Check reachable gateway before changing the route.
		 * New gateway could require new ifaddr, ifp;
		 * flags may also be different; ifp may be specified
		 * by ll sockaddr when protocol address is ambiguous.
		 */
		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
		    info->rti_info[RTAX_IFA] != NULL) {
			struct ifaddr	*ifa = NULL;

			NET_LOCK();
			if ((error = rtm_getifa(info, tableid)) != 0) {
				NET_UNLOCK();
				break;
			}
			ifa = info->rti_ifa;
			if (rt->rt_ifa != ifa) {
				/*
				 * Notify the old interface, then move the
				 * route over to the new address/interface.
				 */
				ifp = if_get(rt->rt_ifidx);
				KASSERT(ifp != NULL);
				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
				ifafree(rt->rt_ifa);
				if_put(ifp);

				ifa->ifa_refcnt++;
				rt->rt_ifa = ifa;
				rt->rt_ifidx = ifa->ifa_ifp->if_index;
				/* recheck link state after ifp change */
				rt_if_linkstate_change(rt, ifa->ifa_ifp,
				    tableid);
			}
			NET_UNLOCK();
		}
		/* Also entered from RTM_ADD when the match is a cached route. */
change:
		if (info->rti_info[RTAX_GATEWAY] != NULL) {
			/* When updating the gateway, make sure it is valid. */
			if (!newgate && rt->rt_gateway->sa_family !=
			    info->rti_info[RTAX_GATEWAY]->sa_family) {
				error = EINVAL;
				break;
			}

			NET_LOCK();
			error = rt_setgate(rt,
			    info->rti_info[RTAX_GATEWAY], tableid);
			NET_UNLOCK();
			if (error)
				break;
		}
#ifdef MPLS
		if (rtm->rtm_flags & RTF_MPLS) {
			NET_LOCK();
			error = rt_mpls_set(rt,
			    info->rti_info[RTAX_SRC], info->rti_mpls);
			NET_UNLOCK();
			if (error)
				break;
		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
			NET_LOCK();
			/* if gateway changed remove MPLS information */
			rt_mpls_clear(rt);
			NET_UNLOCK();
		}
#endif

#ifdef BFD
		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
			if ((error = bfdset(rt)))
				break;
		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
			bfdclear(rt);
		}
#endif

		NET_LOCK();
		/* Hack to allow some flags to be toggled */
		if (rtm->rtm_fmask) {
			/* MPLS flag it is set by rt_mpls_set() */
			rtm->rtm_fmask &= ~RTF_MPLS;
			rtm->rtm_flags &= ~RTF_MPLS;
			rt->rt_flags =
			    (rt->rt_flags & ~rtm->rtm_fmask) |
			    (rtm->rtm_flags & rtm->rtm_fmask);
		}
		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);

		/* Tell the interface about the (updated) route. */
		ifp = if_get(rt->rt_ifidx);
		KASSERT(ifp != NULL);
		ifp->if_rtrequest(ifp, RTM_ADD, rt);
		if_put(ifp);

		/* Swap the route label: drop the old id, look up the new one. */
		if (info->rti_info[RTAX_LABEL] != NULL) {
			char *rtlabel = ((struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL])->sr_label;
			rtlabel_unref(rt->rt_labelid);
			rt->rt_labelid = rtlabel_name2id(rtlabel);
		}
		if_group_routechange(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK]);
		/* Of the metrics named in rtm_inits, lock exactly those in rmx_locks. */
		rt->rt_locks &= ~(rtm->rtm_inits);
		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
		NET_UNLOCK();
		break;
	case RTM_GET:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL)
			error = ESRCH;
		break;
	}

	/* Hand the entry (possibly NULL) back to the caller. */
	*prt = rt;
	return (error);
}

/*
 * Pick the interface address to use for a route to `dst' via `gateway'
 * in routing table `rtableid'.  `flags' are the RTF_* flags of the
 * route.  Returns NULL when no address can be found.
 */
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
    unsigned int rtableid)
{
	struct ifaddr	*ifa;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		ifa = NULL;
		if (flags & RTF_HOST)
			ifa = ifa_ifwithdstaddr(dst, rtableid);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr(gateway, rtableid);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gateway, rtableid);
	}
	if (ifa == NULL) {
		if (gateway->sa_family == AF_LINK) {
			/* Link-level gateway: the index names the interface. */
			struct sockaddr_dl *sdl = satosdl(gateway);
			struct ifnet *ifp = if_get(sdl->sdl_index);

			if (ifp != NULL)
				ifa = ifaof_ifpforaddr(dst, ifp);
			if_put(ifp);
		} else {
			/* Otherwise use the address of the route to the gateway. */
			struct rtentry *rt;

			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
			if (rt != NULL)
				ifa = rt->rt_ifa;
			rtfree(rt);
		}
	}
	if (ifa == NULL)
		return (NULL);
	/*
	 * Address family mismatch: look for an address for `dst' on the
	 * same interface, falling back to the one found above.
	 */
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr	*oifa = ifa;
		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (ifa == NULL)
			ifa = oifa;
	}
	return (ifa);
}

/*
 * Resolve the interface address for the request described by `info'
 * in routing table `rtid' and store it in info->rti_ifa.
 */
int
rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
{
	struct ifnet	*ifp = NULL;

	/*
	 * The "returned" `ifa' is guaranteed to be alive only if
	 * the NET_LOCK() is held.
1222 */ 1223 NET_ASSERT_LOCKED(); 1224 1225 /* 1226 * ifp may be specified by sockaddr_dl when protocol address 1227 * is ambiguous 1228 */ 1229 if (info->rti_info[RTAX_IFP] != NULL) { 1230 struct sockaddr_dl *sdl; 1231 1232 sdl = satosdl(info->rti_info[RTAX_IFP]); 1233 ifp = if_get(sdl->sdl_index); 1234 } 1235 1236 #ifdef IPSEC 1237 /* 1238 * If the destination is a PF_KEY address, we'll look 1239 * for the existence of a encap interface number or address 1240 * in the options list of the gateway. By default, we'll return 1241 * enc0. 1242 */ 1243 if (info->rti_info[RTAX_DST] && 1244 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1245 info->rti_ifa = enc_getifa(rtid, 0); 1246 #endif 1247 1248 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1249 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1250 1251 if (info->rti_ifa == NULL) { 1252 struct sockaddr *sa; 1253 1254 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1255 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1256 sa = info->rti_info[RTAX_DST]; 1257 1258 if (sa != NULL && ifp != NULL) 1259 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1260 else if (info->rti_info[RTAX_DST] != NULL && 1261 info->rti_info[RTAX_GATEWAY] != NULL) 1262 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1263 info->rti_info[RTAX_DST], 1264 info->rti_info[RTAX_GATEWAY], 1265 rtid); 1266 else if (sa != NULL) 1267 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1268 sa, sa, rtid); 1269 } 1270 1271 if_put(ifp); 1272 1273 if (info->rti_ifa == NULL) 1274 return (ENETUNREACH); 1275 1276 return (0); 1277 } 1278 1279 int 1280 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1281 { 1282 struct rtentry *nhrt = arg; 1283 1284 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1285 !ISSET(rt->rt_locks, RTV_MTU)) 1286 rt->rt_mtu = 0; 1287 1288 return (0); 1289 } 1290 1291 /* 1292 * Check if the user request to insert an ARP entry does not conflict 1293 * with existing ones. 
1294 * 1295 * Only two entries are allowed for a given IP address: a private one 1296 * (priv) and a public one (pub). 1297 */ 1298 int 1299 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1300 { 1301 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1302 1303 if ((info->rti_flags & RTF_LLINFO) == 0 || 1304 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1305 return (0); 1306 1307 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1308 return (0); 1309 1310 /* If the entry is cached, it can be updated. */ 1311 if (ISSET(rt->rt_flags, RTF_CACHED)) 1312 return (0); 1313 1314 /* 1315 * Same destination, not cached and both "priv" or "pub" conflict. 1316 * If a second entry exists, it always conflict. 1317 */ 1318 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1319 ISSET(rt->rt_flags, RTF_MPATH)) 1320 return (EEXIST); 1321 1322 /* No conflict but an entry exist so we need to force mpath. */ 1323 info->rti_flags |= RTF_MPATH; 1324 return (0); 1325 } 1326 1327 void 1328 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1329 struct rt_kmetrics *out) 1330 { 1331 int64_t expire; 1332 1333 if (which & RTV_MTU) 1334 out->rmx_mtu = in->rmx_mtu; 1335 if (which & RTV_EXPIRE) { 1336 expire = in->rmx_expire; 1337 if (expire != 0) { 1338 expire -= gettime(); 1339 expire += getuptime(); 1340 } 1341 1342 out->rmx_expire = expire; 1343 } 1344 } 1345 1346 void 1347 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1348 { 1349 int64_t expire; 1350 1351 expire = in->rmx_expire; 1352 if (expire != 0) { 1353 expire -= getuptime(); 1354 expire += gettime(); 1355 } 1356 1357 bzero(out, sizeof(*out)); 1358 out->rmx_locks = in->rmx_locks; 1359 out->rmx_mtu = in->rmx_mtu; 1360 out->rmx_expire = expire; 1361 out->rmx_pksent = in->rmx_pksent; 1362 } 1363 1364 #define ROUNDUP(a) \ 1365 ((a) > 0 ? 
(1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1366 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1367 1368 int 1369 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1370 { 1371 struct sockaddr *sa; 1372 int i; 1373 1374 /* 1375 * Parse address bits, split address storage in chunks, and 1376 * set info pointers. Use sa_len for traversing the memory 1377 * and check that we stay within in the limit. 1378 */ 1379 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1380 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1381 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1382 continue; 1383 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1384 return (EINVAL); 1385 sa = (struct sockaddr *)cp; 1386 if (cp + sa->sa_len > cplim) 1387 return (EINVAL); 1388 rtinfo->rti_info[i] = sa; 1389 ADVANCE(cp, sa); 1390 } 1391 /* 1392 * Check that the address family is suitable for the route address 1393 * type. Check that each address has a size that fits its family 1394 * and its length is within the size. Strings within addresses must 1395 * be NUL terminated. 1396 */ 1397 for (i = 0; i < RTAX_MAX; i++) { 1398 size_t len, maxlen, size; 1399 1400 sa = rtinfo->rti_info[i]; 1401 if (sa == NULL) 1402 continue; 1403 maxlen = size = 0; 1404 switch (i) { 1405 case RTAX_DST: 1406 case RTAX_GATEWAY: 1407 case RTAX_SRC: 1408 switch (sa->sa_family) { 1409 case AF_INET: 1410 size = sizeof(struct sockaddr_in); 1411 break; 1412 case AF_LINK: 1413 size = sizeof(struct sockaddr_dl); 1414 break; 1415 #ifdef INET6 1416 case AF_INET6: 1417 size = sizeof(struct sockaddr_in6); 1418 break; 1419 #endif 1420 #ifdef MPLS 1421 case AF_MPLS: 1422 size = sizeof(struct sockaddr_mpls); 1423 break; 1424 #endif 1425 } 1426 break; 1427 case RTAX_IFP: 1428 if (sa->sa_family != AF_LINK) 1429 return (EAFNOSUPPORT); 1430 /* 1431 * XXX Should be sizeof(struct sockaddr_dl), but 1432 * route(8) has a bug and provides less memory. 1433 * arp(8) has another bug and uses sizeof pointer. 
1434 */ 1435 size = 4; 1436 break; 1437 case RTAX_IFA: 1438 switch (sa->sa_family) { 1439 case AF_INET: 1440 size = sizeof(struct sockaddr_in); 1441 break; 1442 #ifdef INET6 1443 case AF_INET6: 1444 size = sizeof(struct sockaddr_in6); 1445 break; 1446 #endif 1447 default: 1448 return (EAFNOSUPPORT); 1449 } 1450 break; 1451 case RTAX_LABEL: 1452 sa->sa_family = AF_UNSPEC; 1453 maxlen = RTLABEL_LEN; 1454 size = sizeof(struct sockaddr_rtlabel); 1455 break; 1456 #ifdef BFD 1457 case RTAX_BFD: 1458 sa->sa_family = AF_UNSPEC; 1459 size = sizeof(struct sockaddr_bfd); 1460 break; 1461 #endif 1462 case RTAX_DNS: 1463 /* more validation in rtm_validate_proposal */ 1464 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1465 return (EINVAL); 1466 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1467 sr_dns)) 1468 return (EINVAL); 1469 switch (sa->sa_family) { 1470 case AF_INET: 1471 #ifdef INET6 1472 case AF_INET6: 1473 #endif 1474 break; 1475 default: 1476 return (EAFNOSUPPORT); 1477 } 1478 break; 1479 case RTAX_STATIC: 1480 sa->sa_family = AF_UNSPEC; 1481 maxlen = RTSTATIC_LEN; 1482 size = sizeof(struct sockaddr_rtstatic); 1483 break; 1484 case RTAX_SEARCH: 1485 sa->sa_family = AF_UNSPEC; 1486 maxlen = RTSEARCH_LEN; 1487 size = sizeof(struct sockaddr_rtsearch); 1488 break; 1489 } 1490 if (size) { 1491 /* memory for the full struct must be provided */ 1492 if (sa->sa_len < size) 1493 return (EINVAL); 1494 } 1495 if (maxlen) { 1496 /* this should not happen */ 1497 if (2 + maxlen > size) 1498 return (EINVAL); 1499 /* strings must be NUL terminated within the struct */ 1500 len = strnlen(sa->sa_data, maxlen); 1501 if (len >= maxlen || 2 + len >= sa->sa_len) 1502 return (EINVAL); 1503 break; 1504 } 1505 } 1506 return (0); 1507 } 1508 1509 struct mbuf * 1510 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1511 { 1512 struct rt_msghdr *rtm; 1513 struct mbuf *m; 1514 int i; 1515 struct sockaddr *sa; 1516 int len, dlen, hlen; 1517 1518 switch (type) { 1519 case RTM_DELADDR: 1520 case 
RTM_NEWADDR: 1521 len = sizeof(struct ifa_msghdr); 1522 break; 1523 case RTM_IFINFO: 1524 len = sizeof(struct if_msghdr); 1525 break; 1526 case RTM_IFANNOUNCE: 1527 len = sizeof(struct if_announcemsghdr); 1528 break; 1529 #ifdef BFD 1530 case RTM_BFD: 1531 len = sizeof(struct bfd_msghdr); 1532 break; 1533 #endif 1534 case RTM_80211INFO: 1535 len = sizeof(struct if_ieee80211_msghdr); 1536 break; 1537 default: 1538 len = sizeof(struct rt_msghdr); 1539 break; 1540 } 1541 if (len > MCLBYTES) 1542 panic("rtm_msg1"); 1543 m = m_gethdr(M_DONTWAIT, MT_DATA); 1544 if (m && len > MHLEN) { 1545 MCLGET(m, M_DONTWAIT); 1546 if ((m->m_flags & M_EXT) == 0) { 1547 m_free(m); 1548 m = NULL; 1549 } 1550 } 1551 if (m == NULL) 1552 return (m); 1553 m->m_pkthdr.len = m->m_len = hlen = len; 1554 m->m_pkthdr.ph_ifidx = 0; 1555 rtm = mtod(m, struct rt_msghdr *); 1556 bzero(rtm, len); 1557 for (i = 0; i < RTAX_MAX; i++) { 1558 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1559 continue; 1560 rtinfo->rti_addrs |= (1 << i); 1561 dlen = ROUNDUP(sa->sa_len); 1562 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1563 m_freem(m); 1564 return (NULL); 1565 } 1566 len += dlen; 1567 } 1568 rtm->rtm_msglen = len; 1569 rtm->rtm_hdrlen = hlen; 1570 rtm->rtm_version = RTM_VERSION; 1571 rtm->rtm_type = type; 1572 return (m); 1573 } 1574 1575 int 1576 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1577 struct walkarg *w) 1578 { 1579 int i; 1580 int len, dlen, hlen, second_time = 0; 1581 caddr_t cp0; 1582 1583 rtinfo->rti_addrs = 0; 1584 again: 1585 switch (type) { 1586 case RTM_DELADDR: 1587 case RTM_NEWADDR: 1588 len = sizeof(struct ifa_msghdr); 1589 break; 1590 case RTM_IFINFO: 1591 len = sizeof(struct if_msghdr); 1592 break; 1593 default: 1594 len = sizeof(struct rt_msghdr); 1595 break; 1596 } 1597 hlen = len; 1598 if ((cp0 = cp) != NULL) 1599 cp += len; 1600 for (i = 0; i < RTAX_MAX; i++) { 1601 struct sockaddr *sa; 1602 1603 if ((sa = rtinfo->rti_info[i]) == NULL) 1604 
continue; 1605 rtinfo->rti_addrs |= (1 << i); 1606 dlen = ROUNDUP(sa->sa_len); 1607 if (cp) { 1608 bcopy(sa, cp, (size_t)dlen); 1609 cp += dlen; 1610 } 1611 len += dlen; 1612 } 1613 /* align message length to the next natural boundary */ 1614 len = ALIGN(len); 1615 if (cp == 0 && w != NULL && !second_time) { 1616 w->w_needed += len; 1617 if (w->w_needed <= 0 && w->w_where) { 1618 if (w->w_tmemsize < len) { 1619 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1620 w->w_tmem = malloc(len, M_RTABLE, 1621 M_NOWAIT | M_ZERO); 1622 if (w->w_tmem) 1623 w->w_tmemsize = len; 1624 } 1625 if (w->w_tmem) { 1626 cp = w->w_tmem; 1627 second_time = 1; 1628 goto again; 1629 } else 1630 w->w_where = 0; 1631 } 1632 } 1633 if (cp && w) /* clear the message header */ 1634 bzero(cp0, hlen); 1635 1636 if (cp) { 1637 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1638 1639 rtm->rtm_version = RTM_VERSION; 1640 rtm->rtm_type = type; 1641 rtm->rtm_msglen = len; 1642 rtm->rtm_hdrlen = hlen; 1643 } 1644 return (len); 1645 } 1646 1647 void 1648 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1649 { 1650 struct rt_addrinfo info; 1651 struct ifnet *ifp; 1652 struct sockaddr_rtlabel sa_rl; 1653 struct sockaddr_in6 sa_mask; 1654 1655 memset(&info, 0, sizeof(info)); 1656 info.rti_info[RTAX_DST] = rt_key(rt); 1657 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1658 if (!ISSET(rt->rt_flags, RTF_HOST)) 1659 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1660 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1661 ifp = if_get(rt->rt_ifidx); 1662 if (ifp != NULL) { 1663 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1664 info.rti_info[RTAX_IFA] = 1665 rtable_getsource(rtableid, info.rti_info[RTAX_DST]->sa_family); 1666 if (info.rti_info[RTAX_IFA] == NULL) 1667 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1668 } 1669 1670 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1671 rtableid); 1672 if_put(ifp); 1673 } 1674 1675 /* 1676 * 
This routine is called to generate a message from the routing 1677 * socket indicating that a redirect has occurred, a routing lookup 1678 * has failed, or that a protocol has detected timeouts to a particular 1679 * destination. 1680 */ 1681 void 1682 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1683 u_int ifidx, int error, u_int tableid) 1684 { 1685 struct rt_msghdr *rtm; 1686 struct mbuf *m; 1687 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1688 1689 if (rtptable.rtp_count == 0) 1690 return; 1691 m = rtm_msg1(type, rtinfo); 1692 if (m == NULL) 1693 return; 1694 rtm = mtod(m, struct rt_msghdr *); 1695 rtm->rtm_flags = RTF_DONE | flags; 1696 rtm->rtm_priority = prio; 1697 rtm->rtm_errno = error; 1698 rtm->rtm_tableid = tableid; 1699 rtm->rtm_addrs = rtinfo->rti_addrs; 1700 rtm->rtm_index = ifidx; 1701 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1702 } 1703 1704 /* 1705 * This routine is called to generate a message from the routing 1706 * socket indicating that the status of a network interface has changed. 1707 */ 1708 void 1709 rtm_ifchg(struct ifnet *ifp) 1710 { 1711 struct rt_addrinfo info; 1712 struct if_msghdr *ifm; 1713 struct mbuf *m; 1714 1715 if (rtptable.rtp_count == 0) 1716 return; 1717 memset(&info, 0, sizeof(info)); 1718 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1719 m = rtm_msg1(RTM_IFINFO, &info); 1720 if (m == NULL) 1721 return; 1722 ifm = mtod(m, struct if_msghdr *); 1723 ifm->ifm_index = ifp->if_index; 1724 ifm->ifm_tableid = ifp->if_rdomain; 1725 ifm->ifm_flags = ifp->if_flags; 1726 ifm->ifm_xflags = ifp->if_xflags; 1727 if_getdata(ifp, &ifm->ifm_data); 1728 ifm->ifm_addrs = info.rti_addrs; 1729 route_input(m, NULL, AF_UNSPEC); 1730 } 1731 1732 /* 1733 * This is called to generate messages from the routing socket 1734 * indicating a network interface has had addresses associated with it. 
1735 * if we ever reverse the logic and replace messages TO the routing 1736 * socket indicate a request to configure interfaces, then it will 1737 * be unnecessary as the routing socket will automatically generate 1738 * copies of it. 1739 */ 1740 void 1741 rtm_addr(int cmd, struct ifaddr *ifa) 1742 { 1743 struct ifnet *ifp = ifa->ifa_ifp; 1744 struct mbuf *m; 1745 struct rt_addrinfo info; 1746 struct ifa_msghdr *ifam; 1747 1748 if (rtptable.rtp_count == 0) 1749 return; 1750 1751 memset(&info, 0, sizeof(info)); 1752 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1753 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1754 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1755 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1756 if ((m = rtm_msg1(cmd, &info)) == NULL) 1757 return; 1758 ifam = mtod(m, struct ifa_msghdr *); 1759 ifam->ifam_index = ifp->if_index; 1760 ifam->ifam_metric = ifa->ifa_metric; 1761 ifam->ifam_flags = ifa->ifa_flags; 1762 ifam->ifam_addrs = info.rti_addrs; 1763 ifam->ifam_tableid = ifp->if_rdomain; 1764 1765 route_input(m, NULL, 1766 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1767 } 1768 1769 /* 1770 * This is called to generate routing socket messages indicating 1771 * network interface arrival and departure. 1772 */ 1773 void 1774 rtm_ifannounce(struct ifnet *ifp, int what) 1775 { 1776 struct if_announcemsghdr *ifan; 1777 struct mbuf *m; 1778 1779 if (rtptable.rtp_count == 0) 1780 return; 1781 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1782 if (m == NULL) 1783 return; 1784 ifan = mtod(m, struct if_announcemsghdr *); 1785 ifan->ifan_index = ifp->if_index; 1786 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1787 ifan->ifan_what = what; 1788 route_input(m, NULL, AF_UNSPEC); 1789 } 1790 1791 #ifdef BFD 1792 /* 1793 * This is used to generate routing socket messages indicating 1794 * the state of a BFD session. 
1795 */ 1796 void 1797 rtm_bfd(struct bfd_config *bfd) 1798 { 1799 struct bfd_msghdr *bfdm; 1800 struct sockaddr_bfd sa_bfd; 1801 struct mbuf *m; 1802 struct rt_addrinfo info; 1803 1804 if (rtptable.rtp_count == 0) 1805 return; 1806 memset(&info, 0, sizeof(info)); 1807 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1808 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1809 1810 m = rtm_msg1(RTM_BFD, &info); 1811 if (m == NULL) 1812 return; 1813 bfdm = mtod(m, struct bfd_msghdr *); 1814 bfdm->bm_addrs = info.rti_addrs; 1815 1816 bfd2sa(bfd->bc_rt, &sa_bfd); 1817 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1818 1819 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1820 } 1821 #endif /* BFD */ 1822 1823 /* 1824 * This is used to generate routing socket messages indicating 1825 * the state of an ieee80211 interface. 1826 */ 1827 void 1828 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1829 { 1830 struct if_ieee80211_msghdr *ifim; 1831 struct mbuf *m; 1832 1833 if (rtptable.rtp_count == 0) 1834 return; 1835 m = rtm_msg1(RTM_80211INFO, NULL); 1836 if (m == NULL) 1837 return; 1838 ifim = mtod(m, struct if_ieee80211_msghdr *); 1839 ifim->ifim_index = ifp->if_index; 1840 ifim->ifim_tableid = ifp->if_rdomain; 1841 1842 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1843 route_input(m, NULL, AF_UNSPEC); 1844 } 1845 1846 /* 1847 * This is used to generate routing socket messages indicating 1848 * the address selection proposal from an interface. 
1849 */ 1850 void 1851 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1852 uint8_t prio) 1853 { 1854 struct rt_msghdr *rtm; 1855 struct mbuf *m; 1856 1857 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1858 if (m == NULL) 1859 return; 1860 rtm = mtod(m, struct rt_msghdr *); 1861 rtm->rtm_flags = RTF_DONE | flags; 1862 rtm->rtm_priority = prio; 1863 rtm->rtm_tableid = ifp->if_rdomain; 1864 rtm->rtm_index = ifp->if_index; 1865 rtm->rtm_addrs = rtinfo->rti_addrs; 1866 1867 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1868 } 1869 1870 /* 1871 * This is used in dumping the kernel table via sysctl(). 1872 */ 1873 int 1874 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1875 { 1876 struct walkarg *w = v; 1877 int error = 0, size; 1878 struct rt_addrinfo info; 1879 struct ifnet *ifp; 1880 #ifdef BFD 1881 struct sockaddr_bfd sa_bfd; 1882 #endif 1883 struct sockaddr_rtlabel sa_rl; 1884 struct sockaddr_in6 sa_mask; 1885 1886 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1887 return 0; 1888 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1889 u_int8_t prio = w->w_arg & RTP_MASK; 1890 if (w->w_arg < 0) { 1891 prio = (-w->w_arg) & RTP_MASK; 1892 /* Show all routes that are not this priority */ 1893 if (prio == (rt->rt_priority & RTP_MASK)) 1894 return 0; 1895 } else { 1896 if (prio != (rt->rt_priority & RTP_MASK) && 1897 prio != RTP_ANY) 1898 return 0; 1899 } 1900 } 1901 bzero(&info, sizeof(info)); 1902 info.rti_info[RTAX_DST] = rt_key(rt); 1903 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1904 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1905 ifp = if_get(rt->rt_ifidx); 1906 if (ifp != NULL) { 1907 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1908 info.rti_info[RTAX_IFA] = 1909 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); 1910 if (info.rti_info[RTAX_IFA] == NULL) 1911 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1912 if (ifp->if_flags & IFF_POINTOPOINT) 1913 info.rti_info[RTAX_BRD] = 
rt->rt_ifa->ifa_dstaddr; 1914 } 1915 if_put(ifp); 1916 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1917 #ifdef BFD 1918 if (rt->rt_flags & RTF_BFD) 1919 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1920 #endif 1921 #ifdef MPLS 1922 if (rt->rt_flags & RTF_MPLS) { 1923 struct sockaddr_mpls sa_mpls; 1924 1925 bzero(&sa_mpls, sizeof(sa_mpls)); 1926 sa_mpls.smpls_family = AF_MPLS; 1927 sa_mpls.smpls_len = sizeof(sa_mpls); 1928 sa_mpls.smpls_label = ((struct rt_mpls *) 1929 rt->rt_llinfo)->mpls_label; 1930 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1931 info.rti_mpls = ((struct rt_mpls *) 1932 rt->rt_llinfo)->mpls_operation; 1933 } 1934 #endif 1935 1936 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1937 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1938 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1939 1940 rtm->rtm_pid = curproc->p_p->ps_pid; 1941 rtm->rtm_flags = rt->rt_flags; 1942 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1943 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1944 /* Do not account the routing table's reference. 
*/ 1945 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1946 rtm->rtm_index = rt->rt_ifidx; 1947 rtm->rtm_addrs = info.rti_addrs; 1948 rtm->rtm_tableid = id; 1949 #ifdef MPLS 1950 rtm->rtm_mpls = info.rti_mpls; 1951 #endif 1952 if ((error = copyout(rtm, w->w_where, size)) != 0) 1953 w->w_where = NULL; 1954 else 1955 w->w_where += size; 1956 } 1957 return (error); 1958 } 1959 1960 int 1961 sysctl_iflist(int af, struct walkarg *w) 1962 { 1963 struct ifnet *ifp; 1964 struct ifaddr *ifa; 1965 struct rt_addrinfo info; 1966 int len, error = 0; 1967 1968 bzero(&info, sizeof(info)); 1969 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1970 if (w->w_arg && w->w_arg != ifp->if_index) 1971 continue; 1972 /* Copy the link-layer address first */ 1973 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1974 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 1975 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1976 struct if_msghdr *ifm; 1977 1978 ifm = (struct if_msghdr *)w->w_tmem; 1979 ifm->ifm_index = ifp->if_index; 1980 ifm->ifm_tableid = ifp->if_rdomain; 1981 ifm->ifm_flags = ifp->if_flags; 1982 if_getdata(ifp, &ifm->ifm_data); 1983 ifm->ifm_addrs = info.rti_addrs; 1984 error = copyout(ifm, w->w_where, len); 1985 if (error) 1986 return (error); 1987 w->w_where += len; 1988 } 1989 info.rti_info[RTAX_IFP] = NULL; 1990 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1991 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 1992 if (af && af != ifa->ifa_addr->sa_family) 1993 continue; 1994 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1995 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1996 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1997 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 1998 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1999 struct ifa_msghdr *ifam; 2000 2001 ifam = (struct ifa_msghdr *)w->w_tmem; 2002 ifam->ifam_index = ifa->ifa_ifp->if_index; 2003 ifam->ifam_flags = ifa->ifa_flags; 2004 ifam->ifam_metric = ifa->ifa_metric; 2005 ifam->ifam_addrs = info.rti_addrs; 2006 
error = copyout(w->w_tmem, w->w_where, len); 2007 if (error) 2008 return (error); 2009 w->w_where += len; 2010 } 2011 } 2012 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 2013 info.rti_info[RTAX_BRD] = NULL; 2014 } 2015 return (0); 2016 } 2017 2018 int 2019 sysctl_ifnames(struct walkarg *w) 2020 { 2021 struct if_nameindex_msg ifn; 2022 struct ifnet *ifp; 2023 int error = 0; 2024 2025 /* XXX ignore tableid for now */ 2026 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2027 if (w->w_arg && w->w_arg != ifp->if_index) 2028 continue; 2029 w->w_needed += sizeof(ifn); 2030 if (w->w_where && w->w_needed <= 0) { 2031 2032 memset(&ifn, 0, sizeof(ifn)); 2033 ifn.if_index = ifp->if_index; 2034 strlcpy(ifn.if_name, ifp->if_xname, 2035 sizeof(ifn.if_name)); 2036 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2037 if (error) 2038 return (error); 2039 w->w_where += sizeof(ifn); 2040 } 2041 } 2042 2043 return (0); 2044 } 2045 2046 int 2047 sysctl_source(int af, u_int tableid, struct walkarg *w) 2048 { 2049 struct sockaddr *sa; 2050 int size, error = 0; 2051 2052 sa = rtable_getsource(tableid, af); 2053 if (sa) { 2054 switch (sa->sa_family) { 2055 case AF_INET: 2056 size = sizeof(struct sockaddr_in); 2057 break; 2058 #ifdef INET6 2059 case AF_INET6: 2060 size = sizeof(struct sockaddr_in6); 2061 break; 2062 #endif 2063 default: 2064 return (0); 2065 } 2066 w->w_needed += size; 2067 if (w->w_where && w->w_needed <= 0) { 2068 if ((error = copyout(sa, w->w_where, size))) 2069 return (error); 2070 w->w_where += size; 2071 } 2072 } 2073 return (0); 2074 } 2075 2076 int 2077 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2078 size_t newlen) 2079 { 2080 int i, error = EINVAL; 2081 u_char af; 2082 struct walkarg w; 2083 struct rt_tableinfo tableinfo; 2084 u_int tableid = 0; 2085 2086 if (new) 2087 return (EPERM); 2088 if (namelen < 3 || namelen > 4) 2089 return (EINVAL); 2090 af = name[0]; 2091 bzero(&w, sizeof(w)); 2092 w.w_where = where; 2093 w.w_given = 
*given; 2094 w.w_needed = 0 - w.w_given; 2095 w.w_op = name[1]; 2096 w.w_arg = name[2]; 2097 2098 if (namelen == 4) { 2099 tableid = name[3]; 2100 if (!rtable_exists(tableid)) 2101 return (ENOENT); 2102 } else 2103 tableid = curproc->p_p->ps_rtableid; 2104 2105 switch (w.w_op) { 2106 case NET_RT_DUMP: 2107 case NET_RT_FLAGS: 2108 NET_LOCK(); 2109 for (i = 1; i <= AF_MAX; i++) { 2110 if (af != 0 && af != i) 2111 continue; 2112 2113 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2114 &w); 2115 if (error == EAFNOSUPPORT) 2116 error = 0; 2117 if (error) 2118 break; 2119 } 2120 NET_UNLOCK(); 2121 break; 2122 2123 case NET_RT_IFLIST: 2124 NET_LOCK(); 2125 error = sysctl_iflist(af, &w); 2126 NET_UNLOCK(); 2127 break; 2128 2129 case NET_RT_STATS: 2130 return (sysctl_rtable_rtstat(where, given, new)); 2131 case NET_RT_TABLE: 2132 tableid = w.w_arg; 2133 if (!rtable_exists(tableid)) 2134 return (ENOENT); 2135 memset(&tableinfo, 0, sizeof tableinfo); 2136 tableinfo.rti_tableid = tableid; 2137 tableinfo.rti_domainid = rtable_l2(tableid); 2138 error = sysctl_rdstruct(where, given, new, 2139 &tableinfo, sizeof(tableinfo)); 2140 return (error); 2141 case NET_RT_IFNAMES: 2142 NET_LOCK(); 2143 error = sysctl_ifnames(&w); 2144 NET_UNLOCK(); 2145 break; 2146 case NET_RT_SOURCE: 2147 tableid = w.w_arg; 2148 if (!rtable_exists(tableid)) 2149 return (ENOENT); 2150 NET_LOCK(); 2151 for (i = 1; i <= AF_MAX; i++) { 2152 if (af != 0 && af != i) 2153 continue; 2154 2155 error = sysctl_source(i, tableid, &w); 2156 if (error == EAFNOSUPPORT) 2157 error = 0; 2158 if (error) 2159 break; 2160 } 2161 NET_UNLOCK(); 2162 break; 2163 } 2164 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2165 w.w_needed += w.w_given; 2166 if (where) { 2167 *given = w.w_where - (caddr_t)where; 2168 if (*given < w.w_needed) 2169 return (ENOMEM); 2170 } else 2171 *given = (11 * w.w_needed) / 10; 2172 2173 return (error); 2174 } 2175 2176 int 2177 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2178 { 2179 
extern struct cpumem *rtcounters; 2180 uint64_t counters[rts_ncounters]; 2181 struct rtstat rtstat; 2182 uint32_t *words = (uint32_t *)&rtstat; 2183 int i; 2184 2185 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2186 memset(&rtstat, 0, sizeof rtstat); 2187 counters_read(rtcounters, counters, nitems(counters)); 2188 2189 for (i = 0; i < nitems(counters); i++) 2190 words[i] = (uint32_t)counters[i]; 2191 2192 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2193 } 2194 2195 int 2196 rtm_validate_proposal(struct rt_addrinfo *info) 2197 { 2198 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2199 RTA_SEARCH)) { 2200 return -1; 2201 } 2202 2203 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2204 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2205 if (sa == NULL) 2206 return -1; 2207 switch (sa->sa_family) { 2208 case AF_INET: 2209 if (sa->sa_len != sizeof(struct sockaddr_in)) 2210 return -1; 2211 break; 2212 case AF_INET6: 2213 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2214 return -1; 2215 break; 2216 default: 2217 return -1; 2218 } 2219 } 2220 2221 if (ISSET(info->rti_addrs, RTA_IFA)) { 2222 struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2223 if (sa == NULL) 2224 return -1; 2225 switch (sa->sa_family) { 2226 case AF_INET: 2227 if (sa->sa_len != sizeof(struct sockaddr_in)) 2228 return -1; 2229 break; 2230 case AF_INET6: 2231 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2232 return -1; 2233 break; 2234 default: 2235 return -1; 2236 } 2237 } 2238 2239 if (ISSET(info->rti_addrs, RTA_DNS)) { 2240 struct sockaddr_rtdns *rtdns = 2241 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2242 if (rtdns == NULL) 2243 return -1; 2244 if (rtdns->sr_len > sizeof(*rtdns)) 2245 return -1; 2246 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2247 return -1; 2248 switch (rtdns->sr_family) { 2249 case AF_INET: 2250 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2251 sr_dns)) % sizeof(struct 
in_addr) != 0) 2252 return -1; 2253 break; 2254 #ifdef INET6 2255 case AF_INET6: 2256 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2257 sr_dns)) % sizeof(struct in6_addr) != 0) 2258 return -1; 2259 break; 2260 #endif 2261 default: 2262 return -1; 2263 } 2264 } 2265 2266 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2267 struct sockaddr_rtstatic *rtstatic = 2268 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2269 if (rtstatic == NULL) 2270 return -1; 2271 if (rtstatic->sr_len > sizeof(*rtstatic)) 2272 return -1; 2273 if (rtstatic->sr_len <= 2274 offsetof(struct sockaddr_rtstatic, sr_static)) 2275 return -1; 2276 } 2277 2278 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2279 struct sockaddr_rtsearch *rtsearch = 2280 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2281 if (rtsearch == NULL) 2282 return -1; 2283 if (rtsearch->sr_len > sizeof(*rtsearch)) 2284 return -1; 2285 if (rtsearch->sr_len <= 2286 offsetof(struct sockaddr_rtsearch, sr_search)) 2287 return -1; 2288 } 2289 2290 return 0; 2291 } 2292 2293 int 2294 rt_setsource(unsigned int rtableid, struct sockaddr *src) 2295 { 2296 struct ifaddr *ifa; 2297 int error; 2298 /* 2299 * If source address is 0.0.0.0 or :: 2300 * use automatic source selection 2301 */ 2302 switch(src->sa_family) { 2303 case AF_INET: 2304 if(satosin(src)->sin_addr.s_addr == INADDR_ANY) { 2305 rtable_setsource(rtableid, AF_INET, NULL); 2306 return (0); 2307 } 2308 break; 2309 #ifdef INET6 2310 case AF_INET6: 2311 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 2312 rtable_setsource(rtableid, AF_INET6, NULL); 2313 return (0); 2314 } 2315 break; 2316 #endif 2317 default: 2318 return (EAFNOSUPPORT); 2319 } 2320 2321 KERNEL_LOCK(); 2322 /* 2323 * Check if source address is assigned to an interface in the 2324 * same rdomain 2325 */ 2326 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) { 2327 KERNEL_UNLOCK(); 2328 return (EINVAL); 2329 } 2330 2331 error = rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr); 
2332 KERNEL_UNLOCK(); 2333 2334 return (error); 2335 } 2336 2337 /* 2338 * Definitions of protocols supported in the ROUTE domain. 2339 */ 2340 2341 struct domain routedomain; 2342 2343 struct protosw routesw[] = { 2344 { 2345 .pr_type = SOCK_RAW, 2346 .pr_domain = &routedomain, 2347 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2348 .pr_output = route_output, 2349 .pr_ctloutput = route_ctloutput, 2350 .pr_usrreq = route_usrreq, 2351 .pr_attach = route_attach, 2352 .pr_detach = route_detach, 2353 .pr_init = route_prinit, 2354 .pr_sysctl = sysctl_rtable 2355 } 2356 }; 2357 2358 struct domain routedomain = { 2359 .dom_family = PF_ROUTE, 2360 .dom_name = "route", 2361 .dom_init = route_init, 2362 .dom_protosw = routesw, 2363 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2364 }; 2365