1 /* $OpenBSD: rtsock.c,v 1.313 2021/05/16 13:09:39 mvs Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/pool.h> 73 #include <sys/protosw.h> 74 #include <sys/srp.h> 75 76 #include <net/if.h> 77 #include <net/if_dl.h> 78 #include <net/if_var.h> 79 #include <net/route.h> 80 81 #include <netinet/in.h> 82 83 #ifdef MPLS 84 #include <netmpls/mpls.h> 85 #endif 86 #ifdef IPSEC 87 #include <netinet/ip_ipsp.h> 88 #include <net/if_enc.h> 89 #endif 90 #ifdef BFD 91 #include <net/bfd.h> 92 #endif 93 94 #include <sys/stdarg.h> 95 #include <sys/kernel.h> 96 #include <sys/timeout.h> 97 98 #define ROUTESNDQ 8192 99 #define ROUTERCVQ 8192 100 101 const struct sockaddr route_src = { 2, PF_ROUTE, }; 102 103 struct walkarg { 104 int w_op, w_arg, w_given, w_needed, w_tmemsize; 105 caddr_t w_where, w_tmem; 106 }; 107 108 void route_prinit(void); 109 void rcb_ref(void *, void *); 110 void rcb_unref(void *, void *); 111 int route_output(struct mbuf *, struct socket *, struct sockaddr *, 112 struct mbuf *); 113 int route_ctloutput(int, struct socket *, int, int, struct mbuf *); 114 int route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, 115 struct mbuf *, struct proc *); 116 void route_input(struct mbuf *m0, struct socket *, sa_family_t); 117 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 118 int route_cleargateway(struct rtentry *, void *, unsigned int); 119 void rtm_senddesync_timer(void *); 120 void rtm_senddesync(struct socket *); 121 int rtm_sendup(struct socket *, struct mbuf *); 122 123 int rtm_getifa(struct rt_addrinfo *, unsigned int); 124 int rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *, 125 uint8_t, unsigned int); 126 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int); 127 struct mbuf *rtm_msg1(int, struct rt_addrinfo *); 128 int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t, 129 struct walkarg *); 130 int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 131 int rtm_validate_proposal(struct rt_addrinfo *); 132 void rtm_setmetrics(u_long, const struct rt_metrics *, 133 struct rt_kmetrics *); 134 void rtm_getmetrics(const struct rt_kmetrics *, 135 struct rt_metrics *); 136 137 int sysctl_iflist(int, struct walkarg *); 138 int sysctl_ifnames(struct walkarg *); 139 int sysctl_rtable_rtstat(void *, size_t *, void *); 140 141 int rt_setsource(unsigned int, struct sockaddr *); 142 143 /* 144 * Locks used to protect struct members 145 * I immutable after creation 146 * s solock 147 */ 148 struct rtpcb { 149 struct socket *rop_socket; /* [I] */ 150 151 SRPL_ENTRY(rtpcb) rop_list; 152 struct refcnt rop_refcnt; 153 struct timeout rop_timeout; 154 unsigned int rop_msgfilter; /* [s] */ 155 unsigned int rop_flagfilter; /* [s] */ 156 unsigned int rop_flags; /* [s] */ 157 u_int rop_rtableid; /* [s] */ 158 unsigned short rop_proto; /* [I] */ 159 u_char rop_priority; /* [s] */ 160 }; 161 #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) 162 163 struct rtptable { 164 SRPL_HEAD(, rtpcb) rtp_list; 165 struct srpl_rc rtp_rc; 166 struct rwlock rtp_lk; 167 unsigned int rtp_count; 168 }; 169 170 struct pool rtpcb_pool; 171 struct rtptable rtptable; 172 173 /* 174 * These flags and timeout are used for indicating to userland (via a 175 * RTM_DESYNC msg) when the route socket has overflowed and messages 176 * have been lost. 177 */ 178 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 179 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 180 queueing more packets */ 181 182 #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ 183 184 void 185 route_prinit(void) 186 { 187 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); 188 rw_init(&rtptable.rtp_lk, "rtsock"); 189 SRPL_INIT(&rtptable.rtp_list); 190 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, 191 IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL); 192 } 193 194 void 195 rcb_ref(void *null, void *v) 196 { 197 struct rtpcb *rop = v; 198 199 refcnt_take(&rop->rop_refcnt); 200 } 201 202 void 203 rcb_unref(void *null, void *v) 204 { 205 struct rtpcb *rop = v; 206 207 refcnt_rele_wake(&rop->rop_refcnt); 208 } 209 210 int 211 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 212 struct mbuf *control, struct proc *p) 213 { 214 struct rtpcb *rop; 215 int error = 0; 216 217 if (req == PRU_CONTROL) 218 return (EOPNOTSUPP); 219 220 soassertlocked(so); 221 222 if (control && control->m_len) { 223 error = EOPNOTSUPP; 224 goto release; 225 } 226 227 rop = sotortpcb(so); 228 if (rop == NULL) { 229 error = EINVAL; 230 goto release; 231 } 232 233 switch (req) { 234 /* no connect, bind, accept. Socket is connected from the start */ 235 case PRU_CONNECT: 236 case PRU_BIND: 237 case PRU_CONNECT2: 238 case PRU_LISTEN: 239 case PRU_ACCEPT: 240 error = EOPNOTSUPP; 241 break; 242 243 case PRU_DISCONNECT: 244 case PRU_ABORT: 245 soisdisconnected(so); 246 break; 247 case PRU_SHUTDOWN: 248 socantsendmore(so); 249 break; 250 case PRU_SENSE: 251 /* stat: don't bother with a blocksize. */ 252 break; 253 254 /* minimal support, just implement a fake peer address */ 255 case PRU_SOCKADDR: 256 error = EINVAL; 257 break; 258 case PRU_PEERADDR: 259 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); 260 nam->m_len = route_src.sa_len; 261 break; 262 263 case PRU_RCVD: 264 /* 265 * If we are in a FLUSH state, check if the buffer is 266 * empty so that we can clear the flag. 267 */ 268 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 269 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == 270 rop->rop_socket->so_rcv.sb_hiwat))) 271 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 272 break; 273 274 case PRU_RCVOOB: 275 case PRU_SENDOOB: 276 error = EOPNOTSUPP; 277 break; 278 case PRU_SEND: 279 if (nam) { 280 error = EISCONN; 281 break; 282 } 283 error = (*so->so_proto->pr_output)(m, so, NULL, NULL); 284 m = NULL; 285 break; 286 default: 287 panic("route_usrreq"); 288 } 289 290 release: 291 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 292 m_freem(control); 293 m_freem(m); 294 } 295 return (error); 296 } 297 298 int 299 route_attach(struct socket *so, int proto) 300 { 301 struct rtpcb *rop; 302 int error; 303 304 error = soreserve(so, ROUTESNDQ, ROUTERCVQ); 305 if (error) 306 return (error); 307 /* 308 * use the rawcb but allocate a rtpcb, this 309 * code does not care about the additional fields 310 * and works directly on the raw socket. 311 */ 312 rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO); 313 so->so_pcb = rop; 314 /* Init the timeout structure */ 315 timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so, 316 TIMEOUT_PROC); 317 refcnt_init(&rop->rop_refcnt); 318 319 rop->rop_socket = so; 320 rop->rop_proto = proto; 321 322 rop->rop_rtableid = curproc->p_p->ps_rtableid; 323 324 soisconnected(so); 325 so->so_options |= SO_USELOOPBACK; 326 327 rw_enter(&rtptable.rtp_lk, RW_WRITE); 328 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, 329 rop_list); 330 rtptable.rtp_count++; 331 rw_exit(&rtptable.rtp_lk); 332 333 return (0); 334 } 335 336 int 337 route_detach(struct socket *so) 338 { 339 struct rtpcb *rop; 340 341 soassertlocked(so); 342 343 rop = sotortpcb(so); 344 if (rop == NULL) 345 return (EINVAL); 346 347 rw_enter(&rtptable.rtp_lk, RW_WRITE); 348 349 rtptable.rtp_count--; 350 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, 351 rop_list); 352 rw_exit(&rtptable.rtp_lk); 353 354 sounlock(so, SL_LOCKED); 355 356 /* wait for all references to drop */ 357 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); 358 timeout_del_barrier(&rop->rop_timeout); 359 360 solock(so); 361 362 so->so_pcb = NULL; 363 KASSERT((so->so_state & SS_NOFDREF) == 0); 364 pool_put(&rtpcb_pool, rop); 365 366 return (0); 367 } 368 369 int 370 route_ctloutput(int op, struct socket *so, int level, int optname, 371 struct mbuf *m) 372 { 373 struct rtpcb *rop = sotortpcb(so); 374 int error = 0; 375 unsigned int tid, prio; 376 377 if (level != AF_ROUTE) 378 return (EINVAL); 379 380 switch (op) { 381 case PRCO_SETOPT: 382 switch (optname) { 383 case ROUTE_MSGFILTER: 384 if (m == NULL || m->m_len != sizeof(unsigned int)) 385 error = EINVAL; 386 else 387 rop->rop_msgfilter = *mtod(m, unsigned int *); 388 break; 389 case ROUTE_TABLEFILTER: 390 if (m == NULL || m->m_len != sizeof(unsigned int)) { 391 error = EINVAL; 392 break; 393 } 394 tid = *mtod(m, unsigned int *); 395 if (tid != RTABLE_ANY && !rtable_exists(tid)) 396 error = ENOENT; 397 else 398 rop->rop_rtableid = tid; 399 break; 400 case ROUTE_PRIOFILTER: 401 if (m == NULL || m->m_len != sizeof(unsigned int)) { 402 error = EINVAL; 403 break; 404 } 405 prio = *mtod(m, unsigned int *); 406 if (prio > RTP_MAX) 407 error = EINVAL; 408 else 409 rop->rop_priority = prio; 410 break; 411 case ROUTE_FLAGFILTER: 412 if (m == NULL || m->m_len != sizeof(unsigned int)) 413 error = EINVAL; 414 else 415 rop->rop_flagfilter = *mtod(m, unsigned int *); 416 break; 417 default: 418 error = ENOPROTOOPT; 419 break; 420 } 421 break; 422 case PRCO_GETOPT: 423 switch (optname) { 424 case ROUTE_MSGFILTER: 425 m->m_len = sizeof(unsigned int); 426 *mtod(m, unsigned int *) = rop->rop_msgfilter; 427 break; 428 case ROUTE_TABLEFILTER: 429 m->m_len = sizeof(unsigned int); 430 *mtod(m, unsigned int *) = rop->rop_rtableid; 431 break; 432 case ROUTE_PRIOFILTER: 433 m->m_len = sizeof(unsigned int); 434 *mtod(m, unsigned int *) = rop->rop_priority; 435 break; 436 case ROUTE_FLAGFILTER: 437 m->m_len = sizeof(unsigned int); 438 *mtod(m, unsigned int *) = rop->rop_flagfilter; 439 break; 440 default: 441 error = ENOPROTOOPT; 442 break; 443 } 444 } 445 return (error); 446 } 447 448 void 449 rtm_senddesync_timer(void *xso) 450 { 451 struct socket *so = xso; 452 int s; 453 454 s = solock(so); 455 rtm_senddesync(so); 456 sounlock(so, s); 457 } 458 459 void 460 rtm_senddesync(struct socket *so) 461 { 462 struct rtpcb *rop = sotortpcb(so); 463 struct mbuf *desync_mbuf; 464 465 soassertlocked(so); 466 467 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 468 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 469 return; 470 471 /* 472 * If we fail to alloc memory or if sbappendaddr() 473 * fails, re-add timeout and try again. 474 */ 475 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 476 if (desync_mbuf != NULL) { 477 if (sbappendaddr(so, &so->so_rcv, &route_src, 478 desync_mbuf, NULL) != 0) { 479 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 480 sorwakeup(rop->rop_socket); 481 return; 482 } 483 m_freem(desync_mbuf); 484 } 485 /* Re-add timeout to try sending msg again */ 486 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 487 } 488 489 void 490 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 491 { 492 struct socket *so; 493 struct rtpcb *rop; 494 struct rt_msghdr *rtm; 495 struct mbuf *m = m0; 496 struct srp_ref sr; 497 int s; 498 499 /* ensure that we can access the rtm_type via mtod() */ 500 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 501 m_freem(m); 502 return; 503 } 504 505 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 506 /* 507 * If route socket is bound to an address family only send 508 * messages that match the address family. Address family 509 * agnostic messages are always sent. 510 */ 511 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 512 rop->rop_proto != sa_family) 513 continue; 514 515 516 so = rop->rop_socket; 517 s = solock(so); 518 519 /* 520 * Check to see if we don't want our own messages and 521 * if we can receive anything. 522 */ 523 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 524 !(so->so_state & SS_ISCONNECTED) || 525 (so->so_state & SS_CANTRCVMORE)) 526 goto next; 527 528 /* filter messages that the process does not want */ 529 rtm = mtod(m, struct rt_msghdr *); 530 /* but RTM_DESYNC can't be filtered */ 531 if (rtm->rtm_type != RTM_DESYNC) { 532 if (rop->rop_msgfilter != 0 && 533 !(rop->rop_msgfilter & (1 << rtm->rtm_type))) 534 goto next; 535 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags)) 536 goto next; 537 } 538 switch (rtm->rtm_type) { 539 case RTM_IFANNOUNCE: 540 case RTM_DESYNC: 541 /* no tableid */ 542 break; 543 case RTM_RESOLVE: 544 case RTM_NEWADDR: 545 case RTM_DELADDR: 546 case RTM_IFINFO: 547 case RTM_80211INFO: 548 case RTM_BFD: 549 /* check against rdomain id */ 550 if (rop->rop_rtableid != RTABLE_ANY && 551 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 552 goto next; 553 break; 554 default: 555 if (rop->rop_priority != 0 && 556 rop->rop_priority < rtm->rtm_priority) 557 goto next; 558 /* check against rtable id */ 559 if (rop->rop_rtableid != RTABLE_ANY && 560 rop->rop_rtableid != rtm->rtm_tableid) 561 goto next; 562 break; 563 } 564 565 /* 566 * Check to see if the flush flag is set. If so, don't queue 567 * any more messages until the flag is cleared. 568 */ 569 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 570 goto next; 571 572 rtm_sendup(so, m); 573 next: 574 sounlock(so, s); 575 } 576 SRPL_LEAVE(&sr); 577 578 m_freem(m); 579 } 580 581 int 582 rtm_sendup(struct socket *so, struct mbuf *m0) 583 { 584 struct rtpcb *rop = sotortpcb(so); 585 struct mbuf *m; 586 587 soassertlocked(so); 588 589 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 590 if (m == NULL) 591 return (ENOMEM); 592 593 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || 594 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { 595 /* Flag socket as desync'ed and flush required */ 596 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 597 rtm_senddesync(so); 598 m_freem(m); 599 return (ENOBUFS); 600 } 601 602 sorwakeup(so); 603 return (0); 604 } 605 606 struct rt_msghdr * 607 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 608 { 609 struct rt_msghdr *rtm; 610 struct rt_addrinfo info; 611 struct sockaddr_rtlabel sa_rl; 612 struct sockaddr_in6 sa_mask; 613 #ifdef BFD 614 struct sockaddr_bfd sa_bfd; 615 #endif 616 struct ifnet *ifp = NULL; 617 int len; 618 619 bzero(&info, sizeof(info)); 620 info.rti_info[RTAX_DST] = rt_key(rt); 621 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 622 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 623 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 624 #ifdef BFD 625 if (rt->rt_flags & RTF_BFD) 626 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 627 #endif 628 #ifdef MPLS 629 if (rt->rt_flags & RTF_MPLS) { 630 struct sockaddr_mpls sa_mpls; 631 632 bzero(&sa_mpls, sizeof(sa_mpls)); 633 sa_mpls.smpls_family = AF_MPLS; 634 sa_mpls.smpls_len = sizeof(sa_mpls); 635 sa_mpls.smpls_label = ((struct rt_mpls *) 636 rt->rt_llinfo)->mpls_label; 637 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 638 info.rti_mpls = ((struct rt_mpls *) 639 rt->rt_llinfo)->mpls_operation; 640 } 641 #endif 642 ifp = if_get(rt->rt_ifidx); 643 if (ifp != NULL) { 644 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 645 info.rti_info[RTAX_IFA] = 646 rtable_getsource(tableid, info.rti_info[RTAX_DST]->sa_family); 647 if (info.rti_info[RTAX_IFA] == NULL) 648 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 649 if (ifp->if_flags & IFF_POINTOPOINT) 650 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 651 } 652 if_put(ifp); 653 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 654 655 /* build new route message */ 656 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 657 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 658 659 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 660 rtm->rtm_type = type; 661 rtm->rtm_index = rt->rt_ifidx; 662 rtm->rtm_tableid = tableid; 663 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 664 rtm->rtm_flags = rt->rt_flags; 665 rtm->rtm_pid = curproc->p_p->ps_pid; 666 rtm->rtm_seq = seq; 667 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 668 rtm->rtm_addrs = info.rti_addrs; 669 #ifdef MPLS 670 rtm->rtm_mpls = info.rti_mpls; 671 #endif 672 return rtm; 673 } 674 675 int 676 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 677 struct mbuf *control) 678 { 679 struct rt_msghdr *rtm = NULL; 680 struct rtentry *rt = NULL; 681 struct rt_addrinfo info; 682 struct ifnet *ifp; 683 int len, seq, useloopback, error = 0; 684 u_int tableid; 685 u_int8_t prio; 686 u_char vers, type; 687 688 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 689 (m = m_pullup(m, sizeof(int32_t))) == 0)) 690 return (ENOBUFS); 691 if ((m->m_flags & M_PKTHDR) == 0) 692 panic("route_output"); 693 694 useloopback = so->so_options & SO_USELOOPBACK; 695 696 /* 697 * The socket can't be closed concurrently because the file 698 * descriptor reference is still held. 699 */ 700 701 sounlock(so, SL_LOCKED); 702 703 len = m->m_pkthdr.len; 704 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 || 705 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 706 error = EINVAL; 707 goto fail; 708 } 709 vers = mtod(m, struct rt_msghdr *)->rtm_version; 710 switch (vers) { 711 case RTM_VERSION: 712 if (len < sizeof(struct rt_msghdr)) { 713 error = EINVAL; 714 goto fail; 715 } 716 if (len > RTM_MAXSIZE) { 717 error = EMSGSIZE; 718 goto fail; 719 } 720 rtm = malloc(len, M_RTABLE, M_WAITOK); 721 m_copydata(m, 0, len, rtm); 722 break; 723 default: 724 error = EPROTONOSUPPORT; 725 goto fail; 726 } 727 728 /* Verify that the caller is sending an appropriate message early */ 729 switch (rtm->rtm_type) { 730 case RTM_ADD: 731 case RTM_DELETE: 732 case RTM_GET: 733 case RTM_CHANGE: 734 case RTM_PROPOSAL: 735 case RTM_SOURCE: 736 break; 737 default: 738 error = EOPNOTSUPP; 739 goto fail; 740 } 741 /* 742 * Verify that the header length is valid. 743 * All messages from userland start with a struct rt_msghdr. 744 */ 745 if (rtm->rtm_hdrlen == 0) /* old client */ 746 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 747 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 748 len < rtm->rtm_hdrlen) { 749 error = EINVAL; 750 goto fail; 751 } 752 753 rtm->rtm_pid = curproc->p_p->ps_pid; 754 755 /* 756 * Verify that the caller has the appropriate privilege; RTM_GET 757 * is the only operation the non-superuser is allowed. 758 */ 759 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 760 error = EACCES; 761 goto fail; 762 } 763 tableid = rtm->rtm_tableid; 764 if (!rtable_exists(tableid)) { 765 if (rtm->rtm_type == RTM_ADD) { 766 if ((error = rtable_add(tableid)) != 0) 767 goto fail; 768 } else { 769 error = EINVAL; 770 goto fail; 771 } 772 } 773 774 /* Do not let userland play with kernel-only flags. */ 775 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 776 error = EINVAL; 777 goto fail; 778 } 779 780 /* make sure that kernel-only bits are not set */ 781 rtm->rtm_priority &= RTP_MASK; 782 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 783 rtm->rtm_fmask &= RTF_FMASK; 784 785 if (rtm->rtm_priority != 0) { 786 if (rtm->rtm_priority > RTP_MAX || 787 rtm->rtm_priority == RTP_LOCAL) { 788 error = EINVAL; 789 goto fail; 790 } 791 prio = rtm->rtm_priority; 792 } else if (rtm->rtm_type != RTM_ADD) 793 prio = RTP_ANY; 794 else if (rtm->rtm_flags & RTF_STATIC) 795 prio = 0; 796 else 797 prio = RTP_DEFAULT; 798 799 bzero(&info, sizeof(info)); 800 info.rti_addrs = rtm->rtm_addrs; 801 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 802 len + (caddr_t)rtm, &info)) != 0) 803 goto fail; 804 805 info.rti_flags = rtm->rtm_flags; 806 807 if (rtm->rtm_type != RTM_SOURCE && 808 rtm->rtm_type != RTM_PROPOSAL && 809 (info.rti_info[RTAX_DST] == NULL || 810 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 811 (info.rti_info[RTAX_GATEWAY] != NULL && 812 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 813 info.rti_info[RTAX_GENMASK] != NULL)) { 814 error = EINVAL; 815 goto fail; 816 } 817 #ifdef MPLS 818 info.rti_mpls = rtm->rtm_mpls; 819 #endif 820 821 if (info.rti_info[RTAX_GATEWAY] != NULL && 822 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 823 (info.rti_flags & RTF_CLONING) == 0) { 824 info.rti_flags |= RTF_LLINFO; 825 } 826 827 /* 828 * Validate RTM_PROPOSAL and pass it along or error out. 829 */ 830 if (rtm->rtm_type == RTM_PROPOSAL) { 831 if (rtm_validate_proposal(&info) == -1) { 832 error = EINVAL; 833 goto fail; 834 } 835 /* 836 * If this is a solicitation proposal forward request to 837 * all interfaces. Most handlers will ignore it but at least 838 * umb(4) will send a response to this event. 839 */ 840 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 841 NET_LOCK(); 842 TAILQ_FOREACH(ifp, &ifnet, if_list) { 843 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 844 } 845 NET_UNLOCK(); 846 } 847 } else if (rtm->rtm_type == RTM_SOURCE) { 848 if (info.rti_info[RTAX_IFA] == NULL) { 849 error = EINVAL; 850 goto fail; 851 } 852 if ((error = 853 rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0) 854 goto fail; 855 } else { 856 error = rtm_output(rtm, &rt, &info, prio, tableid); 857 if (!error) { 858 type = rtm->rtm_type; 859 seq = rtm->rtm_seq; 860 free(rtm, M_RTABLE, len); 861 rtm = rtm_report(rt, type, seq, tableid); 862 len = rtm->rtm_msglen; 863 } 864 } 865 866 rtfree(rt); 867 if (error) { 868 rtm->rtm_errno = error; 869 } else { 870 rtm->rtm_flags |= RTF_DONE; 871 } 872 873 /* 874 * Check to see if we don't want our own messages. 875 */ 876 if (!useloopback) { 877 if (rtptable.rtp_count == 0) { 878 /* no other listener and no loopback of messages */ 879 goto fail; 880 } 881 } 882 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 883 m_freem(m); 884 m = NULL; 885 } else if (m->m_pkthdr.len > len) 886 m_adj(m, len - m->m_pkthdr.len); 887 free(rtm, M_RTABLE, len); 888 if (m) 889 route_input(m, so, info.rti_info[RTAX_DST] ? 890 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 891 solock(so); 892 893 return (error); 894 fail: 895 free(rtm, M_RTABLE, len); 896 m_freem(m); 897 solock(so); 898 899 return (error); 900 } 901 902 int 903 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 904 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 905 { 906 struct rtentry *rt = *prt; 907 struct ifnet *ifp = NULL; 908 int plen, newgate = 0, error = 0; 909 910 switch (rtm->rtm_type) { 911 case RTM_ADD: 912 if (info->rti_info[RTAX_GATEWAY] == NULL) { 913 error = EINVAL; 914 break; 915 } 916 917 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 918 if ((error = route_arp_conflict(rt, info))) { 919 rtfree(rt); 920 rt = NULL; 921 break; 922 } 923 924 /* 925 * We cannot go through a delete/create/insert cycle for 926 * cached route because this can lead to races in the 927 * receive path. Instead we update the L2 cache. 928 */ 929 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 930 goto change; 931 932 rtfree(rt); 933 rt = NULL; 934 935 NET_LOCK(); 936 if ((error = rtm_getifa(info, tableid)) != 0) { 937 NET_UNLOCK(); 938 break; 939 } 940 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 941 NET_UNLOCK(); 942 if (error == 0) 943 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 944 &rt->rt_rmx); 945 break; 946 case RTM_DELETE: 947 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 948 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 949 prio); 950 if (rt == NULL) { 951 error = ESRCH; 952 break; 953 } 954 955 /* 956 * If we got multipath routes, we require users to specify 957 * a matching gateway. 958 */ 959 if (ISSET(rt->rt_flags, RTF_MPATH) && 960 info->rti_info[RTAX_GATEWAY] == NULL) { 961 error = ESRCH; 962 break; 963 } 964 965 /* Detaching an interface requires the KERNEL_LOCK(). */ 966 ifp = if_get(rt->rt_ifidx); 967 KASSERT(ifp != NULL); 968 969 /* 970 * Invalidate the cache of automagically created and 971 * referenced L2 entries to make sure that ``rt_gwroute'' 972 * pointer stays valid for other CPUs. 973 */ 974 if ((ISSET(rt->rt_flags, RTF_CACHED))) { 975 NET_LOCK(); 976 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 977 /* Reset the MTU of the gateway route. */ 978 rtable_walk(tableid, rt_key(rt)->sa_family, NULL, 979 route_cleargateway, rt); 980 NET_UNLOCK(); 981 if_put(ifp); 982 break; 983 } 984 985 /* 986 * Make sure that local routes are only modified by the 987 * kernel. 988 */ 989 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 990 if_put(ifp); 991 error = EINVAL; 992 break; 993 } 994 995 rtfree(rt); 996 rt = NULL; 997 998 NET_LOCK(); 999 error = rtrequest_delete(info, prio, ifp, &rt, tableid); 1000 NET_UNLOCK(); 1001 if_put(ifp); 1002 break; 1003 case RTM_CHANGE: 1004 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1005 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1006 prio); 1007 /* 1008 * If we got multipath routes, we require users to specify 1009 * a matching gateway. 1010 */ 1011 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 1012 (info->rti_info[RTAX_GATEWAY] == NULL)) { 1013 rtfree(rt); 1014 rt = NULL; 1015 } 1016 /* 1017 * If RTAX_GATEWAY is the argument we're trying to 1018 * change, try to find a compatible route. 1019 */ 1020 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) { 1021 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1022 info->rti_info[RTAX_NETMASK], NULL, prio); 1023 /* Ensure we don't pick a multipath one. */ 1024 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 1025 rtfree(rt); 1026 rt = NULL; 1027 } 1028 } 1029 1030 if (rt == NULL) { 1031 error = ESRCH; 1032 break; 1033 } 1034 1035 /* 1036 * Make sure that local routes are only modified by the 1037 * kernel. 1038 */ 1039 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1040 error = EINVAL; 1041 break; 1042 } 1043 1044 /* 1045 * RTM_CHANGE needs a perfect match. 1046 */ 1047 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family, 1048 info->rti_info[RTAX_NETMASK]); 1049 if (rt_plen(rt) != plen) { 1050 error = ESRCH; 1051 break; 1052 } 1053 1054 if (info->rti_info[RTAX_GATEWAY] != NULL) 1055 if (rt->rt_gateway == NULL || 1056 bcmp(rt->rt_gateway, 1057 info->rti_info[RTAX_GATEWAY], 1058 info->rti_info[RTAX_GATEWAY]->sa_len)) { 1059 newgate = 1; 1060 } 1061 /* 1062 * Check reachable gateway before changing the route. 1063 * New gateway could require new ifaddr, ifp; 1064 * flags may also be different; ifp may be specified 1065 * by ll sockaddr when protocol address is ambiguous. 1066 */ 1067 if (newgate || info->rti_info[RTAX_IFP] != NULL || 1068 info->rti_info[RTAX_IFA] != NULL) { 1069 struct ifaddr *ifa = NULL; 1070 1071 NET_LOCK(); 1072 if ((error = rtm_getifa(info, tableid)) != 0) { 1073 NET_UNLOCK(); 1074 break; 1075 } 1076 ifa = info->rti_ifa; 1077 if (rt->rt_ifa != ifa) { 1078 ifp = if_get(rt->rt_ifidx); 1079 KASSERT(ifp != NULL); 1080 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 1081 ifafree(rt->rt_ifa); 1082 if_put(ifp); 1083 1084 ifa->ifa_refcnt++; 1085 rt->rt_ifa = ifa; 1086 rt->rt_ifidx = ifa->ifa_ifp->if_index; 1087 /* recheck link state after ifp change */ 1088 rt_if_linkstate_change(rt, ifa->ifa_ifp, 1089 tableid); 1090 } 1091 NET_UNLOCK(); 1092 } 1093 change: 1094 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1095 /* When updating the gateway, make sure it is valid. */ 1096 if (!newgate && rt->rt_gateway->sa_family != 1097 info->rti_info[RTAX_GATEWAY]->sa_family) { 1098 error = EINVAL; 1099 break; 1100 } 1101 1102 NET_LOCK(); 1103 error = rt_setgate(rt, 1104 info->rti_info[RTAX_GATEWAY], tableid); 1105 NET_UNLOCK(); 1106 if (error) 1107 break; 1108 } 1109 #ifdef MPLS 1110 if (rtm->rtm_flags & RTF_MPLS) { 1111 NET_LOCK(); 1112 error = rt_mpls_set(rt, 1113 info->rti_info[RTAX_SRC], info->rti_mpls); 1114 NET_UNLOCK(); 1115 if (error) 1116 break; 1117 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) { 1118 NET_LOCK(); 1119 /* if gateway changed remove MPLS information */ 1120 rt_mpls_clear(rt); 1121 NET_UNLOCK(); 1122 } 1123 #endif 1124 1125 #ifdef BFD 1126 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 1127 if ((error = bfdset(rt))) 1128 break; 1129 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 1130 ISSET(rtm->rtm_fmask, RTF_BFD)) { 1131 bfdclear(rt); 1132 } 1133 #endif 1134 1135 NET_LOCK(); 1136 /* Hack to allow some flags to be toggled */ 1137 if (rtm->rtm_fmask) { 1138 /* MPLS flag it is set by rt_mpls_set() */ 1139 rtm->rtm_fmask &= ~RTF_MPLS; 1140 rtm->rtm_flags &= ~RTF_MPLS; 1141 rt->rt_flags = 1142 (rt->rt_flags & ~rtm->rtm_fmask) | 1143 (rtm->rtm_flags & rtm->rtm_fmask); 1144 } 1145 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); 1146 1147 ifp = if_get(rt->rt_ifidx); 1148 KASSERT(ifp != NULL); 1149 ifp->if_rtrequest(ifp, RTM_ADD, rt); 1150 if_put(ifp); 1151 1152 if (info->rti_info[RTAX_LABEL] != NULL) { 1153 char *rtlabel = ((struct sockaddr_rtlabel *) 1154 info->rti_info[RTAX_LABEL])->sr_label; 1155 rtlabel_unref(rt->rt_labelid); 1156 rt->rt_labelid = rtlabel_name2id(rtlabel); 1157 } 1158 if_group_routechange(info->rti_info[RTAX_DST], 1159 info->rti_info[RTAX_NETMASK]); 1160 rt->rt_locks &= ~(rtm->rtm_inits); 1161 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 1162 NET_UNLOCK(); 1163 break; 1164 case RTM_GET: 1165 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1166 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1167 prio); 1168 if (rt == NULL) 1169 error = ESRCH; 1170 break; 1171 } 1172 1173 *prt = rt; 1174 return (error); 1175 } 1176 1177 struct ifaddr * 1178 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway, 1179 unsigned int rtableid) 1180 { 1181 struct ifaddr *ifa; 1182 1183 if ((flags & RTF_GATEWAY) == 0) { 1184 /* 1185 * If we are adding a route to an interface, 1186 * and the interface is a pt to pt link 1187 * we should search for the destination 1188 * as our clue to the interface. Otherwise 1189 * we can use the local address. 1190 */ 1191 ifa = NULL; 1192 if (flags & RTF_HOST) 1193 ifa = ifa_ifwithdstaddr(dst, rtableid); 1194 if (ifa == NULL) 1195 ifa = ifa_ifwithaddr(gateway, rtableid); 1196 } else { 1197 /* 1198 * If we are adding a route to a remote net 1199 * or host, the gateway may still be on the 1200 * other end of a pt to pt link. 1201 */ 1202 ifa = ifa_ifwithdstaddr(gateway, rtableid); 1203 } 1204 if (ifa == NULL) { 1205 if (gateway->sa_family == AF_LINK) { 1206 struct sockaddr_dl *sdl = satosdl(gateway); 1207 struct ifnet *ifp = if_get(sdl->sdl_index); 1208 1209 if (ifp != NULL) 1210 ifa = ifaof_ifpforaddr(dst, ifp); 1211 if_put(ifp); 1212 } else { 1213 struct rtentry *rt; 1214 1215 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid)); 1216 if (rt != NULL) 1217 ifa = rt->rt_ifa; 1218 rtfree(rt); 1219 } 1220 } 1221 if (ifa == NULL) 1222 return (NULL); 1223 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1224 struct ifaddr *oifa = ifa; 1225 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1226 if (ifa == NULL) 1227 ifa = oifa; 1228 } 1229 return (ifa); 1230 } 1231 1232 int 1233 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid) 1234 { 1235 struct ifnet *ifp = NULL; 1236 1237 /* 1238 * The "returned" `ifa' is guaranteed to be alive only if 1239 * the NET_LOCK() is held. 1240 */ 1241 NET_ASSERT_LOCKED(); 1242 1243 /* 1244 * ifp may be specified by sockaddr_dl when protocol address 1245 * is ambiguous 1246 */ 1247 if (info->rti_info[RTAX_IFP] != NULL) { 1248 struct sockaddr_dl *sdl; 1249 1250 sdl = satosdl(info->rti_info[RTAX_IFP]); 1251 ifp = if_get(sdl->sdl_index); 1252 } 1253 1254 #ifdef IPSEC 1255 /* 1256 * If the destination is a PF_KEY address, we'll look 1257 * for the existence of a encap interface number or address 1258 * in the options list of the gateway. By default, we'll return 1259 * enc0. 1260 */ 1261 if (info->rti_info[RTAX_DST] && 1262 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1263 info->rti_ifa = enc_getifa(rtid, 0); 1264 #endif 1265 1266 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1267 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1268 1269 if (info->rti_ifa == NULL) { 1270 struct sockaddr *sa; 1271 1272 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1273 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1274 sa = info->rti_info[RTAX_DST]; 1275 1276 if (sa != NULL && ifp != NULL) 1277 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1278 else if (info->rti_info[RTAX_DST] != NULL && 1279 info->rti_info[RTAX_GATEWAY] != NULL) 1280 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1281 info->rti_info[RTAX_DST], 1282 info->rti_info[RTAX_GATEWAY], 1283 rtid); 1284 else if (sa != NULL) 1285 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1286 sa, sa, rtid); 1287 } 1288 1289 if_put(ifp); 1290 1291 if (info->rti_ifa == NULL) 1292 return (ENETUNREACH); 1293 1294 return (0); 1295 } 1296 1297 int 1298 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1299 { 1300 struct rtentry *nhrt = arg; 1301 1302 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1303 !ISSET(rt->rt_locks, RTV_MTU)) 1304 rt->rt_mtu = 0; 1305 1306 return (0); 1307 } 1308 1309 /* 1310 * Check if the user request to insert an ARP entry does not conflict 1311 * with existing ones. 1312 * 1313 * Only two entries are allowed for a given IP address: a private one 1314 * (priv) and a public one (pub). 1315 */ 1316 int 1317 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1318 { 1319 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1320 1321 if ((info->rti_flags & RTF_LLINFO) == 0 || 1322 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1323 return (0); 1324 1325 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1326 return (0); 1327 1328 /* If the entry is cached, it can be updated. */ 1329 if (ISSET(rt->rt_flags, RTF_CACHED)) 1330 return (0); 1331 1332 /* 1333 * Same destination, not cached and both "priv" or "pub" conflict. 1334 * If a second entry exists, it always conflict. 1335 */ 1336 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1337 ISSET(rt->rt_flags, RTF_MPATH)) 1338 return (EEXIST); 1339 1340 /* No conflict but an entry exist so we need to force mpath. */ 1341 info->rti_flags |= RTF_MPATH; 1342 return (0); 1343 } 1344 1345 void 1346 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1347 struct rt_kmetrics *out) 1348 { 1349 int64_t expire; 1350 1351 if (which & RTV_MTU) 1352 out->rmx_mtu = in->rmx_mtu; 1353 if (which & RTV_EXPIRE) { 1354 expire = in->rmx_expire; 1355 if (expire != 0) { 1356 expire -= gettime(); 1357 expire += getuptime(); 1358 } 1359 1360 out->rmx_expire = expire; 1361 } 1362 } 1363 1364 void 1365 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1366 { 1367 int64_t expire; 1368 1369 expire = in->rmx_expire; 1370 if (expire != 0) { 1371 expire -= getuptime(); 1372 expire += gettime(); 1373 } 1374 1375 bzero(out, sizeof(*out)); 1376 out->rmx_locks = in->rmx_locks; 1377 out->rmx_mtu = in->rmx_mtu; 1378 out->rmx_expire = expire; 1379 out->rmx_pksent = in->rmx_pksent; 1380 } 1381 1382 #define ROUNDUP(a) \ 1383 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1384 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1385 1386 int 1387 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1388 { 1389 struct sockaddr *sa; 1390 int i; 1391 1392 /* 1393 * Parse address bits, split address storage in chunks, and 1394 * set info pointers. Use sa_len for traversing the memory 1395 * and check that we stay within in the limit. 1396 */ 1397 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1398 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1399 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1400 continue; 1401 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1402 return (EINVAL); 1403 sa = (struct sockaddr *)cp; 1404 if (cp + sa->sa_len > cplim) 1405 return (EINVAL); 1406 rtinfo->rti_info[i] = sa; 1407 ADVANCE(cp, sa); 1408 } 1409 /* 1410 * Check that the address family is suitable for the route address 1411 * type. Check that each address has a size that fits its family 1412 * and its length is within the size. Strings within addresses must 1413 * be NUL terminated. 1414 */ 1415 for (i = 0; i < RTAX_MAX; i++) { 1416 size_t len, maxlen, size; 1417 1418 sa = rtinfo->rti_info[i]; 1419 if (sa == NULL) 1420 continue; 1421 maxlen = size = 0; 1422 switch (i) { 1423 case RTAX_DST: 1424 case RTAX_GATEWAY: 1425 case RTAX_SRC: 1426 switch (sa->sa_family) { 1427 case AF_INET: 1428 size = sizeof(struct sockaddr_in); 1429 break; 1430 case AF_LINK: 1431 size = sizeof(struct sockaddr_dl); 1432 break; 1433 #ifdef INET6 1434 case AF_INET6: 1435 size = sizeof(struct sockaddr_in6); 1436 break; 1437 #endif 1438 #ifdef MPLS 1439 case AF_MPLS: 1440 size = sizeof(struct sockaddr_mpls); 1441 break; 1442 #endif 1443 } 1444 break; 1445 case RTAX_IFP: 1446 if (sa->sa_family != AF_LINK) 1447 return (EAFNOSUPPORT); 1448 /* 1449 * XXX Should be sizeof(struct sockaddr_dl), but 1450 * route(8) has a bug and provides less memory. 1451 * arp(8) has another bug and uses sizeof pointer. 1452 */ 1453 size = 4; 1454 break; 1455 case RTAX_IFA: 1456 switch (sa->sa_family) { 1457 case AF_INET: 1458 size = sizeof(struct sockaddr_in); 1459 break; 1460 #ifdef INET6 1461 case AF_INET6: 1462 size = sizeof(struct sockaddr_in6); 1463 break; 1464 #endif 1465 default: 1466 return (EAFNOSUPPORT); 1467 } 1468 break; 1469 case RTAX_LABEL: 1470 sa->sa_family = AF_UNSPEC; 1471 maxlen = RTLABEL_LEN; 1472 size = sizeof(struct sockaddr_rtlabel); 1473 break; 1474 #ifdef BFD 1475 case RTAX_BFD: 1476 sa->sa_family = AF_UNSPEC; 1477 size = sizeof(struct sockaddr_bfd); 1478 break; 1479 #endif 1480 case RTAX_DNS: 1481 /* more validation in rtm_validate_proposal */ 1482 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1483 return (EINVAL); 1484 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1485 sr_dns)) 1486 return (EINVAL); 1487 switch (sa->sa_family) { 1488 case AF_INET: 1489 #ifdef INET6 1490 case AF_INET6: 1491 #endif 1492 break; 1493 default: 1494 return (EAFNOSUPPORT); 1495 } 1496 break; 1497 case RTAX_STATIC: 1498 sa->sa_family = AF_UNSPEC; 1499 maxlen = RTSTATIC_LEN; 1500 size = sizeof(struct sockaddr_rtstatic); 1501 break; 1502 case RTAX_SEARCH: 1503 sa->sa_family = AF_UNSPEC; 1504 maxlen = RTSEARCH_LEN; 1505 size = sizeof(struct sockaddr_rtsearch); 1506 break; 1507 } 1508 if (size) { 1509 /* memory for the full struct must be provided */ 1510 if (sa->sa_len < size) 1511 return (EINVAL); 1512 } 1513 if (maxlen) { 1514 /* this should not happen */ 1515 if (2 + maxlen > size) 1516 return (EINVAL); 1517 /* strings must be NUL terminated within the struct */ 1518 len = strnlen(sa->sa_data, maxlen); 1519 if (len >= maxlen || 2 + len >= sa->sa_len) 1520 return (EINVAL); 1521 break; 1522 } 1523 } 1524 return (0); 1525 } 1526 1527 struct mbuf * 1528 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1529 { 1530 struct rt_msghdr *rtm; 1531 struct mbuf *m; 1532 int i; 1533 struct sockaddr *sa; 1534 int len, dlen, hlen; 1535 1536 switch (type) { 1537 case RTM_DELADDR: 1538 case RTM_NEWADDR: 1539 len = sizeof(struct ifa_msghdr); 1540 break; 1541 case RTM_IFINFO: 1542 len = sizeof(struct if_msghdr); 1543 break; 1544 case RTM_IFANNOUNCE: 1545 len = sizeof(struct if_announcemsghdr); 1546 break; 1547 #ifdef BFD 1548 case RTM_BFD: 1549 len = sizeof(struct bfd_msghdr); 1550 break; 1551 #endif 1552 case RTM_80211INFO: 1553 len = sizeof(struct if_ieee80211_msghdr); 1554 break; 1555 default: 1556 len = sizeof(struct rt_msghdr); 1557 break; 1558 } 1559 if (len > MCLBYTES) 1560 panic("rtm_msg1"); 1561 m = m_gethdr(M_DONTWAIT, MT_DATA); 1562 if (m && len > MHLEN) { 1563 MCLGET(m, M_DONTWAIT); 1564 if ((m->m_flags & M_EXT) == 0) { 1565 m_free(m); 1566 m = NULL; 1567 } 1568 } 1569 if (m == NULL) 1570 return (m); 1571 m->m_pkthdr.len = m->m_len = hlen = len; 1572 m->m_pkthdr.ph_ifidx = 0; 1573 rtm = mtod(m, struct rt_msghdr *); 1574 bzero(rtm, len); 1575 for (i = 0; i < RTAX_MAX; i++) { 1576 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1577 continue; 1578 rtinfo->rti_addrs |= (1 << i); 1579 dlen = ROUNDUP(sa->sa_len); 1580 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1581 m_freem(m); 1582 return (NULL); 1583 } 1584 len += dlen; 1585 } 1586 rtm->rtm_msglen = len; 1587 rtm->rtm_hdrlen = hlen; 1588 rtm->rtm_version = RTM_VERSION; 1589 rtm->rtm_type = type; 1590 return (m); 1591 } 1592 1593 int 1594 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1595 struct walkarg *w) 1596 { 1597 int i; 1598 int len, dlen, hlen, second_time = 0; 1599 caddr_t cp0; 1600 1601 rtinfo->rti_addrs = 0; 1602 again: 1603 switch (type) { 1604 case RTM_DELADDR: 1605 case RTM_NEWADDR: 1606 len = sizeof(struct ifa_msghdr); 1607 break; 1608 case RTM_IFINFO: 1609 len = sizeof(struct if_msghdr); 1610 break; 1611 default: 1612 len = sizeof(struct rt_msghdr); 1613 break; 1614 } 1615 hlen = len; 1616 if ((cp0 = cp) != NULL) 1617 cp += len; 1618 for (i = 0; i < RTAX_MAX; i++) { 1619 struct sockaddr *sa; 1620 1621 if ((sa = rtinfo->rti_info[i]) == NULL) 1622 continue; 1623 rtinfo->rti_addrs |= (1 << i); 1624 dlen = ROUNDUP(sa->sa_len); 1625 if (cp) { 1626 bcopy(sa, cp, (size_t)dlen); 1627 cp += dlen; 1628 } 1629 len += dlen; 1630 } 1631 /* align message length to the next natural boundary */ 1632 len = ALIGN(len); 1633 if (cp == 0 && w != NULL && !second_time) { 1634 w->w_needed += len; 1635 if (w->w_needed <= 0 && w->w_where) { 1636 if (w->w_tmemsize < len) { 1637 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1638 w->w_tmem = malloc(len, M_RTABLE, 1639 M_NOWAIT | M_ZERO); 1640 if (w->w_tmem) 1641 w->w_tmemsize = len; 1642 } 1643 if (w->w_tmem) { 1644 cp = w->w_tmem; 1645 second_time = 1; 1646 goto again; 1647 } else 1648 w->w_where = 0; 1649 } 1650 } 1651 if (cp && w) /* clear the message header */ 1652 bzero(cp0, hlen); 1653 1654 if (cp) { 1655 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1656 1657 rtm->rtm_version = RTM_VERSION; 1658 rtm->rtm_type = type; 1659 rtm->rtm_msglen = len; 1660 rtm->rtm_hdrlen = hlen; 1661 } 1662 return (len); 1663 } 1664 1665 void 1666 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1667 { 1668 struct rt_addrinfo info; 1669 struct ifnet *ifp; 1670 struct sockaddr_rtlabel sa_rl; 1671 struct sockaddr_in6 sa_mask; 1672 1673 memset(&info, 0, sizeof(info)); 1674 info.rti_info[RTAX_DST] = rt_key(rt); 1675 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1676 if (!ISSET(rt->rt_flags, RTF_HOST)) 1677 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1678 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1679 ifp = if_get(rt->rt_ifidx); 1680 if (ifp != NULL) { 1681 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1682 info.rti_info[RTAX_IFA] = 1683 rtable_getsource(rtableid, info.rti_info[RTAX_DST]->sa_family); 1684 if (info.rti_info[RTAX_IFA] == NULL) 1685 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1686 } 1687 1688 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1689 rtableid); 1690 if_put(ifp); 1691 } 1692 1693 /* 1694 * This routine is called to generate a message from the routing 1695 * socket indicating that a redirect has occurred, a routing lookup 1696 * has failed, or that a protocol has detected timeouts to a particular 1697 * destination. 1698 */ 1699 void 1700 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1701 u_int ifidx, int error, u_int tableid) 1702 { 1703 struct rt_msghdr *rtm; 1704 struct mbuf *m; 1705 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1706 1707 if (rtptable.rtp_count == 0) 1708 return; 1709 m = rtm_msg1(type, rtinfo); 1710 if (m == NULL) 1711 return; 1712 rtm = mtod(m, struct rt_msghdr *); 1713 rtm->rtm_flags = RTF_DONE | flags; 1714 rtm->rtm_priority = prio; 1715 rtm->rtm_errno = error; 1716 rtm->rtm_tableid = tableid; 1717 rtm->rtm_addrs = rtinfo->rti_addrs; 1718 rtm->rtm_index = ifidx; 1719 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1720 } 1721 1722 /* 1723 * This routine is called to generate a message from the routing 1724 * socket indicating that the status of a network interface has changed. 1725 */ 1726 void 1727 rtm_ifchg(struct ifnet *ifp) 1728 { 1729 struct rt_addrinfo info; 1730 struct if_msghdr *ifm; 1731 struct mbuf *m; 1732 1733 if (rtptable.rtp_count == 0) 1734 return; 1735 memset(&info, 0, sizeof(info)); 1736 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1737 m = rtm_msg1(RTM_IFINFO, &info); 1738 if (m == NULL) 1739 return; 1740 ifm = mtod(m, struct if_msghdr *); 1741 ifm->ifm_index = ifp->if_index; 1742 ifm->ifm_tableid = ifp->if_rdomain; 1743 ifm->ifm_flags = ifp->if_flags; 1744 ifm->ifm_xflags = ifp->if_xflags; 1745 if_getdata(ifp, &ifm->ifm_data); 1746 ifm->ifm_addrs = info.rti_addrs; 1747 route_input(m, NULL, AF_UNSPEC); 1748 } 1749 1750 /* 1751 * This is called to generate messages from the routing socket 1752 * indicating a network interface has had addresses associated with it. 1753 * if we ever reverse the logic and replace messages TO the routing 1754 * socket indicate a request to configure interfaces, then it will 1755 * be unnecessary as the routing socket will automatically generate 1756 * copies of it. 1757 */ 1758 void 1759 rtm_addr(int cmd, struct ifaddr *ifa) 1760 { 1761 struct ifnet *ifp = ifa->ifa_ifp; 1762 struct mbuf *m; 1763 struct rt_addrinfo info; 1764 struct ifa_msghdr *ifam; 1765 1766 if (rtptable.rtp_count == 0) 1767 return; 1768 1769 memset(&info, 0, sizeof(info)); 1770 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1771 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1772 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1773 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1774 if ((m = rtm_msg1(cmd, &info)) == NULL) 1775 return; 1776 ifam = mtod(m, struct ifa_msghdr *); 1777 ifam->ifam_index = ifp->if_index; 1778 ifam->ifam_metric = ifa->ifa_metric; 1779 ifam->ifam_flags = ifa->ifa_flags; 1780 ifam->ifam_addrs = info.rti_addrs; 1781 ifam->ifam_tableid = ifp->if_rdomain; 1782 1783 route_input(m, NULL, 1784 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1785 } 1786 1787 /* 1788 * This is called to generate routing socket messages indicating 1789 * network interface arrival and departure. 1790 */ 1791 void 1792 rtm_ifannounce(struct ifnet *ifp, int what) 1793 { 1794 struct if_announcemsghdr *ifan; 1795 struct mbuf *m; 1796 1797 if (rtptable.rtp_count == 0) 1798 return; 1799 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1800 if (m == NULL) 1801 return; 1802 ifan = mtod(m, struct if_announcemsghdr *); 1803 ifan->ifan_index = ifp->if_index; 1804 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1805 ifan->ifan_what = what; 1806 route_input(m, NULL, AF_UNSPEC); 1807 } 1808 1809 #ifdef BFD 1810 /* 1811 * This is used to generate routing socket messages indicating 1812 * the state of a BFD session. 1813 */ 1814 void 1815 rtm_bfd(struct bfd_config *bfd) 1816 { 1817 struct bfd_msghdr *bfdm; 1818 struct sockaddr_bfd sa_bfd; 1819 struct mbuf *m; 1820 struct rt_addrinfo info; 1821 1822 if (rtptable.rtp_count == 0) 1823 return; 1824 memset(&info, 0, sizeof(info)); 1825 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1826 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1827 1828 m = rtm_msg1(RTM_BFD, &info); 1829 if (m == NULL) 1830 return; 1831 bfdm = mtod(m, struct bfd_msghdr *); 1832 bfdm->bm_addrs = info.rti_addrs; 1833 1834 bfd2sa(bfd->bc_rt, &sa_bfd); 1835 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1836 1837 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1838 } 1839 #endif /* BFD */ 1840 1841 /* 1842 * This is used to generate routing socket messages indicating 1843 * the state of an ieee80211 interface. 1844 */ 1845 void 1846 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1847 { 1848 struct if_ieee80211_msghdr *ifim; 1849 struct mbuf *m; 1850 1851 if (rtptable.rtp_count == 0) 1852 return; 1853 m = rtm_msg1(RTM_80211INFO, NULL); 1854 if (m == NULL) 1855 return; 1856 ifim = mtod(m, struct if_ieee80211_msghdr *); 1857 ifim->ifim_index = ifp->if_index; 1858 ifim->ifim_tableid = ifp->if_rdomain; 1859 1860 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1861 route_input(m, NULL, AF_UNSPEC); 1862 } 1863 1864 /* 1865 * This is used to generate routing socket messages indicating 1866 * the address selection proposal from an interface. 1867 */ 1868 void 1869 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1870 uint8_t prio) 1871 { 1872 struct rt_msghdr *rtm; 1873 struct mbuf *m; 1874 1875 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1876 if (m == NULL) 1877 return; 1878 rtm = mtod(m, struct rt_msghdr *); 1879 rtm->rtm_flags = RTF_DONE | flags; 1880 rtm->rtm_priority = prio; 1881 rtm->rtm_tableid = ifp->if_rdomain; 1882 rtm->rtm_index = ifp->if_index; 1883 rtm->rtm_addrs = rtinfo->rti_addrs; 1884 1885 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1886 } 1887 1888 /* 1889 * This is used in dumping the kernel table via sysctl(). 1890 */ 1891 int 1892 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1893 { 1894 struct walkarg *w = v; 1895 int error = 0, size; 1896 struct rt_addrinfo info; 1897 struct ifnet *ifp; 1898 #ifdef BFD 1899 struct sockaddr_bfd sa_bfd; 1900 #endif 1901 struct sockaddr_rtlabel sa_rl; 1902 struct sockaddr_in6 sa_mask; 1903 1904 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1905 return 0; 1906 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1907 u_int8_t prio = w->w_arg & RTP_MASK; 1908 if (w->w_arg < 0) { 1909 prio = (-w->w_arg) & RTP_MASK; 1910 /* Show all routes that are not this priority */ 1911 if (prio == (rt->rt_priority & RTP_MASK)) 1912 return 0; 1913 } else { 1914 if (prio != (rt->rt_priority & RTP_MASK) && 1915 prio != RTP_ANY) 1916 return 0; 1917 } 1918 } 1919 bzero(&info, sizeof(info)); 1920 info.rti_info[RTAX_DST] = rt_key(rt); 1921 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1922 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1923 ifp = if_get(rt->rt_ifidx); 1924 if (ifp != NULL) { 1925 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1926 info.rti_info[RTAX_IFA] = 1927 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); 1928 if (info.rti_info[RTAX_IFA] == NULL) 1929 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1930 if (ifp->if_flags & IFF_POINTOPOINT) 1931 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1932 } 1933 if_put(ifp); 1934 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1935 #ifdef BFD 1936 if (rt->rt_flags & RTF_BFD) 1937 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1938 #endif 1939 #ifdef MPLS 1940 if (rt->rt_flags & RTF_MPLS) { 1941 struct sockaddr_mpls sa_mpls; 1942 1943 bzero(&sa_mpls, sizeof(sa_mpls)); 1944 sa_mpls.smpls_family = AF_MPLS; 1945 sa_mpls.smpls_len = sizeof(sa_mpls); 1946 sa_mpls.smpls_label = ((struct rt_mpls *) 1947 rt->rt_llinfo)->mpls_label; 1948 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1949 info.rti_mpls = ((struct rt_mpls *) 1950 rt->rt_llinfo)->mpls_operation; 1951 } 1952 #endif 1953 1954 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1955 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1956 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1957 1958 rtm->rtm_pid = curproc->p_p->ps_pid; 1959 rtm->rtm_flags = rt->rt_flags; 1960 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1961 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1962 /* Do not account the routing table's reference. */ 1963 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1964 rtm->rtm_index = rt->rt_ifidx; 1965 rtm->rtm_addrs = info.rti_addrs; 1966 rtm->rtm_tableid = id; 1967 #ifdef MPLS 1968 rtm->rtm_mpls = info.rti_mpls; 1969 #endif 1970 if ((error = copyout(rtm, w->w_where, size)) != 0) 1971 w->w_where = NULL; 1972 else 1973 w->w_where += size; 1974 } 1975 return (error); 1976 } 1977 1978 int 1979 sysctl_iflist(int af, struct walkarg *w) 1980 { 1981 struct ifnet *ifp; 1982 struct ifaddr *ifa; 1983 struct rt_addrinfo info; 1984 int len, error = 0; 1985 1986 bzero(&info, sizeof(info)); 1987 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1988 if (w->w_arg && w->w_arg != ifp->if_index) 1989 continue; 1990 /* Copy the link-layer address first */ 1991 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1992 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 1993 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1994 struct if_msghdr *ifm; 1995 1996 ifm = (struct if_msghdr *)w->w_tmem; 1997 ifm->ifm_index = ifp->if_index; 1998 ifm->ifm_tableid = ifp->if_rdomain; 1999 ifm->ifm_flags = ifp->if_flags; 2000 if_getdata(ifp, &ifm->ifm_data); 2001 ifm->ifm_addrs = info.rti_addrs; 2002 error = copyout(ifm, w->w_where, len); 2003 if (error) 2004 return (error); 2005 w->w_where += len; 2006 } 2007 info.rti_info[RTAX_IFP] = NULL; 2008 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 2009 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 2010 if (af && af != ifa->ifa_addr->sa_family) 2011 continue; 2012 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 2013 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 2014 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 2015 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 2016 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 2017 struct ifa_msghdr *ifam; 2018 2019 ifam = (struct ifa_msghdr *)w->w_tmem; 2020 ifam->ifam_index = ifa->ifa_ifp->if_index; 2021 ifam->ifam_flags = ifa->ifa_flags; 2022 ifam->ifam_metric = ifa->ifa_metric; 2023 ifam->ifam_addrs = info.rti_addrs; 2024 error = copyout(w->w_tmem, w->w_where, len); 2025 if (error) 2026 return (error); 2027 w->w_where += len; 2028 } 2029 } 2030 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 2031 info.rti_info[RTAX_BRD] = NULL; 2032 } 2033 return (0); 2034 } 2035 2036 int 2037 sysctl_ifnames(struct walkarg *w) 2038 { 2039 struct if_nameindex_msg ifn; 2040 struct ifnet *ifp; 2041 int error = 0; 2042 2043 /* XXX ignore tableid for now */ 2044 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2045 if (w->w_arg && w->w_arg != ifp->if_index) 2046 continue; 2047 w->w_needed += sizeof(ifn); 2048 if (w->w_where && w->w_needed <= 0) { 2049 2050 memset(&ifn, 0, sizeof(ifn)); 2051 ifn.if_index = ifp->if_index; 2052 strlcpy(ifn.if_name, ifp->if_xname, 2053 sizeof(ifn.if_name)); 2054 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2055 if (error) 2056 return (error); 2057 w->w_where += sizeof(ifn); 2058 } 2059 } 2060 2061 return (0); 2062 } 2063 2064 int 2065 sysctl_source(int af, u_int tableid, struct walkarg *w) 2066 { 2067 struct sockaddr *sa; 2068 int size, error = 0; 2069 2070 sa = rtable_getsource(tableid, af); 2071 if (sa) { 2072 switch (sa->sa_family) { 2073 case AF_INET: 2074 size = sizeof(struct sockaddr_in); 2075 break; 2076 #ifdef INET6 2077 case AF_INET6: 2078 size = sizeof(struct sockaddr_in6); 2079 break; 2080 #endif 2081 default: 2082 return (0); 2083 } 2084 w->w_needed += size; 2085 if (w->w_where && w->w_needed <= 0) { 2086 if ((error = copyout(sa, w->w_where, size))) 2087 return (error); 2088 w->w_where += size; 2089 } 2090 } 2091 return (0); 2092 } 2093 2094 int 2095 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2096 size_t newlen) 2097 { 2098 int i, error = EINVAL; 2099 u_char af; 2100 struct walkarg w; 2101 struct rt_tableinfo tableinfo; 2102 u_int tableid = 0; 2103 2104 if (new) 2105 return (EPERM); 2106 if (namelen < 3 || namelen > 4) 2107 return (EINVAL); 2108 af = name[0]; 2109 bzero(&w, sizeof(w)); 2110 w.w_where = where; 2111 w.w_given = *given; 2112 w.w_needed = 0 - w.w_given; 2113 w.w_op = name[1]; 2114 w.w_arg = name[2]; 2115 2116 if (namelen == 4) { 2117 tableid = name[3]; 2118 if (!rtable_exists(tableid)) 2119 return (ENOENT); 2120 } else 2121 tableid = curproc->p_p->ps_rtableid; 2122 2123 switch (w.w_op) { 2124 case NET_RT_DUMP: 2125 case NET_RT_FLAGS: 2126 NET_LOCK(); 2127 for (i = 1; i <= AF_MAX; i++) { 2128 if (af != 0 && af != i) 2129 continue; 2130 2131 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2132 &w); 2133 if (error == EAFNOSUPPORT) 2134 error = 0; 2135 if (error) 2136 break; 2137 } 2138 NET_UNLOCK(); 2139 break; 2140 2141 case NET_RT_IFLIST: 2142 NET_LOCK(); 2143 error = sysctl_iflist(af, &w); 2144 NET_UNLOCK(); 2145 break; 2146 2147 case NET_RT_STATS: 2148 return (sysctl_rtable_rtstat(where, given, new)); 2149 case NET_RT_TABLE: 2150 tableid = w.w_arg; 2151 if (!rtable_exists(tableid)) 2152 return (ENOENT); 2153 memset(&tableinfo, 0, sizeof tableinfo); 2154 tableinfo.rti_tableid = tableid; 2155 tableinfo.rti_domainid = rtable_l2(tableid); 2156 error = sysctl_rdstruct(where, given, new, 2157 &tableinfo, sizeof(tableinfo)); 2158 return (error); 2159 case NET_RT_IFNAMES: 2160 NET_LOCK(); 2161 error = sysctl_ifnames(&w); 2162 NET_UNLOCK(); 2163 break; 2164 case NET_RT_SOURCE: 2165 tableid = w.w_arg; 2166 if (!rtable_exists(tableid)) 2167 return (ENOENT); 2168 NET_LOCK(); 2169 for (i = 1; i <= AF_MAX; i++) { 2170 if (af != 0 && af != i) 2171 continue; 2172 2173 error = sysctl_source(i, tableid, &w); 2174 if (error == EAFNOSUPPORT) 2175 error = 0; 2176 if (error) 2177 break; 2178 } 2179 NET_UNLOCK(); 2180 break; 2181 } 2182 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2183 w.w_needed += w.w_given; 2184 if (where) { 2185 *given = w.w_where - (caddr_t)where; 2186 if (*given < w.w_needed) 2187 return (ENOMEM); 2188 } else 2189 *given = (11 * w.w_needed) / 10; 2190 2191 return (error); 2192 } 2193 2194 int 2195 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2196 { 2197 extern struct cpumem *rtcounters; 2198 uint64_t counters[rts_ncounters]; 2199 struct rtstat rtstat; 2200 uint32_t *words = (uint32_t *)&rtstat; 2201 int i; 2202 2203 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2204 memset(&rtstat, 0, sizeof rtstat); 2205 counters_read(rtcounters, counters, nitems(counters)); 2206 2207 for (i = 0; i < nitems(counters); i++) 2208 words[i] = (uint32_t)counters[i]; 2209 2210 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2211 } 2212 2213 int 2214 rtm_validate_proposal(struct rt_addrinfo *info) 2215 { 2216 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2217 RTA_SEARCH)) { 2218 return -1; 2219 } 2220 2221 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2222 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2223 if (sa == NULL) 2224 return -1; 2225 switch (sa->sa_family) { 2226 case AF_INET: 2227 if (sa->sa_len != sizeof(struct sockaddr_in)) 2228 return -1; 2229 break; 2230 case AF_INET6: 2231 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2232 return -1; 2233 break; 2234 default: 2235 return -1; 2236 } 2237 } 2238 2239 if (ISSET(info->rti_addrs, RTA_IFA)) { 2240 struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2241 if (sa == NULL) 2242 return -1; 2243 switch (sa->sa_family) { 2244 case AF_INET: 2245 if (sa->sa_len != sizeof(struct sockaddr_in)) 2246 return -1; 2247 break; 2248 case AF_INET6: 2249 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2250 return -1; 2251 break; 2252 default: 2253 return -1; 2254 } 2255 } 2256 2257 if (ISSET(info->rti_addrs, RTA_DNS)) { 2258 struct sockaddr_rtdns *rtdns = 2259 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2260 if (rtdns == NULL) 2261 return -1; 2262 if (rtdns->sr_len > sizeof(*rtdns)) 2263 return -1; 2264 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2265 return -1; 2266 switch (rtdns->sr_family) { 2267 case AF_INET: 2268 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2269 sr_dns)) % sizeof(struct in_addr) != 0) 2270 return -1; 2271 break; 2272 #ifdef INET6 2273 case AF_INET6: 2274 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2275 sr_dns)) % sizeof(struct in6_addr) != 0) 2276 return -1; 2277 break; 2278 #endif 2279 default: 2280 return -1; 2281 } 2282 } 2283 2284 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2285 struct sockaddr_rtstatic *rtstatic = 2286 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2287 if (rtstatic == NULL) 2288 return -1; 2289 if (rtstatic->sr_len > sizeof(*rtstatic)) 2290 return -1; 2291 if (rtstatic->sr_len <= 2292 offsetof(struct sockaddr_rtstatic, sr_static)) 2293 return -1; 2294 } 2295 2296 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2297 struct sockaddr_rtsearch *rtsearch = 2298 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2299 if (rtsearch == NULL) 2300 return -1; 2301 if (rtsearch->sr_len > sizeof(*rtsearch)) 2302 return -1; 2303 if (rtsearch->sr_len <= 2304 offsetof(struct sockaddr_rtsearch, sr_search)) 2305 return -1; 2306 } 2307 2308 return 0; 2309 } 2310 2311 int 2312 rt_setsource(unsigned int rtableid, struct sockaddr *src) 2313 { 2314 struct ifaddr *ifa; 2315 int error; 2316 /* 2317 * If source address is 0.0.0.0 or :: 2318 * use automatic source selection 2319 */ 2320 switch(src->sa_family) { 2321 case AF_INET: 2322 if(satosin(src)->sin_addr.s_addr == INADDR_ANY) { 2323 rtable_setsource(rtableid, AF_INET, NULL); 2324 return (0); 2325 } 2326 break; 2327 #ifdef INET6 2328 case AF_INET6: 2329 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 2330 rtable_setsource(rtableid, AF_INET6, NULL); 2331 return (0); 2332 } 2333 break; 2334 #endif 2335 default: 2336 return (EAFNOSUPPORT); 2337 } 2338 2339 KERNEL_LOCK(); 2340 /* 2341 * Check if source address is assigned to an interface in the 2342 * same rdomain 2343 */ 2344 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) { 2345 KERNEL_UNLOCK(); 2346 return (EINVAL); 2347 } 2348 2349 error = rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr); 2350 KERNEL_UNLOCK(); 2351 2352 return (error); 2353 } 2354 2355 /* 2356 * Definitions of protocols supported in the ROUTE domain. 2357 */ 2358 2359 struct domain routedomain; 2360 2361 struct protosw routesw[] = { 2362 { 2363 .pr_type = SOCK_RAW, 2364 .pr_domain = &routedomain, 2365 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2366 .pr_output = route_output, 2367 .pr_ctloutput = route_ctloutput, 2368 .pr_usrreq = route_usrreq, 2369 .pr_attach = route_attach, 2370 .pr_detach = route_detach, 2371 .pr_init = route_prinit, 2372 .pr_sysctl = sysctl_rtable 2373 } 2374 }; 2375 2376 struct domain routedomain = { 2377 .dom_family = PF_ROUTE, 2378 .dom_name = "route", 2379 .dom_init = route_init, 2380 .dom_protosw = routesw, 2381 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2382 }; 2383