1 /* $OpenBSD: rtsock.c,v 1.370 2023/09/16 09:33:27 mpi Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/pool.h> 73 #include <sys/protosw.h> 74 #include <sys/srp.h> 75 76 #include <net/if.h> 77 #include <net/if_dl.h> 78 #include <net/if_var.h> 79 #include <net/route.h> 80 81 #include <netinet/in.h> 82 83 #ifdef MPLS 84 #include <netmpls/mpls.h> 85 #endif 86 #ifdef IPSEC 87 #include <netinet/ip_ipsp.h> 88 #include <net/if_enc.h> 89 #endif 90 #ifdef BFD 91 #include <net/bfd.h> 92 #endif 93 94 #include <sys/stdarg.h> 95 #include <sys/kernel.h> 96 #include <sys/timeout.h> 97 98 #define ROUTESNDQ 8192 99 #define ROUTERCVQ 8192 100 101 const struct sockaddr route_src = { 2, PF_ROUTE, }; 102 103 struct walkarg { 104 int w_op, w_arg, w_tmemsize; 105 size_t w_given, w_needed; 106 caddr_t w_where, w_tmem; 107 }; 108 109 void route_prinit(void); 110 void rcb_ref(void *, void *); 111 void rcb_unref(void *, void *); 112 int route_output(struct mbuf *, struct socket *); 113 int route_ctloutput(int, struct socket *, int, int, struct mbuf *); 114 int route_attach(struct socket *, int, int); 115 int route_detach(struct socket *); 116 int route_disconnect(struct socket *); 117 int route_shutdown(struct socket *); 118 void route_rcvd(struct socket *); 119 int route_send(struct socket *, struct mbuf *, struct mbuf *, 120 struct mbuf *); 121 int route_sockaddr(struct socket *, struct mbuf *); 122 int route_peeraddr(struct socket *, struct mbuf *); 123 void route_input(struct mbuf *m0, struct socket *, sa_family_t); 124 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 125 int route_cleargateway(struct rtentry *, void *, unsigned int); 126 void rtm_senddesync_timer(void *); 127 void rtm_senddesync(struct socket *); 128 int rtm_sendup(struct socket *, struct mbuf *); 129 130 int rtm_getifa(struct rt_addrinfo *, unsigned int); 131 int rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *, 132 uint8_t, unsigned int); 133 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int); 134 struct mbuf *rtm_msg1(int, struct rt_addrinfo *); 135 int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t, 136 struct walkarg *); 137 int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 138 int rtm_validate_proposal(struct rt_addrinfo *); 139 void rtm_setmetrics(u_long, const struct rt_metrics *, 140 struct rt_kmetrics *); 141 void rtm_getmetrics(const struct rtentry *, 142 struct rt_metrics *); 143 144 int sysctl_iflist(int, struct walkarg *); 145 int sysctl_ifnames(struct walkarg *); 146 int sysctl_rtable_rtstat(void *, size_t *, void *); 147 148 int rt_setsource(unsigned int, struct sockaddr *); 149 150 /* 151 * Locks used to protect struct members 152 * I immutable after creation 153 * s solock 154 */ 155 struct rtpcb { 156 struct socket *rop_socket; /* [I] */ 157 158 SRPL_ENTRY(rtpcb) rop_list; 159 struct refcnt rop_refcnt; 160 struct timeout rop_timeout; 161 unsigned int rop_msgfilter; /* [s] */ 162 unsigned int rop_flagfilter; /* [s] */ 163 unsigned int rop_flags; /* [s] */ 164 u_int rop_rtableid; /* [s] */ 165 unsigned short rop_proto; /* [I] */ 166 u_char rop_priority; /* [s] */ 167 }; 168 #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) 169 170 struct rtptable { 171 SRPL_HEAD(, rtpcb) rtp_list; 172 struct srpl_rc rtp_rc; 173 struct rwlock rtp_lk; 174 unsigned int rtp_count; 175 }; 176 177 struct pool rtpcb_pool; 178 struct rtptable rtptable; 179 180 /* 181 * These flags and timeout are used for indicating to userland (via a 182 * RTM_DESYNC msg) when the route socket has overflowed and messages 183 * have been lost. 184 */ 185 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 186 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 187 queueing more packets */ 188 189 #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ 190 191 void 192 route_prinit(void) 193 { 194 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); 195 rw_init(&rtptable.rtp_lk, "rtsock"); 196 SRPL_INIT(&rtptable.rtp_list); 197 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, 198 IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL); 199 } 200 201 void 202 rcb_ref(void *null, void *v) 203 { 204 struct rtpcb *rop = v; 205 206 refcnt_take(&rop->rop_refcnt); 207 } 208 209 void 210 rcb_unref(void *null, void *v) 211 { 212 struct rtpcb *rop = v; 213 214 refcnt_rele_wake(&rop->rop_refcnt); 215 } 216 217 int 218 route_attach(struct socket *so, int proto, int wait) 219 { 220 struct rtpcb *rop; 221 int error; 222 223 error = soreserve(so, ROUTESNDQ, ROUTERCVQ); 224 if (error) 225 return (error); 226 /* 227 * use the rawcb but allocate a rtpcb, this 228 * code does not care about the additional fields 229 * and works directly on the raw socket. 230 */ 231 rop = pool_get(&rtpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) | 232 PR_ZERO); 233 if (rop == NULL) 234 return (ENOBUFS); 235 so->so_pcb = rop; 236 /* Init the timeout structure */ 237 timeout_set_proc(&rop->rop_timeout, rtm_senddesync_timer, so); 238 refcnt_init(&rop->rop_refcnt); 239 240 rop->rop_socket = so; 241 rop->rop_proto = proto; 242 243 rop->rop_rtableid = curproc->p_p->ps_rtableid; 244 245 soisconnected(so); 246 so->so_options |= SO_USELOOPBACK; 247 248 rw_enter(&rtptable.rtp_lk, RW_WRITE); 249 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, 250 rop_list); 251 rtptable.rtp_count++; 252 rw_exit(&rtptable.rtp_lk); 253 254 return (0); 255 } 256 257 int 258 route_detach(struct socket *so) 259 { 260 struct rtpcb *rop; 261 262 soassertlocked(so); 263 264 rop = sotortpcb(so); 265 if (rop == NULL) 266 return (EINVAL); 267 268 rw_enter(&rtptable.rtp_lk, RW_WRITE); 269 270 rtptable.rtp_count--; 271 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, 272 rop_list); 273 rw_exit(&rtptable.rtp_lk); 274 275 sounlock(so); 276 277 /* wait for all references to drop */ 278 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); 279 timeout_del_barrier(&rop->rop_timeout); 280 281 solock(so); 282 283 so->so_pcb = NULL; 284 KASSERT((so->so_state & SS_NOFDREF) == 0); 285 pool_put(&rtpcb_pool, rop); 286 287 return (0); 288 } 289 290 int 291 route_disconnect(struct socket *so) 292 { 293 soisdisconnected(so); 294 return (0); 295 } 296 297 int 298 route_shutdown(struct socket *so) 299 { 300 socantsendmore(so); 301 return (0); 302 } 303 304 void 305 route_rcvd(struct socket *so) 306 { 307 struct rtpcb *rop = sotortpcb(so); 308 309 soassertlocked(so); 310 311 /* 312 * If we are in a FLUSH state, check if the buffer is 313 * empty so that we can clear the flag. 314 */ 315 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 316 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == 317 rop->rop_socket->so_rcv.sb_hiwat))) 318 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 319 } 320 321 int 322 route_send(struct socket *so, struct mbuf *m, struct mbuf *nam, 323 struct mbuf *control) 324 { 325 int error; 326 327 soassertlocked(so); 328 329 if (control && control->m_len) { 330 error = EOPNOTSUPP; 331 goto out; 332 } 333 334 if (nam) { 335 error = EISCONN; 336 goto out; 337 } 338 339 error = route_output(m, so); 340 m = NULL; 341 342 out: 343 m_freem(control); 344 m_freem(m); 345 346 return (error); 347 } 348 349 int 350 route_sockaddr(struct socket *so, struct mbuf *nam) 351 { 352 return (EINVAL); 353 } 354 355 int 356 route_peeraddr(struct socket *so, struct mbuf *nam) 357 { 358 /* minimal support, just implement a fake peer address */ 359 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); 360 nam->m_len = route_src.sa_len; 361 return (0); 362 } 363 364 int 365 route_ctloutput(int op, struct socket *so, int level, int optname, 366 struct mbuf *m) 367 { 368 struct rtpcb *rop = sotortpcb(so); 369 int error = 0; 370 unsigned int tid, prio; 371 372 if (level != AF_ROUTE) 373 return (EINVAL); 374 375 switch (op) { 376 case PRCO_SETOPT: 377 switch (optname) { 378 case ROUTE_MSGFILTER: 379 if (m == NULL || m->m_len != sizeof(unsigned int)) 380 error = EINVAL; 381 else 382 rop->rop_msgfilter = *mtod(m, unsigned int *); 383 break; 384 case ROUTE_TABLEFILTER: 385 if (m == NULL || m->m_len != sizeof(unsigned int)) { 386 error = EINVAL; 387 break; 388 } 389 tid = *mtod(m, unsigned int *); 390 if (tid != RTABLE_ANY && !rtable_exists(tid)) 391 error = ENOENT; 392 else 393 rop->rop_rtableid = tid; 394 break; 395 case ROUTE_PRIOFILTER: 396 if (m == NULL || m->m_len != sizeof(unsigned int)) { 397 error = EINVAL; 398 break; 399 } 400 prio = *mtod(m, unsigned int *); 401 if (prio > RTP_MAX) 402 error = EINVAL; 403 else 404 rop->rop_priority = prio; 405 break; 406 case ROUTE_FLAGFILTER: 407 if (m == NULL || m->m_len != sizeof(unsigned int)) 408 error = EINVAL; 409 else 410 rop->rop_flagfilter = *mtod(m, unsigned int *); 411 break; 412 default: 413 error = ENOPROTOOPT; 414 break; 415 } 416 break; 417 case PRCO_GETOPT: 418 switch (optname) { 419 case ROUTE_MSGFILTER: 420 m->m_len = sizeof(unsigned int); 421 *mtod(m, unsigned int *) = rop->rop_msgfilter; 422 break; 423 case ROUTE_TABLEFILTER: 424 m->m_len = sizeof(unsigned int); 425 *mtod(m, unsigned int *) = rop->rop_rtableid; 426 break; 427 case ROUTE_PRIOFILTER: 428 m->m_len = sizeof(unsigned int); 429 *mtod(m, unsigned int *) = rop->rop_priority; 430 break; 431 case ROUTE_FLAGFILTER: 432 m->m_len = sizeof(unsigned int); 433 *mtod(m, unsigned int *) = rop->rop_flagfilter; 434 break; 435 default: 436 error = ENOPROTOOPT; 437 break; 438 } 439 } 440 return (error); 441 } 442 443 void 444 rtm_senddesync_timer(void *xso) 445 { 446 struct socket *so = xso; 447 448 solock(so); 449 rtm_senddesync(so); 450 sounlock(so); 451 } 452 453 void 454 rtm_senddesync(struct socket *so) 455 { 456 struct rtpcb *rop = sotortpcb(so); 457 struct mbuf *desync_mbuf; 458 459 soassertlocked(so); 460 461 /* 462 * Dying socket is disconnected by upper layer and there is 463 * no reason to send packet. Also we shouldn't reschedule 464 * timeout(9), otherwise timeout_del_barrier(9) can't help us. 465 */ 466 if ((so->so_state & SS_ISCONNECTED) == 0 || 467 (so->so_rcv.sb_state & SS_CANTRCVMORE)) 468 return; 469 470 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 471 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 472 return; 473 474 /* 475 * If we fail to alloc memory or if sbappendaddr() 476 * fails, re-add timeout and try again. 477 */ 478 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 479 if (desync_mbuf != NULL) { 480 if (sbappendaddr(so, &so->so_rcv, &route_src, 481 desync_mbuf, NULL) != 0) { 482 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 483 sorwakeup(rop->rop_socket); 484 return; 485 } 486 m_freem(desync_mbuf); 487 } 488 /* Re-add timeout to try sending msg again */ 489 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 490 } 491 492 void 493 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 494 { 495 struct socket *so; 496 struct rtpcb *rop; 497 struct rt_msghdr *rtm; 498 struct mbuf *m = m0; 499 struct srp_ref sr; 500 501 /* ensure that we can access the rtm_type via mtod() */ 502 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 503 m_freem(m); 504 return; 505 } 506 507 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 508 /* 509 * If route socket is bound to an address family only send 510 * messages that match the address family. Address family 511 * agnostic messages are always sent. 512 */ 513 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 514 rop->rop_proto != sa_family) 515 continue; 516 517 518 so = rop->rop_socket; 519 solock(so); 520 521 /* 522 * Check to see if we don't want our own messages and 523 * if we can receive anything. 524 */ 525 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 526 !(so->so_state & SS_ISCONNECTED) || 527 (so->so_rcv.sb_state & SS_CANTRCVMORE)) 528 goto next; 529 530 /* filter messages that the process does not want */ 531 rtm = mtod(m, struct rt_msghdr *); 532 /* but RTM_DESYNC can't be filtered */ 533 if (rtm->rtm_type != RTM_DESYNC) { 534 if (rop->rop_msgfilter != 0 && 535 !(rop->rop_msgfilter & (1U << rtm->rtm_type))) 536 goto next; 537 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags)) 538 goto next; 539 } 540 switch (rtm->rtm_type) { 541 case RTM_IFANNOUNCE: 542 case RTM_DESYNC: 543 /* no tableid */ 544 break; 545 case RTM_RESOLVE: 546 case RTM_NEWADDR: 547 case RTM_DELADDR: 548 case RTM_IFINFO: 549 case RTM_80211INFO: 550 case RTM_BFD: 551 /* check against rdomain id */ 552 if (rop->rop_rtableid != RTABLE_ANY && 553 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 554 goto next; 555 break; 556 default: 557 if (rop->rop_priority != 0 && 558 rop->rop_priority < rtm->rtm_priority) 559 goto next; 560 /* check against rtable id */ 561 if (rop->rop_rtableid != RTABLE_ANY && 562 rop->rop_rtableid != rtm->rtm_tableid) 563 goto next; 564 break; 565 } 566 567 /* 568 * Check to see if the flush flag is set. If so, don't queue 569 * any more messages until the flag is cleared. 570 */ 571 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 572 goto next; 573 574 rtm_sendup(so, m); 575 next: 576 sounlock(so); 577 } 578 SRPL_LEAVE(&sr); 579 580 m_freem(m); 581 } 582 583 int 584 rtm_sendup(struct socket *so, struct mbuf *m0) 585 { 586 struct rtpcb *rop = sotortpcb(so); 587 struct mbuf *m; 588 589 soassertlocked(so); 590 591 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 592 if (m == NULL) 593 return (ENOMEM); 594 595 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || 596 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { 597 /* Flag socket as desync'ed and flush required */ 598 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 599 rtm_senddesync(so); 600 m_freem(m); 601 return (ENOBUFS); 602 } 603 604 sorwakeup(so); 605 return (0); 606 } 607 608 struct rt_msghdr * 609 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 610 { 611 struct rt_msghdr *rtm; 612 struct rt_addrinfo info; 613 struct sockaddr_rtlabel sa_rl; 614 struct sockaddr_in6 sa_mask; 615 #ifdef BFD 616 struct sockaddr_bfd sa_bfd; 617 #endif 618 struct ifnet *ifp = NULL; 619 int len; 620 621 bzero(&info, sizeof(info)); 622 info.rti_info[RTAX_DST] = rt_key(rt); 623 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 624 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 625 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 626 #ifdef BFD 627 if (rt->rt_flags & RTF_BFD) { 628 KERNEL_LOCK(); 629 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 630 KERNEL_UNLOCK(); 631 } 632 #endif 633 #ifdef MPLS 634 if (rt->rt_flags & RTF_MPLS) { 635 struct sockaddr_mpls sa_mpls; 636 637 bzero(&sa_mpls, sizeof(sa_mpls)); 638 sa_mpls.smpls_family = AF_MPLS; 639 sa_mpls.smpls_len = sizeof(sa_mpls); 640 sa_mpls.smpls_label = ((struct rt_mpls *) 641 rt->rt_llinfo)->mpls_label; 642 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 643 info.rti_mpls = ((struct rt_mpls *) 644 rt->rt_llinfo)->mpls_operation; 645 } 646 #endif 647 ifp = if_get(rt->rt_ifidx); 648 if (ifp != NULL) { 649 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 650 info.rti_info[RTAX_IFA] = rtable_getsource(tableid, 651 info.rti_info[RTAX_DST]->sa_family); 652 if (info.rti_info[RTAX_IFA] == NULL) 653 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 654 if (ifp->if_flags & IFF_POINTOPOINT) 655 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 656 } 657 if_put(ifp); 658 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 659 660 /* build new route message */ 661 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 662 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 663 664 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 665 rtm->rtm_type = type; 666 rtm->rtm_index = rt->rt_ifidx; 667 rtm->rtm_tableid = tableid; 668 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 669 rtm->rtm_flags = rt->rt_flags; 670 rtm->rtm_pid = curproc->p_p->ps_pid; 671 rtm->rtm_seq = seq; 672 rtm_getmetrics(rt, &rtm->rtm_rmx); 673 rtm->rtm_addrs = info.rti_addrs; 674 #ifdef MPLS 675 rtm->rtm_mpls = info.rti_mpls; 676 #endif 677 return rtm; 678 } 679 680 int 681 route_output(struct mbuf *m, struct socket *so) 682 { 683 struct rt_msghdr *rtm = NULL; 684 struct rtentry *rt = NULL; 685 struct rt_addrinfo info; 686 struct ifnet *ifp; 687 int len, seq, useloopback, error = 0; 688 u_int tableid; 689 u_int8_t prio; 690 u_char vers, type; 691 692 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 693 (m = m_pullup(m, sizeof(int32_t))) == NULL)) 694 return (ENOBUFS); 695 if ((m->m_flags & M_PKTHDR) == 0) 696 panic("route_output"); 697 698 useloopback = so->so_options & SO_USELOOPBACK; 699 700 /* 701 * The socket can't be closed concurrently because the file 702 * descriptor reference is still held. 703 */ 704 705 sounlock(so); 706 707 len = m->m_pkthdr.len; 708 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 709 sizeof(rtm->rtm_hdrlen) || 710 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 711 error = EINVAL; 712 goto fail; 713 } 714 vers = mtod(m, struct rt_msghdr *)->rtm_version; 715 switch (vers) { 716 case RTM_VERSION: 717 if (len < sizeof(struct rt_msghdr)) { 718 error = EINVAL; 719 goto fail; 720 } 721 if (len > RTM_MAXSIZE) { 722 error = EMSGSIZE; 723 goto fail; 724 } 725 rtm = malloc(len, M_RTABLE, M_WAITOK); 726 m_copydata(m, 0, len, rtm); 727 break; 728 default: 729 error = EPROTONOSUPPORT; 730 goto fail; 731 } 732 733 /* Verify that the caller is sending an appropriate message early */ 734 switch (rtm->rtm_type) { 735 case RTM_ADD: 736 case RTM_DELETE: 737 case RTM_GET: 738 case RTM_CHANGE: 739 case RTM_PROPOSAL: 740 case RTM_SOURCE: 741 break; 742 default: 743 error = EOPNOTSUPP; 744 goto fail; 745 } 746 /* 747 * Verify that the header length is valid. 748 * All messages from userland start with a struct rt_msghdr. 749 */ 750 if (rtm->rtm_hdrlen == 0) /* old client */ 751 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 752 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 753 len < rtm->rtm_hdrlen) { 754 error = EINVAL; 755 goto fail; 756 } 757 758 rtm->rtm_pid = curproc->p_p->ps_pid; 759 760 /* 761 * Verify that the caller has the appropriate privilege; RTM_GET 762 * is the only operation the non-superuser is allowed. 763 */ 764 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 765 error = EACCES; 766 goto fail; 767 } 768 tableid = rtm->rtm_tableid; 769 if (!rtable_exists(tableid)) { 770 if (rtm->rtm_type == RTM_ADD) { 771 if ((error = rtable_add(tableid)) != 0) 772 goto fail; 773 } else { 774 error = EINVAL; 775 goto fail; 776 } 777 } 778 779 /* Do not let userland play with kernel-only flags. */ 780 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 781 error = EINVAL; 782 goto fail; 783 } 784 785 /* make sure that kernel-only bits are not set */ 786 rtm->rtm_priority &= RTP_MASK; 787 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 788 rtm->rtm_fmask &= RTF_FMASK; 789 790 if (rtm->rtm_priority != 0) { 791 if (rtm->rtm_priority > RTP_MAX || 792 rtm->rtm_priority == RTP_LOCAL) { 793 error = EINVAL; 794 goto fail; 795 } 796 prio = rtm->rtm_priority; 797 } else if (rtm->rtm_type != RTM_ADD) 798 prio = RTP_ANY; 799 else if (rtm->rtm_flags & RTF_STATIC) 800 prio = 0; 801 else 802 prio = RTP_DEFAULT; 803 804 bzero(&info, sizeof(info)); 805 info.rti_addrs = rtm->rtm_addrs; 806 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 807 len + (caddr_t)rtm, &info)) != 0) 808 goto fail; 809 810 info.rti_flags = rtm->rtm_flags; 811 812 if (rtm->rtm_type != RTM_SOURCE && 813 rtm->rtm_type != RTM_PROPOSAL && 814 (info.rti_info[RTAX_DST] == NULL || 815 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 816 (info.rti_info[RTAX_GATEWAY] != NULL && 817 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 818 info.rti_info[RTAX_GENMASK] != NULL)) { 819 error = EINVAL; 820 goto fail; 821 } 822 #ifdef MPLS 823 info.rti_mpls = rtm->rtm_mpls; 824 #endif 825 826 if (info.rti_info[RTAX_GATEWAY] != NULL && 827 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 828 (info.rti_flags & RTF_CLONING) == 0) { 829 info.rti_flags |= RTF_LLINFO; 830 } 831 832 /* 833 * Validate RTM_PROPOSAL and pass it along or error out. 834 */ 835 if (rtm->rtm_type == RTM_PROPOSAL) { 836 if (rtm_validate_proposal(&info) == -1) { 837 error = EINVAL; 838 goto fail; 839 } 840 /* 841 * If this is a solicitation proposal forward request to 842 * all interfaces. Most handlers will ignore it but at least 843 * umb(4) will send a response to this event. 844 */ 845 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 846 NET_LOCK(); 847 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 848 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 849 } 850 NET_UNLOCK(); 851 } 852 } else if (rtm->rtm_type == RTM_SOURCE) { 853 if (info.rti_info[RTAX_IFA] == NULL) { 854 error = EINVAL; 855 goto fail; 856 } 857 NET_LOCK(); 858 error = rt_setsource(tableid, info.rti_info[RTAX_IFA]); 859 NET_UNLOCK(); 860 if (error) 861 goto fail; 862 } else { 863 error = rtm_output(rtm, &rt, &info, prio, tableid); 864 if (!error) { 865 type = rtm->rtm_type; 866 seq = rtm->rtm_seq; 867 free(rtm, M_RTABLE, len); 868 NET_LOCK_SHARED(); 869 rtm = rtm_report(rt, type, seq, tableid); 870 NET_UNLOCK_SHARED(); 871 len = rtm->rtm_msglen; 872 } 873 } 874 875 rtfree(rt); 876 if (error) { 877 rtm->rtm_errno = error; 878 } else { 879 rtm->rtm_flags |= RTF_DONE; 880 } 881 882 /* 883 * Check to see if we don't want our own messages. 884 */ 885 if (!useloopback) { 886 if (rtptable.rtp_count == 0) { 887 /* no other listener and no loopback of messages */ 888 goto fail; 889 } 890 } 891 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 892 m_freem(m); 893 m = NULL; 894 } else if (m->m_pkthdr.len > len) 895 m_adj(m, len - m->m_pkthdr.len); 896 free(rtm, M_RTABLE, len); 897 if (m) 898 route_input(m, so, info.rti_info[RTAX_DST] ? 899 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 900 solock(so); 901 902 return (error); 903 fail: 904 free(rtm, M_RTABLE, len); 905 m_freem(m); 906 solock(so); 907 908 return (error); 909 } 910 911 int 912 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 913 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 914 { 915 struct rtentry *rt = *prt; 916 struct ifnet *ifp = NULL; 917 int plen, newgate = 0, error = 0; 918 919 switch (rtm->rtm_type) { 920 case RTM_ADD: 921 if (info->rti_info[RTAX_GATEWAY] == NULL) { 922 error = EINVAL; 923 break; 924 } 925 926 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 927 if ((error = route_arp_conflict(rt, info))) { 928 rtfree(rt); 929 rt = NULL; 930 break; 931 } 932 933 /* 934 * We cannot go through a delete/create/insert cycle for 935 * cached route because this can lead to races in the 936 * receive path. Instead we update the L2 cache. 937 */ 938 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) { 939 ifp = if_get(rt->rt_ifidx); 940 if (ifp == NULL) { 941 rtfree(rt); 942 rt = NULL; 943 error = ESRCH; 944 break; 945 } 946 947 goto change; 948 } 949 950 rtfree(rt); 951 rt = NULL; 952 953 NET_LOCK(); 954 if ((error = rtm_getifa(info, tableid)) != 0) { 955 NET_UNLOCK(); 956 break; 957 } 958 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 959 NET_UNLOCK(); 960 if (error == 0) 961 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 962 &rt->rt_rmx); 963 break; 964 case RTM_DELETE: 965 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 966 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 967 prio); 968 if (rt == NULL) { 969 error = ESRCH; 970 break; 971 } 972 973 /* 974 * If we got multipath routes, we require users to specify 975 * a matching gateway. 976 */ 977 if (ISSET(rt->rt_flags, RTF_MPATH) && 978 info->rti_info[RTAX_GATEWAY] == NULL) { 979 error = ESRCH; 980 break; 981 } 982 983 ifp = if_get(rt->rt_ifidx); 984 if (ifp == NULL) { 985 rtfree(rt); 986 rt = NULL; 987 error = ESRCH; 988 break; 989 } 990 991 /* 992 * Invalidate the cache of automagically created and 993 * referenced L2 entries to make sure that ``rt_gwroute'' 994 * pointer stays valid for other CPUs. 995 */ 996 if ((ISSET(rt->rt_flags, RTF_CACHED))) { 997 NET_LOCK(); 998 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 999 /* Reset the MTU of the gateway route. */ 1000 rtable_walk(tableid, rt_key(rt)->sa_family, NULL, 1001 route_cleargateway, rt); 1002 NET_UNLOCK(); 1003 break; 1004 } 1005 1006 /* 1007 * Make sure that local routes are only modified by the 1008 * kernel. 1009 */ 1010 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1011 error = EINVAL; 1012 break; 1013 } 1014 1015 rtfree(rt); 1016 rt = NULL; 1017 1018 NET_LOCK(); 1019 error = rtrequest_delete(info, prio, ifp, &rt, tableid); 1020 NET_UNLOCK(); 1021 break; 1022 case RTM_CHANGE: 1023 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1024 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1025 prio); 1026 /* 1027 * If we got multipath routes, we require users to specify 1028 * a matching gateway. 1029 */ 1030 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 1031 (info->rti_info[RTAX_GATEWAY] == NULL)) { 1032 rtfree(rt); 1033 rt = NULL; 1034 } 1035 1036 /* 1037 * If RTAX_GATEWAY is the argument we're trying to 1038 * change, try to find a compatible route. 1039 */ 1040 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) { 1041 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1042 info->rti_info[RTAX_NETMASK], NULL, prio); 1043 /* Ensure we don't pick a multipath one. */ 1044 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 1045 rtfree(rt); 1046 rt = NULL; 1047 } 1048 } 1049 1050 if (rt == NULL) { 1051 error = ESRCH; 1052 break; 1053 } 1054 1055 /* 1056 * Make sure that local routes are only modified by the 1057 * kernel. 1058 */ 1059 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1060 error = EINVAL; 1061 break; 1062 } 1063 1064 ifp = if_get(rt->rt_ifidx); 1065 if (ifp == NULL) { 1066 rtfree(rt); 1067 rt = NULL; 1068 error = ESRCH; 1069 break; 1070 } 1071 1072 /* 1073 * RTM_CHANGE needs a perfect match. 1074 */ 1075 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family, 1076 info->rti_info[RTAX_NETMASK]); 1077 if (rt_plen(rt) != plen) { 1078 error = ESRCH; 1079 break; 1080 } 1081 1082 if (info->rti_info[RTAX_GATEWAY] != NULL) 1083 if (rt->rt_gateway == NULL || 1084 bcmp(rt->rt_gateway, 1085 info->rti_info[RTAX_GATEWAY], 1086 info->rti_info[RTAX_GATEWAY]->sa_len)) { 1087 newgate = 1; 1088 } 1089 /* 1090 * Check reachable gateway before changing the route. 1091 * New gateway could require new ifaddr, ifp; 1092 * flags may also be different; ifp may be specified 1093 * by ll sockaddr when protocol address is ambiguous. 1094 */ 1095 if (newgate || info->rti_info[RTAX_IFP] != NULL || 1096 info->rti_info[RTAX_IFA] != NULL) { 1097 struct ifaddr *ifa = NULL; 1098 1099 NET_LOCK(); 1100 if ((error = rtm_getifa(info, tableid)) != 0) { 1101 NET_UNLOCK(); 1102 break; 1103 } 1104 ifa = info->rti_ifa; 1105 if (rt->rt_ifa != ifa) { 1106 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 1107 ifafree(rt->rt_ifa); 1108 1109 rt->rt_ifa = ifaref(ifa); 1110 rt->rt_ifidx = ifa->ifa_ifp->if_index; 1111 /* recheck link state after ifp change */ 1112 rt_if_linkstate_change(rt, ifa->ifa_ifp, 1113 tableid); 1114 } 1115 NET_UNLOCK(); 1116 } 1117 change: 1118 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1119 /* When updating the gateway, make sure it is valid. */ 1120 if (!newgate && rt->rt_gateway->sa_family != 1121 info->rti_info[RTAX_GATEWAY]->sa_family) { 1122 error = EINVAL; 1123 break; 1124 } 1125 1126 NET_LOCK(); 1127 error = rt_setgate(rt, 1128 info->rti_info[RTAX_GATEWAY], tableid); 1129 NET_UNLOCK(); 1130 if (error) 1131 break; 1132 } 1133 #ifdef MPLS 1134 if (rtm->rtm_flags & RTF_MPLS) { 1135 NET_LOCK(); 1136 error = rt_mpls_set(rt, 1137 info->rti_info[RTAX_SRC], info->rti_mpls); 1138 NET_UNLOCK(); 1139 if (error) 1140 break; 1141 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) { 1142 NET_LOCK(); 1143 /* if gateway changed remove MPLS information */ 1144 rt_mpls_clear(rt); 1145 NET_UNLOCK(); 1146 } 1147 #endif 1148 1149 #ifdef BFD 1150 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 1151 KERNEL_LOCK(); 1152 error = bfdset(rt); 1153 KERNEL_UNLOCK(); 1154 if (error) 1155 break; 1156 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 1157 ISSET(rtm->rtm_fmask, RTF_BFD)) { 1158 KERNEL_LOCK(); 1159 bfdclear(rt); 1160 KERNEL_UNLOCK(); 1161 } 1162 #endif 1163 1164 NET_LOCK(); 1165 /* Hack to allow some flags to be toggled */ 1166 if (rtm->rtm_fmask) { 1167 /* MPLS flag it is set by rt_mpls_set() */ 1168 rtm->rtm_fmask &= ~RTF_MPLS; 1169 rtm->rtm_flags &= ~RTF_MPLS; 1170 rt->rt_flags = 1171 (rt->rt_flags & ~rtm->rtm_fmask) | 1172 (rtm->rtm_flags & rtm->rtm_fmask); 1173 } 1174 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); 1175 1176 ifp->if_rtrequest(ifp, RTM_ADD, rt); 1177 1178 if (info->rti_info[RTAX_LABEL] != NULL) { 1179 char *rtlabel = ((struct sockaddr_rtlabel *) 1180 info->rti_info[RTAX_LABEL])->sr_label; 1181 rtlabel_unref(rt->rt_labelid); 1182 rt->rt_labelid = rtlabel_name2id(rtlabel); 1183 } 1184 if_group_routechange(info->rti_info[RTAX_DST], 1185 info->rti_info[RTAX_NETMASK]); 1186 rt->rt_locks &= ~(rtm->rtm_inits); 1187 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 1188 NET_UNLOCK(); 1189 break; 1190 case RTM_GET: 1191 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1192 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1193 prio); 1194 if (rt == NULL) 1195 error = ESRCH; 1196 break; 1197 } 1198 1199 if_put(ifp); 1200 *prt = rt; 1201 return (error); 1202 } 1203 1204 struct ifaddr * 1205 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway, 1206 unsigned int rtableid) 1207 { 1208 struct ifaddr *ifa; 1209 1210 if ((flags & RTF_GATEWAY) == 0) { 1211 /* 1212 * If we are adding a route to an interface, 1213 * and the interface is a pt to pt link 1214 * we should search for the destination 1215 * as our clue to the interface. Otherwise 1216 * we can use the local address. 1217 */ 1218 ifa = NULL; 1219 if (flags & RTF_HOST) 1220 ifa = ifa_ifwithdstaddr(dst, rtableid); 1221 if (ifa == NULL) 1222 ifa = ifa_ifwithaddr(gateway, rtableid); 1223 } else { 1224 /* 1225 * If we are adding a route to a remote net 1226 * or host, the gateway may still be on the 1227 * other end of a pt to pt link. 1228 */ 1229 ifa = ifa_ifwithdstaddr(gateway, rtableid); 1230 } 1231 if (ifa == NULL) { 1232 if (gateway->sa_family == AF_LINK) { 1233 struct sockaddr_dl *sdl = satosdl(gateway); 1234 struct ifnet *ifp = if_get(sdl->sdl_index); 1235 1236 if (ifp != NULL) 1237 ifa = ifaof_ifpforaddr(dst, ifp); 1238 if_put(ifp); 1239 } else { 1240 struct rtentry *rt; 1241 1242 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid)); 1243 if (rt != NULL) 1244 ifa = rt->rt_ifa; 1245 rtfree(rt); 1246 } 1247 } 1248 if (ifa == NULL) 1249 return (NULL); 1250 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1251 struct ifaddr *oifa = ifa; 1252 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1253 if (ifa == NULL) 1254 ifa = oifa; 1255 } 1256 return (ifa); 1257 } 1258 1259 int 1260 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid) 1261 { 1262 struct ifnet *ifp = NULL; 1263 1264 /* 1265 * The "returned" `ifa' is guaranteed to be alive only if 1266 * the NET_LOCK() is held. 1267 */ 1268 NET_ASSERT_LOCKED(); 1269 1270 /* 1271 * ifp may be specified by sockaddr_dl when protocol address 1272 * is ambiguous 1273 */ 1274 if (info->rti_info[RTAX_IFP] != NULL) { 1275 struct sockaddr_dl *sdl; 1276 1277 sdl = satosdl(info->rti_info[RTAX_IFP]); 1278 ifp = if_get(sdl->sdl_index); 1279 } 1280 1281 #ifdef IPSEC 1282 /* 1283 * If the destination is a PF_KEY address, we'll look 1284 * for the existence of a encap interface number or address 1285 * in the options list of the gateway. By default, we'll return 1286 * enc0. 1287 */ 1288 if (info->rti_info[RTAX_DST] && 1289 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1290 info->rti_ifa = enc_getifa(rtid, 0); 1291 #endif 1292 1293 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1294 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1295 1296 if (info->rti_ifa == NULL) { 1297 struct sockaddr *sa; 1298 1299 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1300 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1301 sa = info->rti_info[RTAX_DST]; 1302 1303 if (sa != NULL && ifp != NULL) 1304 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1305 else if (info->rti_info[RTAX_DST] != NULL && 1306 info->rti_info[RTAX_GATEWAY] != NULL) 1307 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1308 info->rti_info[RTAX_DST], 1309 info->rti_info[RTAX_GATEWAY], 1310 rtid); 1311 else if (sa != NULL) 1312 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1313 sa, sa, rtid); 1314 } 1315 1316 if_put(ifp); 1317 1318 if (info->rti_ifa == NULL) 1319 return (ENETUNREACH); 1320 1321 return (0); 1322 } 1323 1324 int 1325 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1326 { 1327 struct rtentry *nhrt = arg; 1328 1329 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1330 !ISSET(rt->rt_locks, RTV_MTU)) 1331 rt->rt_mtu = 0; 1332 1333 return (0); 1334 } 1335 1336 /* 1337 * Check if the user request to insert an ARP entry does not conflict 1338 * with existing ones. 1339 * 1340 * Only two entries are allowed for a given IP address: a private one 1341 * (priv) and a public one (pub). 1342 */ 1343 int 1344 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1345 { 1346 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1347 1348 if ((info->rti_flags & RTF_LLINFO) == 0 || 1349 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1350 return (0); 1351 1352 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1353 return (0); 1354 1355 /* If the entry is cached, it can be updated. */ 1356 if (ISSET(rt->rt_flags, RTF_CACHED)) 1357 return (0); 1358 1359 /* 1360 * Same destination, not cached and both "priv" or "pub" conflict. 1361 * If a second entry exists, it always conflict. 1362 */ 1363 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1364 ISSET(rt->rt_flags, RTF_MPATH)) 1365 return (EEXIST); 1366 1367 /* No conflict but an entry exist so we need to force mpath. */ 1368 info->rti_flags |= RTF_MPATH; 1369 return (0); 1370 } 1371 1372 void 1373 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1374 struct rt_kmetrics *out) 1375 { 1376 int64_t expire; 1377 1378 if (which & RTV_MTU) 1379 out->rmx_mtu = in->rmx_mtu; 1380 if (which & RTV_EXPIRE) { 1381 expire = in->rmx_expire; 1382 if (expire != 0) { 1383 expire -= gettime(); 1384 expire += getuptime(); 1385 } 1386 1387 out->rmx_expire = expire; 1388 } 1389 } 1390 1391 void 1392 rtm_getmetrics(const struct rtentry *rt, struct rt_metrics *out) 1393 { 1394 const struct rt_kmetrics *in = &rt->rt_rmx; 1395 int64_t expire; 1396 1397 expire = in->rmx_expire; 1398 if (expire == 0) 1399 expire = rt_timer_get_expire(rt); 1400 if (expire != 0) { 1401 expire -= getuptime(); 1402 expire += gettime(); 1403 } 1404 1405 bzero(out, sizeof(*out)); 1406 out->rmx_locks = in->rmx_locks; 1407 out->rmx_mtu = in->rmx_mtu; 1408 out->rmx_expire = expire; 1409 out->rmx_pksent = in->rmx_pksent; 1410 } 1411 1412 #define ROUNDUP(a) \ 1413 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1414 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1415 1416 int 1417 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1418 { 1419 struct sockaddr *sa; 1420 int i; 1421 1422 /* 1423 * Parse address bits, split address storage in chunks, and 1424 * set info pointers. Use sa_len for traversing the memory 1425 * and check that we stay within in the limit. 1426 */ 1427 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1428 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1429 if ((rtinfo->rti_addrs & (1U << i)) == 0) 1430 continue; 1431 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1432 return (EINVAL); 1433 sa = (struct sockaddr *)cp; 1434 if (cp + sa->sa_len > cplim) 1435 return (EINVAL); 1436 rtinfo->rti_info[i] = sa; 1437 ADVANCE(cp, sa); 1438 } 1439 /* 1440 * Check that the address family is suitable for the route address 1441 * type. Check that each address has a size that fits its family 1442 * and its length is within the size. Strings within addresses must 1443 * be NUL terminated. 1444 */ 1445 for (i = 0; i < RTAX_MAX; i++) { 1446 size_t len, maxlen, size; 1447 1448 sa = rtinfo->rti_info[i]; 1449 if (sa == NULL) 1450 continue; 1451 maxlen = size = 0; 1452 switch (i) { 1453 case RTAX_DST: 1454 case RTAX_GATEWAY: 1455 case RTAX_SRC: 1456 switch (sa->sa_family) { 1457 case AF_INET: 1458 size = sizeof(struct sockaddr_in); 1459 break; 1460 case AF_LINK: 1461 size = sizeof(struct sockaddr_dl); 1462 break; 1463 #ifdef INET6 1464 case AF_INET6: 1465 size = sizeof(struct sockaddr_in6); 1466 break; 1467 #endif 1468 #ifdef MPLS 1469 case AF_MPLS: 1470 size = sizeof(struct sockaddr_mpls); 1471 break; 1472 #endif 1473 } 1474 break; 1475 case RTAX_IFP: 1476 if (sa->sa_family != AF_LINK) 1477 return (EAFNOSUPPORT); 1478 /* 1479 * XXX Should be sizeof(struct sockaddr_dl), but 1480 * route(8) has a bug and provides less memory. 1481 * arp(8) has another bug and uses sizeof pointer. 1482 */ 1483 size = 4; 1484 break; 1485 case RTAX_IFA: 1486 switch (sa->sa_family) { 1487 case AF_INET: 1488 size = sizeof(struct sockaddr_in); 1489 break; 1490 #ifdef INET6 1491 case AF_INET6: 1492 size = sizeof(struct sockaddr_in6); 1493 break; 1494 #endif 1495 default: 1496 return (EAFNOSUPPORT); 1497 } 1498 break; 1499 case RTAX_LABEL: 1500 sa->sa_family = AF_UNSPEC; 1501 maxlen = RTLABEL_LEN; 1502 size = sizeof(struct sockaddr_rtlabel); 1503 break; 1504 #ifdef BFD 1505 case RTAX_BFD: 1506 sa->sa_family = AF_UNSPEC; 1507 size = sizeof(struct sockaddr_bfd); 1508 break; 1509 #endif 1510 case RTAX_DNS: 1511 /* more validation in rtm_validate_proposal */ 1512 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1513 return (EINVAL); 1514 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1515 sr_dns)) 1516 return (EINVAL); 1517 switch (sa->sa_family) { 1518 case AF_INET: 1519 #ifdef INET6 1520 case AF_INET6: 1521 #endif 1522 break; 1523 default: 1524 return (EAFNOSUPPORT); 1525 } 1526 break; 1527 case RTAX_STATIC: 1528 sa->sa_family = AF_UNSPEC; 1529 maxlen = RTSTATIC_LEN; 1530 size = sizeof(struct sockaddr_rtstatic); 1531 break; 1532 case RTAX_SEARCH: 1533 sa->sa_family = AF_UNSPEC; 1534 maxlen = RTSEARCH_LEN; 1535 size = sizeof(struct sockaddr_rtsearch); 1536 break; 1537 } 1538 if (size) { 1539 /* memory for the full struct must be provided */ 1540 if (sa->sa_len < size) 1541 return (EINVAL); 1542 } 1543 if (maxlen) { 1544 /* this should not happen */ 1545 if (2 + maxlen > size) 1546 return (EINVAL); 1547 /* strings must be NUL terminated within the struct */ 1548 len = strnlen(sa->sa_data, maxlen); 1549 if (len >= maxlen || 2 + len >= sa->sa_len) 1550 return (EINVAL); 1551 break; 1552 } 1553 } 1554 return (0); 1555 } 1556 1557 struct mbuf * 1558 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1559 { 1560 struct rt_msghdr *rtm; 1561 struct mbuf *m; 1562 int i; 1563 struct sockaddr *sa; 1564 int len, dlen, hlen; 1565 1566 switch (type) { 1567 case RTM_DELADDR: 1568 case RTM_NEWADDR: 1569 hlen = sizeof(struct ifa_msghdr); 1570 break; 1571 case RTM_IFINFO: 1572 hlen = sizeof(struct if_msghdr); 1573 break; 1574 case RTM_IFANNOUNCE: 1575 hlen = sizeof(struct if_announcemsghdr); 1576 break; 1577 #ifdef BFD 1578 case RTM_BFD: 1579 hlen = sizeof(struct bfd_msghdr); 1580 break; 1581 #endif 1582 case RTM_80211INFO: 1583 hlen = sizeof(struct if_ieee80211_msghdr); 1584 break; 1585 default: 1586 hlen = sizeof(struct rt_msghdr); 1587 break; 1588 } 1589 len = hlen; 1590 for (i = 0; i < RTAX_MAX; i++) { 1591 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1592 continue; 1593 len += ROUNDUP(sa->sa_len); 1594 } 1595 if (len > MCLBYTES) 1596 panic("rtm_msg1"); 1597 m = m_gethdr(M_DONTWAIT, MT_DATA); 1598 if (m && len > MHLEN) { 1599 MCLGET(m, M_DONTWAIT); 1600 if ((m->m_flags & M_EXT) == 0) { 1601 m_free(m); 1602 m = NULL; 1603 } 1604 } 1605 if (m == NULL) 1606 return (m); 1607 m->m_pkthdr.len = m->m_len = len; 1608 m->m_pkthdr.ph_ifidx = 0; 1609 rtm = mtod(m, struct rt_msghdr *); 1610 bzero(rtm, len); 1611 len = hlen; 1612 for (i = 0; i < RTAX_MAX; i++) { 1613 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1614 continue; 1615 rtinfo->rti_addrs |= (1U << i); 1616 dlen = ROUNDUP(sa->sa_len); 1617 if (m_copyback(m, len, sa->sa_len, sa, M_NOWAIT)) { 1618 m_freem(m); 1619 return (NULL); 1620 } 1621 len += dlen; 1622 } 1623 rtm->rtm_msglen = len; 1624 rtm->rtm_hdrlen = hlen; 1625 rtm->rtm_version = RTM_VERSION; 1626 rtm->rtm_type = type; 1627 return (m); 1628 } 1629 1630 int 1631 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1632 struct walkarg *w) 1633 { 1634 int i; 1635 int len, dlen, hlen, second_time = 0; 1636 caddr_t cp0; 1637 1638 rtinfo->rti_addrs = 0; 1639 again: 1640 switch (type) { 1641 case RTM_DELADDR: 1642 case RTM_NEWADDR: 1643 len = sizeof(struct ifa_msghdr); 1644 break; 1645 case RTM_IFINFO: 1646 len = sizeof(struct if_msghdr); 1647 break; 1648 default: 1649 len = sizeof(struct rt_msghdr); 1650 break; 1651 } 1652 hlen = len; 1653 if ((cp0 = cp) != NULL) 1654 cp += len; 1655 for (i = 0; i < RTAX_MAX; i++) { 1656 struct sockaddr *sa; 1657 1658 if ((sa = rtinfo->rti_info[i]) == NULL) 1659 continue; 1660 rtinfo->rti_addrs |= (1U << i); 1661 dlen = ROUNDUP(sa->sa_len); 1662 if (cp) { 1663 bcopy(sa, cp, sa->sa_len); 1664 bzero(cp + sa->sa_len, dlen - sa->sa_len); 1665 cp += dlen; 1666 } 1667 len += dlen; 1668 } 1669 /* align message length to the next natural boundary */ 1670 len = ALIGN(len); 1671 if (cp == 0 && w != NULL && !second_time) { 1672 w->w_needed += len; 1673 if (w->w_needed <= w->w_given && w->w_where) { 1674 if (w->w_tmemsize < len) { 1675 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1676 w->w_tmem = malloc(len, M_RTABLE, 1677 M_NOWAIT | M_ZERO); 1678 if (w->w_tmem) 1679 w->w_tmemsize = len; 1680 } 1681 if (w->w_tmem) { 1682 cp = w->w_tmem; 1683 second_time = 1; 1684 goto again; 1685 } else 1686 w->w_where = 0; 1687 } 1688 } 1689 if (cp && w) /* clear the message header */ 1690 bzero(cp0, hlen); 1691 1692 if (cp) { 1693 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1694 1695 rtm->rtm_version = RTM_VERSION; 1696 rtm->rtm_type = type; 1697 rtm->rtm_msglen = len; 1698 rtm->rtm_hdrlen = hlen; 1699 } 1700 return (len); 1701 } 1702 1703 void 1704 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1705 { 1706 struct rt_addrinfo info; 1707 struct ifnet *ifp; 1708 struct sockaddr_rtlabel sa_rl; 1709 struct sockaddr_in6 sa_mask; 1710 1711 memset(&info, 0, sizeof(info)); 1712 info.rti_info[RTAX_DST] = rt_key(rt); 1713 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1714 if (!ISSET(rt->rt_flags, RTF_HOST)) 1715 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1716 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1717 ifp = if_get(rt->rt_ifidx); 1718 if (ifp != NULL) { 1719 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1720 info.rti_info[RTAX_IFA] = rtable_getsource(rtableid, 1721 info.rti_info[RTAX_DST]->sa_family); 1722 if (info.rti_info[RTAX_IFA] == NULL) 1723 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1724 } 1725 1726 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1727 rtableid); 1728 if_put(ifp); 1729 } 1730 1731 /* 1732 * This routine is called to generate a message from the routing 1733 * socket indicating that a redirect has occurred, a routing lookup 1734 * has failed, or that a protocol has detected timeouts to a particular 1735 * destination. 1736 */ 1737 void 1738 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1739 u_int ifidx, int error, u_int tableid) 1740 { 1741 struct rt_msghdr *rtm; 1742 struct mbuf *m; 1743 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1744 1745 if (rtptable.rtp_count == 0) 1746 return; 1747 m = rtm_msg1(type, rtinfo); 1748 if (m == NULL) 1749 return; 1750 rtm = mtod(m, struct rt_msghdr *); 1751 rtm->rtm_flags = RTF_DONE | flags; 1752 rtm->rtm_priority = prio; 1753 rtm->rtm_errno = error; 1754 rtm->rtm_tableid = tableid; 1755 rtm->rtm_addrs = rtinfo->rti_addrs; 1756 rtm->rtm_index = ifidx; 1757 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1758 } 1759 1760 /* 1761 * This routine is called to generate a message from the routing 1762 * socket indicating that the status of a network interface has changed. 1763 */ 1764 void 1765 rtm_ifchg(struct ifnet *ifp) 1766 { 1767 struct rt_addrinfo info; 1768 struct if_msghdr *ifm; 1769 struct mbuf *m; 1770 1771 if (rtptable.rtp_count == 0) 1772 return; 1773 memset(&info, 0, sizeof(info)); 1774 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1775 m = rtm_msg1(RTM_IFINFO, &info); 1776 if (m == NULL) 1777 return; 1778 ifm = mtod(m, struct if_msghdr *); 1779 ifm->ifm_index = ifp->if_index; 1780 ifm->ifm_tableid = ifp->if_rdomain; 1781 ifm->ifm_flags = ifp->if_flags; 1782 ifm->ifm_xflags = ifp->if_xflags; 1783 if_getdata(ifp, &ifm->ifm_data); 1784 ifm->ifm_addrs = info.rti_addrs; 1785 route_input(m, NULL, AF_UNSPEC); 1786 } 1787 1788 /* 1789 * This is called to generate messages from the routing socket 1790 * indicating a network interface has had addresses associated with it. 1791 * if we ever reverse the logic and replace messages TO the routing 1792 * socket indicate a request to configure interfaces, then it will 1793 * be unnecessary as the routing socket will automatically generate 1794 * copies of it. 1795 */ 1796 void 1797 rtm_addr(int cmd, struct ifaddr *ifa) 1798 { 1799 struct ifnet *ifp = ifa->ifa_ifp; 1800 struct mbuf *m; 1801 struct rt_addrinfo info; 1802 struct ifa_msghdr *ifam; 1803 1804 if (rtptable.rtp_count == 0) 1805 return; 1806 1807 memset(&info, 0, sizeof(info)); 1808 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1809 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1810 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1811 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1812 if ((m = rtm_msg1(cmd, &info)) == NULL) 1813 return; 1814 ifam = mtod(m, struct ifa_msghdr *); 1815 ifam->ifam_index = ifp->if_index; 1816 ifam->ifam_metric = ifa->ifa_metric; 1817 ifam->ifam_flags = ifa->ifa_flags; 1818 ifam->ifam_addrs = info.rti_addrs; 1819 ifam->ifam_tableid = ifp->if_rdomain; 1820 1821 route_input(m, NULL, 1822 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1823 } 1824 1825 /* 1826 * This is called to generate routing socket messages indicating 1827 * network interface arrival and departure. 1828 */ 1829 void 1830 rtm_ifannounce(struct ifnet *ifp, int what) 1831 { 1832 struct if_announcemsghdr *ifan; 1833 struct mbuf *m; 1834 1835 if (rtptable.rtp_count == 0) 1836 return; 1837 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1838 if (m == NULL) 1839 return; 1840 ifan = mtod(m, struct if_announcemsghdr *); 1841 ifan->ifan_index = ifp->if_index; 1842 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1843 ifan->ifan_what = what; 1844 route_input(m, NULL, AF_UNSPEC); 1845 } 1846 1847 #ifdef BFD 1848 /* 1849 * This is used to generate routing socket messages indicating 1850 * the state of a BFD session. 1851 */ 1852 void 1853 rtm_bfd(struct bfd_config *bfd) 1854 { 1855 struct bfd_msghdr *bfdm; 1856 struct sockaddr_bfd sa_bfd; 1857 struct mbuf *m; 1858 struct rt_addrinfo info; 1859 1860 if (rtptable.rtp_count == 0) 1861 return; 1862 memset(&info, 0, sizeof(info)); 1863 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1864 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1865 1866 m = rtm_msg1(RTM_BFD, &info); 1867 if (m == NULL) 1868 return; 1869 bfdm = mtod(m, struct bfd_msghdr *); 1870 bfdm->bm_addrs = info.rti_addrs; 1871 1872 KERNEL_ASSERT_LOCKED(); 1873 bfd2sa(bfd->bc_rt, &sa_bfd); 1874 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1875 1876 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1877 } 1878 #endif /* BFD */ 1879 1880 /* 1881 * This is used to generate routing socket messages indicating 1882 * the state of an ieee80211 interface. 1883 */ 1884 void 1885 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1886 { 1887 struct if_ieee80211_msghdr *ifim; 1888 struct mbuf *m; 1889 1890 if (rtptable.rtp_count == 0) 1891 return; 1892 m = rtm_msg1(RTM_80211INFO, NULL); 1893 if (m == NULL) 1894 return; 1895 ifim = mtod(m, struct if_ieee80211_msghdr *); 1896 ifim->ifim_index = ifp->if_index; 1897 ifim->ifim_tableid = ifp->if_rdomain; 1898 1899 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1900 route_input(m, NULL, AF_UNSPEC); 1901 } 1902 1903 /* 1904 * This is used to generate routing socket messages indicating 1905 * the address selection proposal from an interface. 1906 */ 1907 void 1908 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1909 uint8_t prio) 1910 { 1911 struct rt_msghdr *rtm; 1912 struct mbuf *m; 1913 1914 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1915 if (m == NULL) 1916 return; 1917 rtm = mtod(m, struct rt_msghdr *); 1918 rtm->rtm_flags = RTF_DONE | flags; 1919 rtm->rtm_priority = prio; 1920 rtm->rtm_tableid = ifp->if_rdomain; 1921 rtm->rtm_index = ifp->if_index; 1922 rtm->rtm_addrs = rtinfo->rti_addrs; 1923 1924 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1925 } 1926 1927 /* 1928 * This is used in dumping the kernel table via sysctl(). 1929 */ 1930 int 1931 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1932 { 1933 struct walkarg *w = v; 1934 int error = 0, size; 1935 struct rt_addrinfo info; 1936 struct ifnet *ifp; 1937 #ifdef BFD 1938 struct sockaddr_bfd sa_bfd; 1939 #endif 1940 struct sockaddr_rtlabel sa_rl; 1941 struct sockaddr_in6 sa_mask; 1942 1943 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1944 return 0; 1945 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1946 u_int8_t prio = w->w_arg & RTP_MASK; 1947 if (w->w_arg < 0) { 1948 prio = (-w->w_arg) & RTP_MASK; 1949 /* Show all routes that are not this priority */ 1950 if (prio == (rt->rt_priority & RTP_MASK)) 1951 return 0; 1952 } else { 1953 if (prio != (rt->rt_priority & RTP_MASK) && 1954 prio != RTP_ANY) 1955 return 0; 1956 } 1957 } 1958 bzero(&info, sizeof(info)); 1959 info.rti_info[RTAX_DST] = rt_key(rt); 1960 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1961 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1962 ifp = if_get(rt->rt_ifidx); 1963 if (ifp != NULL) { 1964 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1965 info.rti_info[RTAX_IFA] = 1966 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); 1967 if (info.rti_info[RTAX_IFA] == NULL) 1968 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1969 if (ifp->if_flags & IFF_POINTOPOINT) 1970 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1971 } 1972 if_put(ifp); 1973 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1974 #ifdef BFD 1975 if (rt->rt_flags & RTF_BFD) { 1976 KERNEL_ASSERT_LOCKED(); 1977 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1978 } 1979 #endif 1980 #ifdef MPLS 1981 if (rt->rt_flags & RTF_MPLS) { 1982 struct sockaddr_mpls sa_mpls; 1983 1984 bzero(&sa_mpls, sizeof(sa_mpls)); 1985 sa_mpls.smpls_family = AF_MPLS; 1986 sa_mpls.smpls_len = sizeof(sa_mpls); 1987 sa_mpls.smpls_label = ((struct rt_mpls *) 1988 rt->rt_llinfo)->mpls_label; 1989 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1990 info.rti_mpls = ((struct rt_mpls *) 1991 rt->rt_llinfo)->mpls_operation; 1992 } 1993 #endif 1994 1995 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1996 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { 1997 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1998 1999 rtm->rtm_pid = curproc->p_p->ps_pid; 2000 rtm->rtm_flags = RTF_DONE | rt->rt_flags; 2001 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 2002 rtm_getmetrics(rt, &rtm->rtm_rmx); 2003 /* Do not account the routing table's reference. */ 2004 rtm->rtm_rmx.rmx_refcnt = refcnt_read(&rt->rt_refcnt) - 1; 2005 rtm->rtm_index = rt->rt_ifidx; 2006 rtm->rtm_addrs = info.rti_addrs; 2007 rtm->rtm_tableid = id; 2008 #ifdef MPLS 2009 rtm->rtm_mpls = info.rti_mpls; 2010 #endif 2011 if ((error = copyout(rtm, w->w_where, size)) != 0) 2012 w->w_where = NULL; 2013 else 2014 w->w_where += size; 2015 } 2016 return (error); 2017 } 2018 2019 int 2020 sysctl_iflist(int af, struct walkarg *w) 2021 { 2022 struct ifnet *ifp; 2023 struct ifaddr *ifa; 2024 struct rt_addrinfo info; 2025 int len, error = 0; 2026 2027 bzero(&info, sizeof(info)); 2028 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 2029 if (w->w_arg && w->w_arg != ifp->if_index) 2030 continue; 2031 /* Copy the link-layer address first */ 2032 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 2033 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 2034 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { 2035 struct if_msghdr *ifm; 2036 2037 ifm = (struct if_msghdr *)w->w_tmem; 2038 ifm->ifm_index = ifp->if_index; 2039 ifm->ifm_tableid = ifp->if_rdomain; 2040 ifm->ifm_flags = ifp->if_flags; 2041 if_getdata(ifp, &ifm->ifm_data); 2042 ifm->ifm_addrs = info.rti_addrs; 2043 error = copyout(ifm, w->w_where, len); 2044 if (error) 2045 return (error); 2046 w->w_where += len; 2047 } 2048 info.rti_info[RTAX_IFP] = NULL; 2049 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 2050 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 2051 if (af && af != ifa->ifa_addr->sa_family) 2052 continue; 2053 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 2054 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 2055 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 2056 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 2057 if (w->w_where && w->w_tmem && 2058 w->w_needed <= w->w_given) { 2059 struct ifa_msghdr *ifam; 2060 2061 ifam = (struct ifa_msghdr *)w->w_tmem; 2062 ifam->ifam_index = ifa->ifa_ifp->if_index; 2063 ifam->ifam_flags = ifa->ifa_flags; 2064 ifam->ifam_metric = ifa->ifa_metric; 2065 ifam->ifam_addrs = info.rti_addrs; 2066 error = copyout(w->w_tmem, w->w_where, len); 2067 if (error) 2068 return (error); 2069 w->w_where += len; 2070 } 2071 } 2072 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 2073 info.rti_info[RTAX_BRD] = NULL; 2074 } 2075 return (0); 2076 } 2077 2078 int 2079 sysctl_ifnames(struct walkarg *w) 2080 { 2081 struct if_nameindex_msg ifn; 2082 struct ifnet *ifp; 2083 int error = 0; 2084 2085 /* XXX ignore tableid for now */ 2086 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 2087 if (w->w_arg && w->w_arg != ifp->if_index) 2088 continue; 2089 w->w_needed += sizeof(ifn); 2090 if (w->w_where && w->w_needed <= w->w_given) { 2091 2092 memset(&ifn, 0, sizeof(ifn)); 2093 ifn.if_index = ifp->if_index; 2094 strlcpy(ifn.if_name, ifp->if_xname, 2095 sizeof(ifn.if_name)); 2096 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2097 if (error) 2098 return (error); 2099 w->w_where += sizeof(ifn); 2100 } 2101 } 2102 2103 return (0); 2104 } 2105 2106 int 2107 sysctl_source(int af, u_int tableid, struct walkarg *w) 2108 { 2109 struct sockaddr *sa; 2110 int size, error = 0; 2111 2112 sa = rtable_getsource(tableid, af); 2113 if (sa) { 2114 switch (sa->sa_family) { 2115 case AF_INET: 2116 size = sizeof(struct sockaddr_in); 2117 break; 2118 #ifdef INET6 2119 case AF_INET6: 2120 size = sizeof(struct sockaddr_in6); 2121 break; 2122 #endif 2123 default: 2124 return (0); 2125 } 2126 w->w_needed += size; 2127 if (w->w_where && w->w_needed <= w->w_given) { 2128 if ((error = copyout(sa, w->w_where, size))) 2129 return (error); 2130 w->w_where += size; 2131 } 2132 } 2133 return (0); 2134 } 2135 2136 int 2137 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2138 size_t newlen) 2139 { 2140 int i, error = EINVAL; 2141 u_char af; 2142 struct walkarg w; 2143 struct rt_tableinfo tableinfo; 2144 u_int tableid = 0; 2145 2146 if (new) 2147 return (EPERM); 2148 if (namelen < 3 || namelen > 4) 2149 return (EINVAL); 2150 af = name[0]; 2151 bzero(&w, sizeof(w)); 2152 w.w_where = where; 2153 w.w_given = *given; 2154 w.w_op = name[1]; 2155 w.w_arg = name[2]; 2156 2157 if (namelen == 4) { 2158 tableid = name[3]; 2159 if (!rtable_exists(tableid)) 2160 return (ENOENT); 2161 } else 2162 tableid = curproc->p_p->ps_rtableid; 2163 2164 switch (w.w_op) { 2165 case NET_RT_DUMP: 2166 case NET_RT_FLAGS: 2167 NET_LOCK_SHARED(); 2168 for (i = 1; i <= AF_MAX; i++) { 2169 if (af != 0 && af != i) 2170 continue; 2171 2172 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2173 &w); 2174 if (error == EAFNOSUPPORT) 2175 error = 0; 2176 if (error) 2177 break; 2178 } 2179 NET_UNLOCK_SHARED(); 2180 break; 2181 2182 case NET_RT_IFLIST: 2183 NET_LOCK_SHARED(); 2184 error = sysctl_iflist(af, &w); 2185 NET_UNLOCK_SHARED(); 2186 break; 2187 2188 case NET_RT_STATS: 2189 return (sysctl_rtable_rtstat(where, given, new)); 2190 case NET_RT_TABLE: 2191 tableid = w.w_arg; 2192 if (!rtable_exists(tableid)) 2193 return (ENOENT); 2194 memset(&tableinfo, 0, sizeof tableinfo); 2195 tableinfo.rti_tableid = tableid; 2196 tableinfo.rti_domainid = rtable_l2(tableid); 2197 error = sysctl_rdstruct(where, given, new, 2198 &tableinfo, sizeof(tableinfo)); 2199 return (error); 2200 case NET_RT_IFNAMES: 2201 NET_LOCK_SHARED(); 2202 error = sysctl_ifnames(&w); 2203 NET_UNLOCK_SHARED(); 2204 break; 2205 case NET_RT_SOURCE: 2206 tableid = w.w_arg; 2207 if (!rtable_exists(tableid)) 2208 return (ENOENT); 2209 NET_LOCK_SHARED(); 2210 for (i = 1; i <= AF_MAX; i++) { 2211 if (af != 0 && af != i) 2212 continue; 2213 2214 error = sysctl_source(i, tableid, &w); 2215 if (error == EAFNOSUPPORT) 2216 error = 0; 2217 if (error) 2218 break; 2219 } 2220 NET_UNLOCK_SHARED(); 2221 break; 2222 } 2223 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2224 if (where) { 2225 *given = w.w_where - (caddr_t)where; 2226 if (w.w_needed > w.w_given) 2227 return (ENOMEM); 2228 } else if (w.w_needed == 0) { 2229 *given = 0; 2230 } else { 2231 *given = roundup(w.w_needed + MAX(w.w_needed / 10, 1024), 2232 PAGE_SIZE); 2233 } 2234 return (error); 2235 } 2236 2237 int 2238 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2239 { 2240 extern struct cpumem *rtcounters; 2241 uint64_t counters[rts_ncounters]; 2242 struct rtstat rtstat; 2243 uint32_t *words = (uint32_t *)&rtstat; 2244 int i; 2245 2246 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2247 memset(&rtstat, 0, sizeof rtstat); 2248 counters_read(rtcounters, counters, nitems(counters), NULL); 2249 2250 for (i = 0; i < nitems(counters); i++) 2251 words[i] = (uint32_t)counters[i]; 2252 2253 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2254 } 2255 2256 int 2257 rtm_validate_proposal(struct rt_addrinfo *info) 2258 { 2259 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2260 RTA_SEARCH)) { 2261 return -1; 2262 } 2263 2264 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2265 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2266 if (sa == NULL) 2267 return -1; 2268 switch (sa->sa_family) { 2269 case AF_INET: 2270 if (sa->sa_len != sizeof(struct sockaddr_in)) 2271 return -1; 2272 break; 2273 case AF_INET6: 2274 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2275 return -1; 2276 break; 2277 default: 2278 return -1; 2279 } 2280 } 2281 2282 if (ISSET(info->rti_addrs, RTA_IFA)) { 2283 struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2284 if (sa == NULL) 2285 return -1; 2286 switch (sa->sa_family) { 2287 case AF_INET: 2288 if (sa->sa_len != sizeof(struct sockaddr_in)) 2289 return -1; 2290 break; 2291 case AF_INET6: 2292 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2293 return -1; 2294 break; 2295 default: 2296 return -1; 2297 } 2298 } 2299 2300 if (ISSET(info->rti_addrs, RTA_DNS)) { 2301 struct sockaddr_rtdns *rtdns = 2302 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2303 if (rtdns == NULL) 2304 return -1; 2305 if (rtdns->sr_len > sizeof(*rtdns)) 2306 return -1; 2307 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2308 return -1; 2309 switch (rtdns->sr_family) { 2310 case AF_INET: 2311 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2312 sr_dns)) % sizeof(struct in_addr) != 0) 2313 return -1; 2314 break; 2315 #ifdef INET6 2316 case AF_INET6: 2317 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2318 sr_dns)) % sizeof(struct in6_addr) != 0) 2319 return -1; 2320 break; 2321 #endif 2322 default: 2323 return -1; 2324 } 2325 } 2326 2327 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2328 struct sockaddr_rtstatic *rtstatic = 2329 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2330 if (rtstatic == NULL) 2331 return -1; 2332 if (rtstatic->sr_len > sizeof(*rtstatic)) 2333 return -1; 2334 if (rtstatic->sr_len <= 2335 offsetof(struct sockaddr_rtstatic, sr_static)) 2336 return -1; 2337 } 2338 2339 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2340 struct sockaddr_rtsearch *rtsearch = 2341 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2342 if (rtsearch == NULL) 2343 return -1; 2344 if (rtsearch->sr_len > sizeof(*rtsearch)) 2345 return -1; 2346 if (rtsearch->sr_len <= 2347 offsetof(struct sockaddr_rtsearch, sr_search)) 2348 return -1; 2349 } 2350 2351 return 0; 2352 } 2353 2354 int 2355 rt_setsource(unsigned int rtableid, struct sockaddr *src) 2356 { 2357 struct ifaddr *ifa; 2358 /* 2359 * If source address is 0.0.0.0 or :: 2360 * use automatic source selection 2361 */ 2362 switch(src->sa_family) { 2363 case AF_INET: 2364 if(satosin(src)->sin_addr.s_addr == INADDR_ANY) { 2365 rtable_setsource(rtableid, AF_INET, NULL); 2366 return (0); 2367 } 2368 break; 2369 #ifdef INET6 2370 case AF_INET6: 2371 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 2372 rtable_setsource(rtableid, AF_INET6, NULL); 2373 return (0); 2374 } 2375 break; 2376 #endif 2377 default: 2378 return (EAFNOSUPPORT); 2379 } 2380 2381 /* 2382 * Check if source address is assigned to an interface in the 2383 * same rdomain 2384 */ 2385 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) 2386 return (EINVAL); 2387 2388 return rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr); 2389 } 2390 2391 /* 2392 * Definitions of protocols supported in the ROUTE domain. 2393 */ 2394 2395 const struct pr_usrreqs route_usrreqs = { 2396 .pru_attach = route_attach, 2397 .pru_detach = route_detach, 2398 .pru_disconnect = route_disconnect, 2399 .pru_shutdown = route_shutdown, 2400 .pru_rcvd = route_rcvd, 2401 .pru_send = route_send, 2402 .pru_sockaddr = route_sockaddr, 2403 .pru_peeraddr = route_peeraddr, 2404 }; 2405 2406 const struct protosw routesw[] = { 2407 { 2408 .pr_type = SOCK_RAW, 2409 .pr_domain = &routedomain, 2410 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2411 .pr_ctloutput = route_ctloutput, 2412 .pr_usrreqs = &route_usrreqs, 2413 .pr_init = route_prinit, 2414 .pr_sysctl = sysctl_rtable 2415 } 2416 }; 2417 2418 const struct domain routedomain = { 2419 .dom_family = PF_ROUTE, 2420 .dom_name = "route", 2421 .dom_init = route_init, 2422 .dom_protosw = routesw, 2423 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2424 }; 2425