1 /* $OpenBSD: rtsock.c,v 1.378 2025/01/24 09:16:55 mvs Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
*
 *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/domain.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/srp.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>

#include <netinet/in.h>

#ifdef MPLS
#include <netmpls/mpls.h>
#endif
#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#include <net/if_enc.h>
#endif
#ifdef BFD
#include <net/bfd.h>
#endif

#include <sys/stdarg.h>
#include <sys/kernel.h>
#include <sys/timeout.h>

/* Send and receive buffer sizes handed to soreserve() by route_attach(). */
#define	ROUTESNDQ	8192
#define	ROUTERCVQ	8192

/*
 * Address attached to every message queued on a routing socket and
 * returned as the fake peer address by route_peeraddr().
 * NOTE(review): the initializer presumably sets sa_len = 2 and
 * sa_family = PF_ROUTE -- confirm against struct sockaddr.
 */
const struct sockaddr route_src = { 2, PF_ROUTE, };

/*
 * Argument block passed to rtm_msg2() and the sysctl helpers declared
 * below.  NOTE(review): the users of these fields are outside this part
 * of the file; field meanings are not documented here on purpose.
 */
struct walkarg {
	int	w_op, w_arg, w_tmemsize;
	size_t	w_given, w_needed;
	caddr_t	w_where, w_tmem;
};

/* Routing socket protocol entry points and internal helpers. */
void	route_prinit(void);
void	rcb_ref(void *, void *);
void	rcb_unref(void *, void *);
int	route_output(struct mbuf *, struct socket *);
int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int	route_attach(struct socket *, int, int);
int	route_detach(struct socket *);
int	route_disconnect(struct socket *);
int	route_shutdown(struct socket *);
void	route_rcvd(struct socket *);
int	route_send(struct socket *, struct mbuf *, struct mbuf *,
	    struct mbuf *);
int	route_sockaddr(struct socket *, struct mbuf *);
int	route_peeraddr(struct socket *, struct mbuf *);
void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int	route_cleargateway(struct rtentry *, void *, unsigned int);
void	rtm_senddesync_timer(void *);
void	rtm_senddesync(struct socket *);
int	rtm_sendup(struct socket *, struct mbuf *);

int	rtm_getifa(struct rt_addrinfo *, unsigned int);
int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
	    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
int	rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
	    struct walkarg *);
int	rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
int	rtm_validate_proposal(struct rt_addrinfo *);
void	rtm_setmetrics(u_long, const struct rt_metrics *,
	    struct rt_kmetrics *);
void	rtm_getmetrics(const struct rtentry *,
	    struct rt_metrics *);

int	sysctl_iflist(int, struct walkarg *);
int	sysctl_ifnames(struct walkarg *);
int	sysctl_rtable_rtstat(void *, size_t *, void *);

int	rt_setsource(unsigned int, const struct sockaddr *);

/*
 * Per-socket control block of a routing socket.
 *
 * Locks used to protect struct members
 *   I       immutable after creation
 *   s       solock
 */
struct rtpcb {
	struct socket		*rop_socket;		/* [I] */

	SRPL_ENTRY(rtpcb)	rop_list;	/* linkage on rtptable.rtp_list */
	struct refcnt		rop_refcnt;	/* held by SRPL readers */
	struct timeout		rop_timeout;	/* RTM_DESYNC resend timer */
	unsigned int		rop_msgfilter;		/* [s] */
	unsigned int		rop_flagfilter;		/* [s] */
	unsigned int		rop_flags;		/* [s] */
	u_int			rop_rtableid;		/* [s] */
	unsigned short		rop_proto;		/* [I] */
	u_char			rop_priority;		/* [s] */
};
/* Convert a socket pointer into its routing control block. */
#define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)

/* Global list of all attached routing sockets. */
struct rtptable {
	SRPL_HEAD(, rtpcb)	rtp_list;
	struct srpl_rc		rtp_rc;
	struct rwlock		rtp_lk;		/* serializes list updates */
	unsigned int		rtp_count;	/* number of attached sockets */
};

struct pool rtpcb_pool;
struct rtptable rtptable;

/*
 * These flags and timeout are used for indicating to userland (via a
 * RTM_DESYNC msg) when the route socket has overflowed and messages
 * have been lost.
*/
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
					   queueing more packets */

#define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */

/*
 * One-time initialization of the routing socket layer: sets up the
 * reference-counting callbacks and lock of the global socket list and
 * creates the pool rtpcb structures are allocated from.
 */
void
route_prinit(void)
{
	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
	rw_init(&rtptable.rtp_lk, "rtsock");
	SRPL_INIT(&rtptable.rtp_list);
	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
	    IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
}

/*
 * SRPL callback: take a reference on the rtpcb `v'.  The first argument
 * is the cookie given to srpl_rc_init() (NULL here) and is unused.
 */
void
rcb_ref(void *null, void *v)
{
	struct rtpcb *rop = v;

	refcnt_take(&rop->rop_refcnt);
}

/*
 * SRPL callback: drop a reference on the rtpcb `v' and wake up a
 * thread sleeping in refcnt_finalize() (see route_detach()).
 */
void
rcb_unref(void *null, void *v)
{
	struct rtpcb *rop = v;

	refcnt_rele_wake(&rop->rop_refcnt);
}

/*
 * Attach a new routing socket.
 *
 * Reserves buffer space, allocates and initializes the rtpcb, marks the
 * socket connected with SO_USELOOPBACK set, and links the rtpcb on the
 * global list.  `proto' is the address family the socket is bound to;
 * `wait' selects a sleeping (M_WAIT) or non-sleeping pool allocation.
 *
 * Returns 0 on success, the soreserve() error, or ENOBUFS when the
 * rtpcb cannot be allocated.
 */
int
route_attach(struct socket *so, int proto, int wait)
{
	struct rtpcb	*rop;
	int		 error;

	error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
	if (error)
		return (error);
	/*
	 * use the rawcb but allocate a rtpcb, this
	 * code does not care about the additional fields
	 * and works directly on the raw socket.
	 */
	rop = pool_get(&rtpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (rop == NULL)
		return (ENOBUFS);
	so->so_pcb = rop;
	/* Init the timeout structure */
	timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so,
	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
	refcnt_init(&rop->rop_refcnt);

	rop->rop_socket = so;
	rop->rop_proto = proto;

	/* By default only report on the routing table of the creator. */
	rop->rop_rtableid = curproc->p_p->ps_rtableid;

	soisconnected(so);
	so->so_options |= SO_USELOOPBACK;

	/* Publish the pcb; from here on route_input() can find it. */
	rw_enter(&rtptable.rtp_lk, RW_WRITE);
	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
	    rop_list);
	rtptable.rtp_count++;
	rw_exit(&rtptable.rtp_lk);

	return (0);
}

/*
 * Detach a routing socket and free its rtpcb.
 *
 * Must be called with the socket locked.  The socket lock is dropped
 * while waiting for list readers to release their references and for
 * the desync timeout to finish, then taken again before returning.
 *
 * Returns 0 on success or EINVAL if the socket has no pcb.
 */
int
route_detach(struct socket *so)
{
	struct rtpcb	*rop;

	soassertlocked(so);

	rop = sotortpcb(so);
	if (rop == NULL)
		return (EINVAL);

	/* Unlink first so that no new reference can be taken. */
	rw_enter(&rtptable.rtp_lk, RW_WRITE);

	rtptable.rtp_count--;
	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
	    rop_list);
	rw_exit(&rtptable.rtp_lk);

	/*
	 * route_input() and rtm_senddesync_timer() take the socket lock,
	 * so it has to be released while waiting for them.
	 */
	sounlock(so);

	/* wait for all references to drop */
	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
	timeout_del_barrier(&rop->rop_timeout);

	solock(so);

	so->so_pcb = NULL;
	KASSERT((so->so_state & SS_NOFDREF) == 0);
	pool_put(&rtpcb_pool, rop);

	return (0);
}

/* Mark the socket as disconnected.  Always returns 0. */
int
route_disconnect(struct socket *so)
{
	soisdisconnected(so);
	return (0);
}

/* Disallow further sends on the socket.  Always returns 0. */
int
route_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}

/*
 * Called with the socket locked; clears ROUTECB_FLAG_FLUSH once the
 * receive buffer has been completely drained.
 */
void
route_rcvd(struct socket *so)
{
	struct rtpcb *rop = sotortpcb(so);

	soassertlocked(so);

	/*
	 * If we are in a FLUSH state, check if the buffer is
	 * empty so that we can clear the flag.
315 */ 316 317 mtx_enter(&so->so_rcv.sb_mtx); 318 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 319 ((sbspace_locked(so, &so->so_rcv) == so->so_rcv.sb_hiwat))) 320 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 321 mtx_leave(&so->so_rcv.sb_mtx); 322 } 323 324 int 325 route_send(struct socket *so, struct mbuf *m, struct mbuf *nam, 326 struct mbuf *control) 327 { 328 int error; 329 330 soassertlocked(so); 331 332 if (control && control->m_len) { 333 error = EOPNOTSUPP; 334 goto out; 335 } 336 337 if (nam) { 338 error = EISCONN; 339 goto out; 340 } 341 342 error = route_output(m, so); 343 m = NULL; 344 345 out: 346 m_freem(control); 347 m_freem(m); 348 349 return (error); 350 } 351 352 int 353 route_sockaddr(struct socket *so, struct mbuf *nam) 354 { 355 return (EINVAL); 356 } 357 358 int 359 route_peeraddr(struct socket *so, struct mbuf *nam) 360 { 361 /* minimal support, just implement a fake peer address */ 362 memcpy(mtod(nam, caddr_t), &route_src, route_src.sa_len); 363 nam->m_len = route_src.sa_len; 364 return (0); 365 } 366 367 int 368 route_ctloutput(int op, struct socket *so, int level, int optname, 369 struct mbuf *m) 370 { 371 struct rtpcb *rop = sotortpcb(so); 372 int error = 0; 373 unsigned int tid, prio; 374 375 if (level != AF_ROUTE) 376 return (EINVAL); 377 378 switch (op) { 379 case PRCO_SETOPT: 380 switch (optname) { 381 case ROUTE_MSGFILTER: 382 if (m == NULL || m->m_len != sizeof(unsigned int)) 383 error = EINVAL; 384 else 385 rop->rop_msgfilter = *mtod(m, unsigned int *); 386 break; 387 case ROUTE_TABLEFILTER: 388 if (m == NULL || m->m_len != sizeof(unsigned int)) { 389 error = EINVAL; 390 break; 391 } 392 tid = *mtod(m, unsigned int *); 393 if (tid != RTABLE_ANY && !rtable_exists(tid)) 394 error = ENOENT; 395 else 396 rop->rop_rtableid = tid; 397 break; 398 case ROUTE_PRIOFILTER: 399 if (m == NULL || m->m_len != sizeof(unsigned int)) { 400 error = EINVAL; 401 break; 402 } 403 prio = *mtod(m, unsigned int *); 404 if (prio > RTP_MAX) 405 error = 
EINVAL; 406 else 407 rop->rop_priority = prio; 408 break; 409 case ROUTE_FLAGFILTER: 410 if (m == NULL || m->m_len != sizeof(unsigned int)) 411 error = EINVAL; 412 else 413 rop->rop_flagfilter = *mtod(m, unsigned int *); 414 break; 415 default: 416 error = ENOPROTOOPT; 417 break; 418 } 419 break; 420 case PRCO_GETOPT: 421 switch (optname) { 422 case ROUTE_MSGFILTER: 423 m->m_len = sizeof(unsigned int); 424 *mtod(m, unsigned int *) = rop->rop_msgfilter; 425 break; 426 case ROUTE_TABLEFILTER: 427 m->m_len = sizeof(unsigned int); 428 *mtod(m, unsigned int *) = rop->rop_rtableid; 429 break; 430 case ROUTE_PRIOFILTER: 431 m->m_len = sizeof(unsigned int); 432 *mtod(m, unsigned int *) = rop->rop_priority; 433 break; 434 case ROUTE_FLAGFILTER: 435 m->m_len = sizeof(unsigned int); 436 *mtod(m, unsigned int *) = rop->rop_flagfilter; 437 break; 438 default: 439 error = ENOPROTOOPT; 440 break; 441 } 442 } 443 return (error); 444 } 445 446 void 447 rtm_senddesync_timer(void *xso) 448 { 449 struct socket *so = xso; 450 451 solock(so); 452 rtm_senddesync(so); 453 sounlock(so); 454 } 455 456 void 457 rtm_senddesync(struct socket *so) 458 { 459 struct rtpcb *rop = sotortpcb(so); 460 struct mbuf *desync_mbuf; 461 462 soassertlocked(so); 463 464 /* 465 * Dying socket is disconnected by upper layer and there is 466 * no reason to send packet. Also we shouldn't reschedule 467 * timeout(9), otherwise timeout_del_barrier(9) can't help us. 468 */ 469 if ((so->so_state & SS_ISCONNECTED) == 0 || 470 (so->so_rcv.sb_state & SS_CANTRCVMORE)) 471 return; 472 473 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 474 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 475 return; 476 477 /* 478 * If we fail to alloc memory or if sbappendaddr() 479 * fails, re-add timeout and try again. 
480 */ 481 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 482 if (desync_mbuf != NULL) { 483 int ret; 484 485 mtx_enter(&so->so_rcv.sb_mtx); 486 ret = sbappendaddr(so, &so->so_rcv, &route_src, 487 desync_mbuf, NULL); 488 mtx_leave(&so->so_rcv.sb_mtx); 489 490 if (ret != 0) { 491 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 492 sorwakeup(rop->rop_socket); 493 return; 494 } 495 m_freem(desync_mbuf); 496 } 497 /* Re-add timeout to try sending msg again */ 498 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 499 } 500 501 void 502 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 503 { 504 struct socket *so; 505 struct rtpcb *rop; 506 struct rt_msghdr *rtm; 507 struct mbuf *m = m0; 508 struct srp_ref sr; 509 510 /* ensure that we can access the rtm_type via mtod() */ 511 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 512 m_freem(m); 513 return; 514 } 515 516 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 517 /* 518 * If route socket is bound to an address family only send 519 * messages that match the address family. Address family 520 * agnostic messages are always sent. 521 */ 522 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 523 rop->rop_proto != sa_family) 524 continue; 525 526 527 so = rop->rop_socket; 528 solock(so); 529 530 /* 531 * Check to see if we don't want our own messages and 532 * if we can receive anything. 
533 */ 534 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 535 !(so->so_state & SS_ISCONNECTED) || 536 (so->so_rcv.sb_state & SS_CANTRCVMORE)) 537 goto next; 538 539 /* filter messages that the process does not want */ 540 rtm = mtod(m, struct rt_msghdr *); 541 /* but RTM_DESYNC can't be filtered */ 542 if (rtm->rtm_type != RTM_DESYNC) { 543 if (rop->rop_msgfilter != 0 && 544 !(rop->rop_msgfilter & (1U << rtm->rtm_type))) 545 goto next; 546 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags)) 547 goto next; 548 } 549 switch (rtm->rtm_type) { 550 case RTM_IFANNOUNCE: 551 case RTM_DESYNC: 552 /* no tableid */ 553 break; 554 case RTM_RESOLVE: 555 case RTM_NEWADDR: 556 case RTM_DELADDR: 557 case RTM_IFINFO: 558 case RTM_80211INFO: 559 case RTM_BFD: 560 /* check against rdomain id */ 561 if (rop->rop_rtableid != RTABLE_ANY && 562 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 563 goto next; 564 break; 565 default: 566 if (rop->rop_priority != 0 && 567 rop->rop_priority < rtm->rtm_priority) 568 goto next; 569 /* check against rtable id */ 570 if (rop->rop_rtableid != RTABLE_ANY && 571 rop->rop_rtableid != rtm->rtm_tableid) 572 goto next; 573 break; 574 } 575 576 /* 577 * Check to see if the flush flag is set. If so, don't queue 578 * any more messages until the flag is cleared. 
579 */ 580 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 581 goto next; 582 583 rtm_sendup(so, m); 584 next: 585 sounlock(so); 586 } 587 SRPL_LEAVE(&sr); 588 589 m_freem(m); 590 } 591 592 int 593 rtm_sendup(struct socket *so, struct mbuf *m0) 594 { 595 struct rtpcb *rop = sotortpcb(so); 596 struct mbuf *m; 597 int send_desync = 0; 598 599 soassertlocked(so); 600 601 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 602 if (m == NULL) 603 return (ENOMEM); 604 605 mtx_enter(&so->so_rcv.sb_mtx); 606 if (sbspace_locked(so, &so->so_rcv) < (2 * MSIZE) || 607 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) 608 send_desync = 1; 609 mtx_leave(&so->so_rcv.sb_mtx); 610 611 if (send_desync) { 612 /* Flag socket as desync'ed and flush required */ 613 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 614 rtm_senddesync(so); 615 m_freem(m); 616 return (ENOBUFS); 617 } 618 619 sorwakeup(so); 620 return (0); 621 } 622 623 struct rt_msghdr * 624 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 625 { 626 struct rt_msghdr *rtm; 627 struct rt_addrinfo info; 628 struct sockaddr_rtlabel sa_rl; 629 struct sockaddr_in6 sa_mask; 630 #ifdef BFD 631 struct sockaddr_bfd sa_bfd; 632 #endif 633 struct ifnet *ifp = NULL; 634 int len; 635 636 bzero(&info, sizeof(info)); 637 info.rti_info[RTAX_DST] = rt_key(rt); 638 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 639 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 640 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 641 #ifdef BFD 642 if (rt->rt_flags & RTF_BFD) { 643 KERNEL_LOCK(); 644 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 645 KERNEL_UNLOCK(); 646 } 647 #endif 648 #ifdef MPLS 649 if (rt->rt_flags & RTF_MPLS) { 650 struct sockaddr_mpls sa_mpls; 651 652 bzero(&sa_mpls, sizeof(sa_mpls)); 653 sa_mpls.smpls_family = AF_MPLS; 654 sa_mpls.smpls_len = sizeof(sa_mpls); 655 sa_mpls.smpls_label = ((struct rt_mpls *) 656 rt->rt_llinfo)->mpls_label; 657 info.rti_info[RTAX_SRC] = (struct sockaddr 
*)&sa_mpls; 658 info.rti_mpls = ((struct rt_mpls *) 659 rt->rt_llinfo)->mpls_operation; 660 } 661 #endif 662 ifp = if_get(rt->rt_ifidx); 663 if (ifp != NULL) { 664 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 665 info.rti_info[RTAX_IFA] = rtable_getsource(tableid, 666 info.rti_info[RTAX_DST]->sa_family); 667 if (info.rti_info[RTAX_IFA] == NULL) 668 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 669 if (ifp->if_flags & IFF_POINTOPOINT) 670 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 671 } 672 if_put(ifp); 673 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 674 675 /* build new route message */ 676 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 677 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 678 679 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 680 rtm->rtm_type = type; 681 rtm->rtm_index = rt->rt_ifidx; 682 rtm->rtm_tableid = tableid; 683 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 684 rtm->rtm_flags = rt->rt_flags; 685 rtm->rtm_pid = curproc->p_p->ps_pid; 686 rtm->rtm_seq = seq; 687 rtm_getmetrics(rt, &rtm->rtm_rmx); 688 rtm->rtm_addrs = info.rti_addrs; 689 #ifdef MPLS 690 rtm->rtm_mpls = info.rti_mpls; 691 #endif 692 return rtm; 693 } 694 695 int 696 route_output(struct mbuf *m, struct socket *so) 697 { 698 struct rt_msghdr *rtm = NULL; 699 struct rtentry *rt = NULL; 700 struct rt_addrinfo info; 701 struct ifnet *ifp; 702 int len, seq, useloopback, error = 0; 703 u_int tableid; 704 u_int8_t prio; 705 u_char vers, type; 706 707 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 708 (m = m_pullup(m, sizeof(int32_t))) == NULL)) 709 return (ENOBUFS); 710 if ((m->m_flags & M_PKTHDR) == 0) 711 panic("route_output"); 712 713 useloopback = so->so_options & SO_USELOOPBACK; 714 715 /* 716 * The socket can't be closed concurrently because the file 717 * descriptor reference is still held. 
718 */ 719 720 sounlock(so); 721 722 len = m->m_pkthdr.len; 723 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 724 sizeof(rtm->rtm_hdrlen) || 725 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 726 error = EINVAL; 727 goto fail; 728 } 729 vers = mtod(m, struct rt_msghdr *)->rtm_version; 730 switch (vers) { 731 case RTM_VERSION: 732 if (len < sizeof(struct rt_msghdr)) { 733 error = EINVAL; 734 goto fail; 735 } 736 if (len > RTM_MAXSIZE) { 737 error = EMSGSIZE; 738 goto fail; 739 } 740 rtm = malloc(len, M_RTABLE, M_WAITOK); 741 m_copydata(m, 0, len, rtm); 742 break; 743 default: 744 error = EPROTONOSUPPORT; 745 goto fail; 746 } 747 748 /* Verify that the caller is sending an appropriate message early */ 749 switch (rtm->rtm_type) { 750 case RTM_ADD: 751 case RTM_DELETE: 752 case RTM_GET: 753 case RTM_CHANGE: 754 case RTM_PROPOSAL: 755 case RTM_SOURCE: 756 break; 757 default: 758 error = EOPNOTSUPP; 759 goto fail; 760 } 761 /* 762 * Verify that the header length is valid. 763 * All messages from userland start with a struct rt_msghdr. 764 */ 765 if (rtm->rtm_hdrlen == 0) /* old client */ 766 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 767 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 768 len < rtm->rtm_hdrlen) { 769 error = EINVAL; 770 goto fail; 771 } 772 773 rtm->rtm_pid = curproc->p_p->ps_pid; 774 775 /* 776 * Verify that the caller has the appropriate privilege; RTM_GET 777 * is the only operation the non-superuser is allowed. 778 */ 779 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 780 error = EACCES; 781 goto fail; 782 } 783 tableid = rtm->rtm_tableid; 784 if (!rtable_exists(tableid)) { 785 if (rtm->rtm_type == RTM_ADD) { 786 if ((error = rtable_add(tableid)) != 0) 787 goto fail; 788 } else { 789 error = EINVAL; 790 goto fail; 791 } 792 } 793 794 /* Do not let userland play with kernel-only flags. 
*/ 795 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 796 error = EINVAL; 797 goto fail; 798 } 799 800 /* make sure that kernel-only bits are not set */ 801 rtm->rtm_priority &= RTP_MASK; 802 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 803 rtm->rtm_fmask &= RTF_FMASK; 804 805 if (rtm->rtm_priority != 0) { 806 if (rtm->rtm_priority > RTP_MAX || 807 rtm->rtm_priority == RTP_LOCAL) { 808 error = EINVAL; 809 goto fail; 810 } 811 prio = rtm->rtm_priority; 812 } else if (rtm->rtm_type != RTM_ADD) 813 prio = RTP_ANY; 814 else if (rtm->rtm_flags & RTF_STATIC) 815 prio = 0; 816 else 817 prio = RTP_DEFAULT; 818 819 bzero(&info, sizeof(info)); 820 info.rti_addrs = rtm->rtm_addrs; 821 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 822 len + (caddr_t)rtm, &info)) != 0) 823 goto fail; 824 825 info.rti_flags = rtm->rtm_flags; 826 827 if (rtm->rtm_type != RTM_SOURCE && 828 rtm->rtm_type != RTM_PROPOSAL && 829 (info.rti_info[RTAX_DST] == NULL || 830 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 831 (info.rti_info[RTAX_GATEWAY] != NULL && 832 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 833 info.rti_info[RTAX_GENMASK] != NULL)) { 834 error = EINVAL; 835 goto fail; 836 } 837 #ifdef MPLS 838 info.rti_mpls = rtm->rtm_mpls; 839 #endif 840 841 if (info.rti_info[RTAX_GATEWAY] != NULL && 842 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 843 (info.rti_flags & RTF_CLONING) == 0) { 844 info.rti_flags |= RTF_LLINFO; 845 } 846 847 /* 848 * Validate RTM_PROPOSAL and pass it along or error out. 849 */ 850 if (rtm->rtm_type == RTM_PROPOSAL) { 851 if (rtm_validate_proposal(&info) == -1) { 852 error = EINVAL; 853 goto fail; 854 } 855 /* 856 * If this is a solicitation proposal forward request to 857 * all interfaces. Most handlers will ignore it but at least 858 * umb(4) will send a response to this event. 
859 */ 860 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 861 NET_LOCK(); 862 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 863 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 864 } 865 NET_UNLOCK(); 866 } 867 } else if (rtm->rtm_type == RTM_SOURCE) { 868 if (info.rti_info[RTAX_IFA] == NULL) { 869 error = EINVAL; 870 goto fail; 871 } 872 NET_LOCK(); 873 error = rt_setsource(tableid, info.rti_info[RTAX_IFA]); 874 NET_UNLOCK(); 875 if (error) 876 goto fail; 877 } else { 878 error = rtm_output(rtm, &rt, &info, prio, tableid); 879 if (!error) { 880 type = rtm->rtm_type; 881 seq = rtm->rtm_seq; 882 free(rtm, M_RTABLE, len); 883 NET_LOCK_SHARED(); 884 rtm = rtm_report(rt, type, seq, tableid); 885 NET_UNLOCK_SHARED(); 886 len = rtm->rtm_msglen; 887 } 888 } 889 890 rtfree(rt); 891 if (error) { 892 rtm->rtm_errno = error; 893 } else { 894 rtm->rtm_flags |= RTF_DONE; 895 } 896 897 /* 898 * Check to see if we don't want our own messages. 899 */ 900 if (!useloopback) { 901 if (rtptable.rtp_count == 0) { 902 /* no other listener and no loopback of messages */ 903 goto fail; 904 } 905 } 906 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 907 m_freem(m); 908 m = NULL; 909 } else if (m->m_pkthdr.len > len) 910 m_adj(m, len - m->m_pkthdr.len); 911 free(rtm, M_RTABLE, len); 912 if (m) 913 route_input(m, so, info.rti_info[RTAX_DST] ? 
914 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 915 solock(so); 916 917 return (error); 918 fail: 919 free(rtm, M_RTABLE, len); 920 m_freem(m); 921 solock(so); 922 923 return (error); 924 } 925 926 int 927 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 928 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 929 { 930 struct rtentry *rt = *prt; 931 struct ifnet *ifp = NULL; 932 int plen, newgate = 0, error = 0; 933 934 switch (rtm->rtm_type) { 935 case RTM_ADD: 936 if (info->rti_info[RTAX_GATEWAY] == NULL) { 937 error = EINVAL; 938 break; 939 } 940 941 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 942 if ((error = route_arp_conflict(rt, info))) { 943 rtfree(rt); 944 rt = NULL; 945 break; 946 } 947 948 /* 949 * We cannot go through a delete/create/insert cycle for 950 * cached route because this can lead to races in the 951 * receive path. Instead we update the L2 cache. 952 */ 953 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) { 954 ifp = if_get(rt->rt_ifidx); 955 if (ifp == NULL) { 956 rtfree(rt); 957 rt = NULL; 958 error = ESRCH; 959 break; 960 } 961 962 goto change; 963 } 964 965 rtfree(rt); 966 rt = NULL; 967 968 NET_LOCK(); 969 if ((error = rtm_getifa(info, tableid)) != 0) { 970 NET_UNLOCK(); 971 break; 972 } 973 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 974 NET_UNLOCK(); 975 if (error == 0) 976 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 977 &rt->rt_rmx); 978 break; 979 case RTM_DELETE: 980 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 981 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 982 prio); 983 if (rt == NULL) { 984 error = ESRCH; 985 break; 986 } 987 988 /* 989 * If we got multipath routes, we require users to specify 990 * a matching gateway. 
*/
		if (ISSET(rt->rt_flags, RTF_MPATH) &&
		    info->rti_info[RTAX_GATEWAY] == NULL) {
			error = ESRCH;
			break;
		}

		ifp = if_get(rt->rt_ifidx);
		if (ifp == NULL) {
			rtfree(rt);
			rt = NULL;
			error = ESRCH;
			break;
		}

		/*
		 * Invalidate the cache of automagically created and
		 * referenced L2 entries to make sure that ``rt_gwroute''
		 * pointer stays valid for other CPUs.
		 */
		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
			NET_LOCK();
			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
			/* Reset the MTU of the gateway route. */
			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
			    route_cleargateway, rt);
			NET_UNLOCK();
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			error = EINVAL;
			break;
		}

		/* Drop the lookup reference; rtrequest_delete() returns its own. */
		rtfree(rt);
		rt = NULL;

		NET_LOCK();
		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
		NET_UNLOCK();
		break;
	case RTM_CHANGE:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
			rtfree(rt);
			rt = NULL;
		}

		/*
		 * If RTAX_GATEWAY is the argument we're trying to
		 * change, try to find a compatible route.
		 */
		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], NULL, prio);
			/* Ensure we don't pick a multipath one. */
			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
				rtfree(rt);
				rt = NULL;
			}
		}

		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			error = EINVAL;
			break;
		}

		ifp = if_get(rt->rt_ifidx);
		if (ifp == NULL) {
			rtfree(rt);
			rt = NULL;
			error = ESRCH;
			break;
		}

		/*
		 * RTM_CHANGE needs a perfect match.
		 */
		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
		    info->rti_info[RTAX_NETMASK]);
		if (rt_plen(rt) != plen) {
			error = ESRCH;
			break;
		}

		/* Does the request carry a gateway that differs from ours? */
		if (info->rti_info[RTAX_GATEWAY] != NULL)
			if (rt->rt_gateway == NULL ||
			    bcmp(rt->rt_gateway,
			    info->rti_info[RTAX_GATEWAY],
			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
				newgate = 1;
			}
		/*
		 * Check reachable gateway before changing the route.
		 * New gateway could require new ifaddr, ifp;
		 * flags may also be different; ifp may be specified
		 * by ll sockaddr when protocol address is ambiguous.
		 */
		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
		    info->rti_info[RTAX_IFA] != NULL) {
			struct ifaddr *ifa = NULL;

			NET_LOCK();
			if ((error = rtm_getifa(info, tableid)) != 0) {
				NET_UNLOCK();
				break;
			}
			ifa = info->rti_ifa;
			if (rt->rt_ifa != ifa) {
				/* Move the route over to the new address. */
				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
				ifafree(rt->rt_ifa);

				rt->rt_ifa = ifaref(ifa);
				rt->rt_ifidx = ifa->ifa_ifp->if_index;
				/* recheck link state after ifp change */
				rt_if_linkstate_change(rt, ifa->ifa_ifp,
				    tableid);
			}
			NET_UNLOCK();
		}
		/* Also entered from RTM_ADD to update a cached (L2) route. */
change:
		if (info->rti_info[RTAX_GATEWAY] != NULL) {
			/* When updating the gateway, make sure it is valid. */
			if (!newgate && rt->rt_gateway->sa_family !=
			    info->rti_info[RTAX_GATEWAY]->sa_family) {
				error = EINVAL;
				break;
			}

			NET_LOCK();
			error = rt_setgate(rt,
			    info->rti_info[RTAX_GATEWAY], tableid);
			NET_UNLOCK();
			if (error)
				break;
		}
#ifdef MPLS
		if (rtm->rtm_flags & RTF_MPLS) {
			NET_LOCK();
			error = rt_mpls_set(rt,
			    info->rti_info[RTAX_SRC], info->rti_mpls);
			NET_UNLOCK();
			if (error)
				break;
		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
			NET_LOCK();
			/* if gateway changed remove MPLS information */
			rt_mpls_clear(rt);
			NET_UNLOCK();
		}
#endif

#ifdef BFD
		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
			KERNEL_LOCK();
			error = bfdset(rt);
			KERNEL_UNLOCK();
			if (error)
				break;
		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
			KERNEL_LOCK();
			bfdclear(rt);
			KERNEL_UNLOCK();
		}
#endif

		NET_LOCK();
		/* Hack to allow some flags to be toggled */
		if (rtm->rtm_fmask) {
			/* The MPLS flag is set by rt_mpls_set() */
			rtm->rtm_fmask &= ~RTF_MPLS;
			rtm->rtm_flags &= ~RTF_MPLS;
			rt->rt_flags =
			    (rt->rt_flags & ~rtm->rtm_fmask) |
			    (rtm->rtm_flags & rtm->rtm_fmask);
		}
		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);

		ifp->if_rtrequest(ifp, RTM_ADD, rt);

		if (info->rti_info[RTAX_LABEL] != NULL) {
			const char *rtlabel = ((const struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL])->sr_label;
			rtlabel_unref(rt->rt_labelid);
			rt->rt_labelid = rtlabel_name2id(rtlabel);
		}
		if_group_routechange(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK]);
		/* Update the locks of exactly those metrics being set. */
		rt->rt_locks &= ~(rtm->rtm_inits);
		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
		NET_UNLOCK();
		break;
	case RTM_GET:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL)
			error = ESRCH;
		break;
	}

	/* ifp may still be NULL here. */
	if_put(ifp);
	*prt = rt;
	return (error);
}

/*
 * Find the interface address to use for a route to `dst' via `gateway'
 * in routing table `rtableid'; `flags' are the RTF_* flags of the route.
 *
 * Returns the chosen ifaddr or NULL if none is found.  No reference is
 * taken on the result by this function.
 */
struct ifaddr *
ifa_ifwithroute(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, unsigned int rtableid)
{
	struct ifaddr	*ifa;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		ifa = NULL;
		if (flags & RTF_HOST)
			ifa = ifa_ifwithdstaddr(dst, rtableid);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr(gateway, rtableid);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gateway, rtableid);
	}
	if (ifa == NULL) {
		if (gateway->sa_family == AF_LINK) {
			/* Link-layer gateway: it names the interface. */
			const struct sockaddr_dl *sdl;
			struct ifnet *ifp;

			sdl = satosdl_const(gateway);
			ifp = if_get(sdl->sdl_index);
			if (ifp != NULL)
				ifa = ifaof_ifpforaddr(dst, ifp);
			if_put(ifp);
		} else {
			/* Fall back to the route leading to the gateway. */
			struct rtentry *rt;

			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
			if (rt != NULL)
				ifa = rt->rt_ifa;
			rtfree(rt);
		}
	}
	if (ifa == NULL)
		return (NULL);
	/* Prefer an address of the destination's family on that interface. */
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr *oifa = ifa;
		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (ifa == NULL)
			ifa = oifa;
	}
	return (ifa);
}

/*
 * Select the interface address for the request described by `info' in
 * routing table `rtid' and store it in info->rti_ifa.
 *
 * Returns 0 on success or ENETUNREACH if no address can be found.
 */
int
rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
{
	struct ifnet	*ifp = NULL;

	/*
	 * The "returned" `ifa' is guaranteed to be alive only if
	 * the NET_LOCK() is held.
1284 */ 1285 NET_ASSERT_LOCKED(); 1286 1287 /* 1288 * ifp may be specified by sockaddr_dl when protocol address 1289 * is ambiguous 1290 */ 1291 if (info->rti_info[RTAX_IFP] != NULL) { 1292 const struct sockaddr_dl *sdl; 1293 1294 sdl = satosdl_const(info->rti_info[RTAX_IFP]); 1295 ifp = if_get(sdl->sdl_index); 1296 } 1297 1298 #ifdef IPSEC 1299 /* 1300 * If the destination is a PF_KEY address, we'll look 1301 * for the existence of a encap interface number or address 1302 * in the options list of the gateway. By default, we'll return 1303 * enc0. 1304 */ 1305 if (info->rti_info[RTAX_DST] && 1306 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1307 info->rti_ifa = enc_getifa(rtid, 0); 1308 #endif 1309 1310 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1311 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1312 1313 if (info->rti_ifa == NULL) { 1314 const struct sockaddr *sa; 1315 1316 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1317 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1318 sa = info->rti_info[RTAX_DST]; 1319 1320 if (sa != NULL && ifp != NULL) 1321 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1322 else if (info->rti_info[RTAX_DST] != NULL && 1323 info->rti_info[RTAX_GATEWAY] != NULL) 1324 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1325 info->rti_info[RTAX_DST], 1326 info->rti_info[RTAX_GATEWAY], 1327 rtid); 1328 else if (sa != NULL) 1329 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1330 sa, sa, rtid); 1331 } 1332 1333 if_put(ifp); 1334 1335 if (info->rti_ifa == NULL) 1336 return (ENETUNREACH); 1337 1338 return (0); 1339 } 1340 1341 int 1342 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1343 { 1344 struct rtentry *nhrt = arg; 1345 1346 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1347 !ISSET(rt->rt_locks, RTV_MTU)) 1348 atomic_store_int(&rt->rt_mtu, 0); 1349 1350 return (0); 1351 } 1352 1353 /* 1354 * Check if the user request to insert an ARP entry does not conflict 
1355 * with existing ones. 1356 * 1357 * Only two entries are allowed for a given IP address: a private one 1358 * (priv) and a public one (pub). 1359 */ 1360 int 1361 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1362 { 1363 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1364 1365 if ((info->rti_flags & RTF_LLINFO) == 0 || 1366 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1367 return (0); 1368 1369 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1370 return (0); 1371 1372 /* If the entry is cached, it can be updated. */ 1373 if (ISSET(rt->rt_flags, RTF_CACHED)) 1374 return (0); 1375 1376 /* 1377 * Same destination, not cached and both "priv" or "pub" conflict. 1378 * If a second entry exists, it always conflict. 1379 */ 1380 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1381 ISSET(rt->rt_flags, RTF_MPATH)) 1382 return (EEXIST); 1383 1384 /* No conflict but an entry exist so we need to force mpath. */ 1385 info->rti_flags |= RTF_MPATH; 1386 return (0); 1387 } 1388 1389 void 1390 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1391 struct rt_kmetrics *out) 1392 { 1393 int64_t expire; 1394 1395 if (which & RTV_MTU) 1396 atomic_store_int(&out->rmx_mtu, in->rmx_mtu); 1397 if (which & RTV_EXPIRE) { 1398 expire = in->rmx_expire; 1399 if (expire != 0) { 1400 expire -= gettime(); 1401 expire += getuptime(); 1402 } 1403 1404 out->rmx_expire = expire; 1405 } 1406 } 1407 1408 void 1409 rtm_getmetrics(const struct rtentry *rt, struct rt_metrics *out) 1410 { 1411 const struct rt_kmetrics *in = &rt->rt_rmx; 1412 int64_t expire; 1413 1414 expire = in->rmx_expire; 1415 if (expire == 0) 1416 expire = rt_timer_get_expire(rt); 1417 if (expire != 0) { 1418 expire -= getuptime(); 1419 expire += gettime(); 1420 } 1421 1422 bzero(out, sizeof(*out)); 1423 out->rmx_locks = in->rmx_locks; 1424 out->rmx_mtu = atomic_load_int(&in->rmx_mtu); 1425 out->rmx_expire = expire; 1426 out->rmx_pksent = in->rmx_pksent; 1427 } 1428 1429 #define ROUNDUP(a) \ 
1430 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1431 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1432 1433 int 1434 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1435 { 1436 int i; 1437 1438 /* 1439 * Parse address bits, split address storage in chunks, and 1440 * set info pointers. Use sa_len for traversing the memory 1441 * and check that we stay within in the limit. 1442 */ 1443 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1444 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1445 struct sockaddr *sa; 1446 1447 if ((rtinfo->rti_addrs & (1U << i)) == 0) 1448 continue; 1449 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1450 return (EINVAL); 1451 sa = (struct sockaddr *)cp; 1452 if (cp + sa->sa_len > cplim) 1453 return (EINVAL); 1454 rtinfo->rti_info[i] = sa; 1455 ADVANCE(cp, sa); 1456 } 1457 /* 1458 * Check that the address family is suitable for the route address 1459 * type. Check that each address has a size that fits its family 1460 * and its length is within the size. Strings within addresses must 1461 * be NUL terminated. 1462 */ 1463 for (i = 0; i < RTAX_MAX; i++) { 1464 const struct sockaddr *sa; 1465 size_t len, maxlen, size; 1466 1467 sa = rtinfo->rti_info[i]; 1468 if (sa == NULL) 1469 continue; 1470 maxlen = size = 0; 1471 switch (i) { 1472 case RTAX_DST: 1473 case RTAX_GATEWAY: 1474 case RTAX_SRC: 1475 switch (sa->sa_family) { 1476 case AF_INET: 1477 size = sizeof(struct sockaddr_in); 1478 break; 1479 case AF_LINK: 1480 size = sizeof(struct sockaddr_dl); 1481 break; 1482 #ifdef INET6 1483 case AF_INET6: 1484 size = sizeof(struct sockaddr_in6); 1485 break; 1486 #endif 1487 #ifdef MPLS 1488 case AF_MPLS: 1489 size = sizeof(struct sockaddr_mpls); 1490 break; 1491 #endif 1492 } 1493 break; 1494 case RTAX_IFP: 1495 if (sa->sa_family != AF_LINK) 1496 return (EAFNOSUPPORT); 1497 /* 1498 * XXX Should be sizeof(struct sockaddr_dl), but 1499 * route(8) has a bug and provides less memory. 
1500 * arp(8) has another bug and uses sizeof pointer. 1501 */ 1502 size = 4; 1503 break; 1504 case RTAX_IFA: 1505 switch (sa->sa_family) { 1506 case AF_INET: 1507 size = sizeof(struct sockaddr_in); 1508 break; 1509 #ifdef INET6 1510 case AF_INET6: 1511 size = sizeof(struct sockaddr_in6); 1512 break; 1513 #endif 1514 default: 1515 return (EAFNOSUPPORT); 1516 } 1517 break; 1518 case RTAX_LABEL: 1519 if (sa->sa_family != AF_UNSPEC) 1520 return (EAFNOSUPPORT); 1521 maxlen = RTLABEL_LEN; 1522 size = sizeof(struct sockaddr_rtlabel); 1523 break; 1524 #ifdef BFD 1525 case RTAX_BFD: 1526 if (sa->sa_family != AF_UNSPEC) 1527 return (EAFNOSUPPORT); 1528 size = sizeof(struct sockaddr_bfd); 1529 break; 1530 #endif 1531 case RTAX_DNS: 1532 /* more validation in rtm_validate_proposal */ 1533 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1534 return (EINVAL); 1535 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1536 sr_dns)) 1537 return (EINVAL); 1538 switch (sa->sa_family) { 1539 case AF_INET: 1540 #ifdef INET6 1541 case AF_INET6: 1542 #endif 1543 break; 1544 default: 1545 return (EAFNOSUPPORT); 1546 } 1547 break; 1548 case RTAX_STATIC: 1549 switch (sa->sa_family) { 1550 case AF_INET: 1551 #ifdef INET6 1552 case AF_INET6: 1553 #endif 1554 break; 1555 default: 1556 return (EAFNOSUPPORT); 1557 } 1558 maxlen = RTSTATIC_LEN; 1559 size = sizeof(struct sockaddr_rtstatic); 1560 break; 1561 case RTAX_SEARCH: 1562 if (sa->sa_family != AF_UNSPEC) 1563 return (EAFNOSUPPORT); 1564 maxlen = RTSEARCH_LEN; 1565 size = sizeof(struct sockaddr_rtsearch); 1566 break; 1567 } 1568 if (size) { 1569 /* memory for the full struct must be provided */ 1570 if (sa->sa_len < size) 1571 return (EINVAL); 1572 } 1573 if (maxlen) { 1574 /* this should not happen */ 1575 if (2 + maxlen > size) 1576 return (EINVAL); 1577 /* strings must be NUL terminated within the struct */ 1578 len = strnlen(sa->sa_data, maxlen); 1579 if (len >= maxlen || 2 + len >= sa->sa_len) 1580 return (EINVAL); 1581 break; 1582 } 1583 
} 1584 return (0); 1585 } 1586 1587 struct mbuf * 1588 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1589 { 1590 struct rt_msghdr *rtm; 1591 struct mbuf *m; 1592 int i; 1593 const struct sockaddr *sa; 1594 int len, dlen, hlen; 1595 1596 switch (type) { 1597 case RTM_DELADDR: 1598 case RTM_NEWADDR: 1599 hlen = sizeof(struct ifa_msghdr); 1600 break; 1601 case RTM_IFINFO: 1602 hlen = sizeof(struct if_msghdr); 1603 break; 1604 case RTM_IFANNOUNCE: 1605 hlen = sizeof(struct if_announcemsghdr); 1606 break; 1607 #ifdef BFD 1608 case RTM_BFD: 1609 hlen = sizeof(struct bfd_msghdr); 1610 break; 1611 #endif 1612 case RTM_80211INFO: 1613 hlen = sizeof(struct if_ieee80211_msghdr); 1614 break; 1615 default: 1616 hlen = sizeof(struct rt_msghdr); 1617 break; 1618 } 1619 len = hlen; 1620 for (i = 0; i < RTAX_MAX; i++) { 1621 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1622 continue; 1623 len += ROUNDUP(sa->sa_len); 1624 } 1625 if (len > MCLBYTES) 1626 panic("rtm_msg1"); 1627 m = m_gethdr(M_DONTWAIT, MT_DATA); 1628 if (m && len > MHLEN) { 1629 MCLGET(m, M_DONTWAIT); 1630 if ((m->m_flags & M_EXT) == 0) { 1631 m_free(m); 1632 m = NULL; 1633 } 1634 } 1635 if (m == NULL) 1636 return (m); 1637 m->m_pkthdr.len = m->m_len = len; 1638 m->m_pkthdr.ph_ifidx = 0; 1639 rtm = mtod(m, struct rt_msghdr *); 1640 bzero(rtm, len); 1641 len = hlen; 1642 for (i = 0; i < RTAX_MAX; i++) { 1643 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1644 continue; 1645 rtinfo->rti_addrs |= (1U << i); 1646 dlen = ROUNDUP(sa->sa_len); 1647 if (m_copyback(m, len, sa->sa_len, sa, M_NOWAIT)) { 1648 m_freem(m); 1649 return (NULL); 1650 } 1651 len += dlen; 1652 } 1653 rtm->rtm_msglen = len; 1654 rtm->rtm_hdrlen = hlen; 1655 rtm->rtm_version = RTM_VERSION; 1656 rtm->rtm_type = type; 1657 return (m); 1658 } 1659 1660 int 1661 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1662 struct walkarg *w) 1663 { 1664 int i; 1665 int len, dlen, hlen, second_time = 0; 1666 caddr_t cp0; 
1667 1668 rtinfo->rti_addrs = 0; 1669 again: 1670 switch (type) { 1671 case RTM_DELADDR: 1672 case RTM_NEWADDR: 1673 len = sizeof(struct ifa_msghdr); 1674 break; 1675 case RTM_IFINFO: 1676 len = sizeof(struct if_msghdr); 1677 break; 1678 default: 1679 len = sizeof(struct rt_msghdr); 1680 break; 1681 } 1682 hlen = len; 1683 if ((cp0 = cp) != NULL) 1684 cp += len; 1685 for (i = 0; i < RTAX_MAX; i++) { 1686 const struct sockaddr *sa; 1687 1688 if ((sa = rtinfo->rti_info[i]) == NULL) 1689 continue; 1690 rtinfo->rti_addrs |= (1U << i); 1691 dlen = ROUNDUP(sa->sa_len); 1692 if (cp) { 1693 bcopy(sa, cp, sa->sa_len); 1694 bzero(cp + sa->sa_len, dlen - sa->sa_len); 1695 cp += dlen; 1696 } 1697 len += dlen; 1698 } 1699 /* align message length to the next natural boundary */ 1700 len = ALIGN(len); 1701 if (cp == 0 && w != NULL && !second_time) { 1702 w->w_needed += len; 1703 if (w->w_needed <= w->w_given && w->w_where) { 1704 if (w->w_tmemsize < len) { 1705 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1706 w->w_tmem = malloc(len, M_RTABLE, 1707 M_NOWAIT | M_ZERO); 1708 if (w->w_tmem) 1709 w->w_tmemsize = len; 1710 } 1711 if (w->w_tmem) { 1712 cp = w->w_tmem; 1713 second_time = 1; 1714 goto again; 1715 } else 1716 w->w_where = 0; 1717 } 1718 } 1719 if (cp && w) /* clear the message header */ 1720 bzero(cp0, hlen); 1721 1722 if (cp) { 1723 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1724 1725 rtm->rtm_version = RTM_VERSION; 1726 rtm->rtm_type = type; 1727 rtm->rtm_msglen = len; 1728 rtm->rtm_hdrlen = hlen; 1729 } 1730 return (len); 1731 } 1732 1733 void 1734 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1735 { 1736 struct rt_addrinfo info; 1737 struct ifnet *ifp; 1738 struct sockaddr_rtlabel sa_rl; 1739 struct sockaddr_in6 sa_mask; 1740 1741 memset(&info, 0, sizeof(info)); 1742 info.rti_info[RTAX_DST] = rt_key(rt); 1743 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1744 if (!ISSET(rt->rt_flags, RTF_HOST)) 1745 info.rti_info[RTAX_NETMASK] = 
rt_plen2mask(rt, &sa_mask); 1746 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1747 ifp = if_get(rt->rt_ifidx); 1748 if (ifp != NULL) { 1749 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1750 info.rti_info[RTAX_IFA] = rtable_getsource(rtableid, 1751 info.rti_info[RTAX_DST]->sa_family); 1752 if (info.rti_info[RTAX_IFA] == NULL) 1753 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1754 } 1755 1756 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1757 rtableid); 1758 if_put(ifp); 1759 } 1760 1761 /* 1762 * This routine is called to generate a message from the routing 1763 * socket indicating that a redirect has occurred, a routing lookup 1764 * has failed, or that a protocol has detected timeouts to a particular 1765 * destination. 1766 */ 1767 void 1768 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1769 u_int ifidx, int error, u_int tableid) 1770 { 1771 struct rt_msghdr *rtm; 1772 struct mbuf *m; 1773 const struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1774 1775 if (rtptable.rtp_count == 0) 1776 return; 1777 m = rtm_msg1(type, rtinfo); 1778 if (m == NULL) 1779 return; 1780 rtm = mtod(m, struct rt_msghdr *); 1781 rtm->rtm_flags = RTF_DONE | flags; 1782 rtm->rtm_priority = prio; 1783 rtm->rtm_errno = error; 1784 rtm->rtm_tableid = tableid; 1785 rtm->rtm_addrs = rtinfo->rti_addrs; 1786 rtm->rtm_index = ifidx; 1787 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1788 } 1789 1790 /* 1791 * This routine is called to generate a message from the routing 1792 * socket indicating that the status of a network interface has changed. 
1793 */ 1794 void 1795 rtm_ifchg(struct ifnet *ifp) 1796 { 1797 struct rt_addrinfo info; 1798 struct if_msghdr *ifm; 1799 struct mbuf *m; 1800 1801 if (rtptable.rtp_count == 0) 1802 return; 1803 memset(&info, 0, sizeof(info)); 1804 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1805 m = rtm_msg1(RTM_IFINFO, &info); 1806 if (m == NULL) 1807 return; 1808 ifm = mtod(m, struct if_msghdr *); 1809 ifm->ifm_index = ifp->if_index; 1810 ifm->ifm_tableid = ifp->if_rdomain; 1811 ifm->ifm_flags = ifp->if_flags; 1812 ifm->ifm_xflags = ifp->if_xflags; 1813 if_getdata(ifp, &ifm->ifm_data); 1814 ifm->ifm_addrs = info.rti_addrs; 1815 route_input(m, NULL, AF_UNSPEC); 1816 } 1817 1818 /* 1819 * This is called to generate messages from the routing socket 1820 * indicating a network interface has had addresses associated with it. 1821 * if we ever reverse the logic and replace messages TO the routing 1822 * socket indicate a request to configure interfaces, then it will 1823 * be unnecessary as the routing socket will automatically generate 1824 * copies of it. 1825 */ 1826 void 1827 rtm_addr(int cmd, struct ifaddr *ifa) 1828 { 1829 struct ifnet *ifp = ifa->ifa_ifp; 1830 struct mbuf *m; 1831 struct rt_addrinfo info; 1832 struct ifa_msghdr *ifam; 1833 1834 if (rtptable.rtp_count == 0) 1835 return; 1836 1837 memset(&info, 0, sizeof(info)); 1838 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1839 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1840 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1841 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1842 if ((m = rtm_msg1(cmd, &info)) == NULL) 1843 return; 1844 ifam = mtod(m, struct ifa_msghdr *); 1845 ifam->ifam_index = ifp->if_index; 1846 ifam->ifam_metric = ifa->ifa_metric; 1847 ifam->ifam_flags = ifa->ifa_flags; 1848 ifam->ifam_addrs = info.rti_addrs; 1849 ifam->ifam_tableid = ifp->if_rdomain; 1850 1851 route_input(m, NULL, 1852 ifa->ifa_addr ? 
ifa->ifa_addr->sa_family : AF_UNSPEC); 1853 } 1854 1855 /* 1856 * This is called to generate routing socket messages indicating 1857 * network interface arrival and departure. 1858 */ 1859 void 1860 rtm_ifannounce(struct ifnet *ifp, int what) 1861 { 1862 struct if_announcemsghdr *ifan; 1863 struct mbuf *m; 1864 1865 if (rtptable.rtp_count == 0) 1866 return; 1867 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1868 if (m == NULL) 1869 return; 1870 ifan = mtod(m, struct if_announcemsghdr *); 1871 ifan->ifan_index = ifp->if_index; 1872 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1873 ifan->ifan_what = what; 1874 route_input(m, NULL, AF_UNSPEC); 1875 } 1876 1877 #ifdef BFD 1878 /* 1879 * This is used to generate routing socket messages indicating 1880 * the state of a BFD session. 1881 */ 1882 void 1883 rtm_bfd(struct bfd_config *bfd) 1884 { 1885 struct bfd_msghdr *bfdm; 1886 struct sockaddr_bfd sa_bfd; 1887 struct mbuf *m; 1888 struct rt_addrinfo info; 1889 1890 if (rtptable.rtp_count == 0) 1891 return; 1892 memset(&info, 0, sizeof(info)); 1893 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1894 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1895 1896 m = rtm_msg1(RTM_BFD, &info); 1897 if (m == NULL) 1898 return; 1899 bfdm = mtod(m, struct bfd_msghdr *); 1900 bfdm->bm_addrs = info.rti_addrs; 1901 1902 KERNEL_ASSERT_LOCKED(); 1903 bfd2sa(bfd->bc_rt, &sa_bfd); 1904 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1905 1906 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1907 } 1908 #endif /* BFD */ 1909 1910 /* 1911 * This is used to generate routing socket messages indicating 1912 * the state of an ieee80211 interface. 
1913 */ 1914 void 1915 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1916 { 1917 struct if_ieee80211_msghdr *ifim; 1918 struct mbuf *m; 1919 1920 if (rtptable.rtp_count == 0) 1921 return; 1922 m = rtm_msg1(RTM_80211INFO, NULL); 1923 if (m == NULL) 1924 return; 1925 ifim = mtod(m, struct if_ieee80211_msghdr *); 1926 ifim->ifim_index = ifp->if_index; 1927 ifim->ifim_tableid = ifp->if_rdomain; 1928 1929 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1930 route_input(m, NULL, AF_UNSPEC); 1931 } 1932 1933 /* 1934 * This is used to generate routing socket messages indicating 1935 * the address selection proposal from an interface. 1936 */ 1937 void 1938 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1939 uint8_t prio) 1940 { 1941 struct rt_msghdr *rtm; 1942 struct mbuf *m; 1943 1944 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1945 if (m == NULL) 1946 return; 1947 rtm = mtod(m, struct rt_msghdr *); 1948 rtm->rtm_flags = RTF_DONE | flags; 1949 rtm->rtm_priority = prio; 1950 rtm->rtm_tableid = ifp->if_rdomain; 1951 rtm->rtm_index = ifp->if_index; 1952 rtm->rtm_addrs = rtinfo->rti_addrs; 1953 1954 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1955 } 1956 1957 /* 1958 * This is used in dumping the kernel table via sysctl(). 
1959 */ 1960 int 1961 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1962 { 1963 struct walkarg *w = v; 1964 int error = 0, size; 1965 struct rt_addrinfo info; 1966 struct ifnet *ifp; 1967 #ifdef BFD 1968 struct sockaddr_bfd sa_bfd; 1969 #endif 1970 struct sockaddr_rtlabel sa_rl; 1971 struct sockaddr_in6 sa_mask; 1972 1973 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1974 return 0; 1975 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1976 u_int8_t prio = w->w_arg & RTP_MASK; 1977 if (w->w_arg < 0) { 1978 prio = (-w->w_arg) & RTP_MASK; 1979 /* Show all routes that are not this priority */ 1980 if (prio == (rt->rt_priority & RTP_MASK)) 1981 return 0; 1982 } else { 1983 if (prio != (rt->rt_priority & RTP_MASK) && 1984 prio != RTP_ANY) 1985 return 0; 1986 } 1987 } 1988 bzero(&info, sizeof(info)); 1989 info.rti_info[RTAX_DST] = rt_key(rt); 1990 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1991 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1992 ifp = if_get(rt->rt_ifidx); 1993 if (ifp != NULL) { 1994 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1995 info.rti_info[RTAX_IFA] = 1996 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); 1997 if (info.rti_info[RTAX_IFA] == NULL) 1998 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1999 if (ifp->if_flags & IFF_POINTOPOINT) 2000 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 2001 } 2002 if_put(ifp); 2003 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 2004 #ifdef BFD 2005 if (rt->rt_flags & RTF_BFD) { 2006 KERNEL_ASSERT_LOCKED(); 2007 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 2008 } 2009 #endif 2010 #ifdef MPLS 2011 if (rt->rt_flags & RTF_MPLS) { 2012 struct sockaddr_mpls sa_mpls; 2013 2014 bzero(&sa_mpls, sizeof(sa_mpls)); 2015 sa_mpls.smpls_family = AF_MPLS; 2016 sa_mpls.smpls_len = sizeof(sa_mpls); 2017 sa_mpls.smpls_label = ((struct rt_mpls *) 2018 rt->rt_llinfo)->mpls_label; 2019 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 2020 
info.rti_mpls = ((struct rt_mpls *) 2021 rt->rt_llinfo)->mpls_operation; 2022 } 2023 #endif 2024 2025 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 2026 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { 2027 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 2028 2029 rtm->rtm_pid = curproc->p_p->ps_pid; 2030 rtm->rtm_flags = RTF_DONE | rt->rt_flags; 2031 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 2032 rtm_getmetrics(rt, &rtm->rtm_rmx); 2033 /* Do not account the routing table's reference. */ 2034 rtm->rtm_rmx.rmx_refcnt = refcnt_read(&rt->rt_refcnt) - 1; 2035 rtm->rtm_index = rt->rt_ifidx; 2036 rtm->rtm_addrs = info.rti_addrs; 2037 rtm->rtm_tableid = id; 2038 #ifdef MPLS 2039 rtm->rtm_mpls = info.rti_mpls; 2040 #endif 2041 if ((error = copyout(rtm, w->w_where, size)) != 0) 2042 w->w_where = NULL; 2043 else 2044 w->w_where += size; 2045 } 2046 return (error); 2047 } 2048 2049 int 2050 sysctl_iflist(int af, struct walkarg *w) 2051 { 2052 struct ifnet *ifp; 2053 struct ifaddr *ifa; 2054 struct rt_addrinfo info; 2055 int len, error = 0; 2056 2057 bzero(&info, sizeof(info)); 2058 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 2059 if (w->w_arg && w->w_arg != ifp->if_index) 2060 continue; 2061 /* Copy the link-layer address first */ 2062 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 2063 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 2064 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { 2065 struct if_msghdr *ifm; 2066 2067 ifm = (struct if_msghdr *)w->w_tmem; 2068 ifm->ifm_index = ifp->if_index; 2069 ifm->ifm_tableid = ifp->if_rdomain; 2070 ifm->ifm_flags = ifp->if_flags; 2071 if_getdata(ifp, &ifm->ifm_data); 2072 ifm->ifm_addrs = info.rti_addrs; 2073 error = copyout(ifm, w->w_where, len); 2074 if (error) 2075 return (error); 2076 w->w_where += len; 2077 } 2078 info.rti_info[RTAX_IFP] = NULL; 2079 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 2080 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 2081 if (af && af != 
ifa->ifa_addr->sa_family) 2082 continue; 2083 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 2084 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 2085 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 2086 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 2087 if (w->w_where && w->w_tmem && 2088 w->w_needed <= w->w_given) { 2089 struct ifa_msghdr *ifam; 2090 2091 ifam = (struct ifa_msghdr *)w->w_tmem; 2092 ifam->ifam_index = ifa->ifa_ifp->if_index; 2093 ifam->ifam_flags = ifa->ifa_flags; 2094 ifam->ifam_metric = ifa->ifa_metric; 2095 ifam->ifam_addrs = info.rti_addrs; 2096 error = copyout(w->w_tmem, w->w_where, len); 2097 if (error) 2098 return (error); 2099 w->w_where += len; 2100 } 2101 } 2102 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 2103 info.rti_info[RTAX_BRD] = NULL; 2104 } 2105 return (0); 2106 } 2107 2108 int 2109 sysctl_ifnames(struct walkarg *w) 2110 { 2111 struct if_nameindex_msg ifn; 2112 struct ifnet *ifp; 2113 int error = 0; 2114 2115 /* XXX ignore tableid for now */ 2116 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 2117 if (w->w_arg && w->w_arg != ifp->if_index) 2118 continue; 2119 w->w_needed += sizeof(ifn); 2120 if (w->w_where && w->w_needed <= w->w_given) { 2121 2122 memset(&ifn, 0, sizeof(ifn)); 2123 ifn.if_index = ifp->if_index; 2124 strlcpy(ifn.if_name, ifp->if_xname, 2125 sizeof(ifn.if_name)); 2126 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2127 if (error) 2128 return (error); 2129 w->w_where += sizeof(ifn); 2130 } 2131 } 2132 2133 return (0); 2134 } 2135 2136 int 2137 sysctl_source(int af, u_int tableid, struct walkarg *w) 2138 { 2139 union { 2140 struct sockaddr_in in; 2141 #ifdef INET6 2142 struct sockaddr_in6 in6; 2143 #endif 2144 } buf; 2145 struct sockaddr *sa; 2146 int size, error = 0; 2147 2148 NET_LOCK_SHARED(); 2149 if ((sa = rtable_getsource(tableid, af)) != NULL) { 2150 switch (sa->sa_family) { 2151 case AF_INET: 2152 size = sizeof(struct sockaddr_in); 2153 break; 2154 #ifdef INET6 2155 case AF_INET6: 2156 size = 
sizeof(struct sockaddr_in6); 2157 break; 2158 #endif 2159 default: 2160 sa = NULL; 2161 break; 2162 } 2163 2164 } 2165 if (sa != NULL) 2166 memcpy(&buf, sa, size); 2167 NET_UNLOCK_SHARED(); 2168 2169 if (sa != NULL) { 2170 w->w_needed += size; 2171 if (w->w_where && w->w_needed <= w->w_given) { 2172 if ((error = copyout(&buf, w->w_where, size))) 2173 return (error); 2174 w->w_where += size; 2175 } 2176 } 2177 return (0); 2178 } 2179 2180 int 2181 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2182 size_t newlen) 2183 { 2184 int i, error = EINVAL; 2185 u_char af; 2186 struct walkarg w; 2187 struct rt_tableinfo tableinfo; 2188 u_int tableid = 0; 2189 2190 if (new) 2191 return (EPERM); 2192 if (namelen < 3 || namelen > 4) 2193 return (EINVAL); 2194 af = name[0]; 2195 bzero(&w, sizeof(w)); 2196 w.w_where = where; 2197 w.w_given = *given; 2198 w.w_op = name[1]; 2199 w.w_arg = name[2]; 2200 2201 if (namelen == 4) { 2202 tableid = name[3]; 2203 if (!rtable_exists(tableid)) 2204 return (ENOENT); 2205 } else 2206 tableid = curproc->p_p->ps_rtableid; 2207 2208 switch (w.w_op) { 2209 case NET_RT_DUMP: 2210 case NET_RT_FLAGS: 2211 NET_LOCK_SHARED(); 2212 for (i = 1; i <= AF_MAX; i++) { 2213 if (af != 0 && af != i) 2214 continue; 2215 2216 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2217 &w); 2218 if (error == EAFNOSUPPORT) 2219 error = 0; 2220 if (error) 2221 break; 2222 } 2223 NET_UNLOCK_SHARED(); 2224 break; 2225 2226 case NET_RT_IFLIST: 2227 NET_LOCK_SHARED(); 2228 error = sysctl_iflist(af, &w); 2229 NET_UNLOCK_SHARED(); 2230 break; 2231 2232 case NET_RT_STATS: 2233 return (sysctl_rtable_rtstat(where, given, new)); 2234 case NET_RT_TABLE: 2235 tableid = w.w_arg; 2236 if (!rtable_exists(tableid)) 2237 return (ENOENT); 2238 memset(&tableinfo, 0, sizeof tableinfo); 2239 tableinfo.rti_tableid = tableid; 2240 tableinfo.rti_domainid = rtable_l2(tableid); 2241 error = sysctl_rdstruct(where, given, new, 2242 &tableinfo, 
sizeof(tableinfo)); 2243 return (error); 2244 case NET_RT_IFNAMES: 2245 NET_LOCK_SHARED(); 2246 error = sysctl_ifnames(&w); 2247 NET_UNLOCK_SHARED(); 2248 break; 2249 case NET_RT_SOURCE: 2250 tableid = w.w_arg; 2251 if (!rtable_exists(tableid)) 2252 return (ENOENT); 2253 for (i = 1; i <= AF_MAX; i++) { 2254 if (af != 0 && af != i) 2255 continue; 2256 2257 error = sysctl_source(i, tableid, &w); 2258 if (error == EAFNOSUPPORT) 2259 error = 0; 2260 if (error) 2261 break; 2262 } 2263 break; 2264 } 2265 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2266 if (where) { 2267 *given = w.w_where - (caddr_t)where; 2268 if (w.w_needed > w.w_given) 2269 return (ENOMEM); 2270 } else if (w.w_needed == 0) { 2271 *given = 0; 2272 } else { 2273 *given = roundup(w.w_needed + MAX(w.w_needed / 10, 1024), 2274 PAGE_SIZE); 2275 } 2276 return (error); 2277 } 2278 2279 int 2280 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2281 { 2282 extern struct cpumem *rtcounters; 2283 uint64_t counters[rts_ncounters]; 2284 struct rtstat rtstat; 2285 uint32_t *words = (uint32_t *)&rtstat; 2286 int i; 2287 2288 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2289 memset(&rtstat, 0, sizeof rtstat); 2290 counters_read(rtcounters, counters, nitems(counters), NULL); 2291 2292 for (i = 0; i < nitems(counters); i++) 2293 words[i] = (uint32_t)counters[i]; 2294 2295 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2296 } 2297 2298 int 2299 rtm_validate_proposal(struct rt_addrinfo *info) 2300 { 2301 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2302 RTA_SEARCH)) { 2303 return -1; 2304 } 2305 2306 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2307 const struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2308 if (sa == NULL) 2309 return -1; 2310 switch (sa->sa_family) { 2311 case AF_INET: 2312 if (sa->sa_len != sizeof(struct sockaddr_in)) 2313 return -1; 2314 break; 2315 case AF_INET6: 2316 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2317 
return -1; 2318 break; 2319 default: 2320 return -1; 2321 } 2322 } 2323 2324 if (ISSET(info->rti_addrs, RTA_IFA)) { 2325 const struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2326 if (sa == NULL) 2327 return -1; 2328 switch (sa->sa_family) { 2329 case AF_INET: 2330 if (sa->sa_len != sizeof(struct sockaddr_in)) 2331 return -1; 2332 break; 2333 case AF_INET6: 2334 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2335 return -1; 2336 break; 2337 default: 2338 return -1; 2339 } 2340 } 2341 2342 if (ISSET(info->rti_addrs, RTA_DNS)) { 2343 const struct sockaddr_rtdns *rtdns = 2344 (const struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2345 if (rtdns == NULL) 2346 return -1; 2347 if (rtdns->sr_len > sizeof(*rtdns)) 2348 return -1; 2349 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2350 return -1; 2351 switch (rtdns->sr_family) { 2352 case AF_INET: 2353 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2354 sr_dns)) % sizeof(struct in_addr) != 0) 2355 return -1; 2356 break; 2357 #ifdef INET6 2358 case AF_INET6: 2359 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2360 sr_dns)) % sizeof(struct in6_addr) != 0) 2361 return -1; 2362 break; 2363 #endif 2364 default: 2365 return -1; 2366 } 2367 } 2368 2369 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2370 const struct sockaddr_rtstatic *rtstatic = (const struct 2371 sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2372 if (rtstatic == NULL) 2373 return -1; 2374 if (rtstatic->sr_len > sizeof(*rtstatic)) 2375 return -1; 2376 if (rtstatic->sr_len <= 2377 offsetof(struct sockaddr_rtstatic, sr_static)) 2378 return -1; 2379 } 2380 2381 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2382 const struct sockaddr_rtsearch *rtsearch = (const struct 2383 sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2384 if (rtsearch == NULL) 2385 return -1; 2386 if (rtsearch->sr_len > sizeof(*rtsearch)) 2387 return -1; 2388 if (rtsearch->sr_len <= 2389 offsetof(struct sockaddr_rtsearch, sr_search)) 2390 return -1; 2391 } 2392 2393 return 
0; 2394 } 2395 2396 int 2397 rt_setsource(unsigned int rtableid, const struct sockaddr *src) 2398 { 2399 struct ifaddr *ifa; 2400 /* 2401 * If source address is 0.0.0.0 or :: 2402 * use automatic source selection 2403 */ 2404 switch(src->sa_family) { 2405 case AF_INET: 2406 if(satosin_const(src)->sin_addr.s_addr == INADDR_ANY) { 2407 rtable_setsource(rtableid, AF_INET, NULL); 2408 return (0); 2409 } 2410 break; 2411 #ifdef INET6 2412 case AF_INET6: 2413 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6_const(src)->sin6_addr)) { 2414 rtable_setsource(rtableid, AF_INET6, NULL); 2415 return (0); 2416 } 2417 break; 2418 #endif 2419 default: 2420 return (EAFNOSUPPORT); 2421 } 2422 2423 /* 2424 * Check if source address is assigned to an interface in the 2425 * same rdomain 2426 */ 2427 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) 2428 return (EINVAL); 2429 2430 return rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr); 2431 } 2432 2433 /* 2434 * Definitions of protocols supported in the ROUTE domain. 2435 */ 2436 2437 const struct pr_usrreqs route_usrreqs = { 2438 .pru_attach = route_attach, 2439 .pru_detach = route_detach, 2440 .pru_disconnect = route_disconnect, 2441 .pru_shutdown = route_shutdown, 2442 .pru_rcvd = route_rcvd, 2443 .pru_send = route_send, 2444 .pru_sockaddr = route_sockaddr, 2445 .pru_peeraddr = route_peeraddr, 2446 }; 2447 2448 const struct protosw routesw[] = { 2449 { 2450 .pr_type = SOCK_RAW, 2451 .pr_domain = &routedomain, 2452 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2453 .pr_ctloutput = route_ctloutput, 2454 .pr_usrreqs = &route_usrreqs, 2455 .pr_init = route_prinit, 2456 .pr_sysctl = sysctl_rtable 2457 } 2458 }; 2459 2460 const struct domain routedomain = { 2461 .dom_family = PF_ROUTE, 2462 .dom_name = "route", 2463 .dom_init = route_init, 2464 .dom_protosw = routesw, 2465 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2466 }; 2467