/*	$OpenBSD: rtsock.c,v 1.334 2022/06/28 10:01:13 bluhm Exp $	*/
/*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/domain.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/srp.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>

#include <netinet/in.h>

#ifdef MPLS
#include <netmpls/mpls.h>
#endif
#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#include <net/if_enc.h>
#endif
#ifdef BFD
#include <net/bfd.h>
#endif

#include <sys/stdarg.h>
#include <sys/kernel.h>
#include <sys/timeout.h>

/*
 * Send and receive buffer reservations requested with soreserve() when
 * a routing socket is attached.
 */
#define	ROUTESNDQ	8192
#define	ROUTERCVQ	8192

/*
 * Fixed address record ({ sa_len, sa_family }) used as the source of
 * every message appended to a routing socket's receive buffer and
 * copied out as the peer address for PRU_PEERADDR.
 */
const struct sockaddr route_src = { 2, PF_ROUTE, };

/*
 * Argument bundle handed to rtm_msg2() and the sysctl_*() helpers
 * declared in this file.
 * NOTE(review): the code that fills in and consumes these members lies
 * outside this part of the file; the names suggest an operation code
 * and argument, a scratch buffer with its size, and a destination
 * buffer with given/needed byte counts -- confirm against the users.
 */
struct walkarg {
	int	w_op, w_arg, w_tmemsize;
	size_t	w_given, w_needed;
	caddr_t	w_where, w_tmem;
};

/* Routing socket protocol entry points and internal helpers. */
void	route_prinit(void);
void	rcb_ref(void *, void *);
void	rcb_unref(void *, void *);
int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
	    struct mbuf *);
int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
	    struct mbuf *, struct proc *);
void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int	route_cleargateway(struct rtentry *, void *, unsigned int);
void	rtm_senddesync_timer(void *);
void	rtm_senddesync(struct socket *);
int	rtm_sendup(struct socket *, struct mbuf *);

/* Routing message construction and processing helpers. */
int	rtm_getifa(struct rt_addrinfo *, unsigned int);
int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
	    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
129 int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t, 130 struct walkarg *); 131 int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 132 int rtm_validate_proposal(struct rt_addrinfo *); 133 void rtm_setmetrics(u_long, const struct rt_metrics *, 134 struct rt_kmetrics *); 135 void rtm_getmetrics(const struct rtentry *, 136 struct rt_metrics *); 137 138 int sysctl_iflist(int, struct walkarg *); 139 int sysctl_ifnames(struct walkarg *); 140 int sysctl_rtable_rtstat(void *, size_t *, void *); 141 142 int rt_setsource(unsigned int, struct sockaddr *); 143 144 /* 145 * Locks used to protect struct members 146 * I immutable after creation 147 * s solock 148 */ 149 struct rtpcb { 150 struct socket *rop_socket; /* [I] */ 151 152 SRPL_ENTRY(rtpcb) rop_list; 153 struct refcnt rop_refcnt; 154 struct timeout rop_timeout; 155 unsigned int rop_msgfilter; /* [s] */ 156 unsigned int rop_flagfilter; /* [s] */ 157 unsigned int rop_flags; /* [s] */ 158 u_int rop_rtableid; /* [s] */ 159 unsigned short rop_proto; /* [I] */ 160 u_char rop_priority; /* [s] */ 161 }; 162 #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) 163 164 struct rtptable { 165 SRPL_HEAD(, rtpcb) rtp_list; 166 struct srpl_rc rtp_rc; 167 struct rwlock rtp_lk; 168 unsigned int rtp_count; 169 }; 170 171 struct pool rtpcb_pool; 172 struct rtptable rtptable; 173 174 /* 175 * These flags and timeout are used for indicating to userland (via a 176 * RTM_DESYNC msg) when the route socket has overflowed and messages 177 * have been lost. 
178 */ 179 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 180 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 181 queueing more packets */ 182 183 #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ 184 185 void 186 route_prinit(void) 187 { 188 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); 189 rw_init(&rtptable.rtp_lk, "rtsock"); 190 SRPL_INIT(&rtptable.rtp_list); 191 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, 192 IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL); 193 } 194 195 void 196 rcb_ref(void *null, void *v) 197 { 198 struct rtpcb *rop = v; 199 200 refcnt_take(&rop->rop_refcnt); 201 } 202 203 void 204 rcb_unref(void *null, void *v) 205 { 206 struct rtpcb *rop = v; 207 208 refcnt_rele_wake(&rop->rop_refcnt); 209 } 210 211 int 212 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 213 struct mbuf *control, struct proc *p) 214 { 215 struct rtpcb *rop; 216 int error = 0; 217 218 if (req == PRU_CONTROL) 219 return (EOPNOTSUPP); 220 221 soassertlocked(so); 222 223 if (control && control->m_len) { 224 error = EOPNOTSUPP; 225 goto release; 226 } 227 228 rop = sotortpcb(so); 229 if (rop == NULL) { 230 error = EINVAL; 231 goto release; 232 } 233 234 switch (req) { 235 /* no connect, bind, accept. Socket is connected from the start */ 236 case PRU_CONNECT: 237 case PRU_BIND: 238 case PRU_CONNECT2: 239 case PRU_LISTEN: 240 case PRU_ACCEPT: 241 error = EOPNOTSUPP; 242 break; 243 244 case PRU_DISCONNECT: 245 case PRU_ABORT: 246 soisdisconnected(so); 247 break; 248 case PRU_SHUTDOWN: 249 socantsendmore(so); 250 break; 251 case PRU_SENSE: 252 /* stat: don't bother with a blocksize. 
*/ 253 break; 254 255 /* minimal support, just implement a fake peer address */ 256 case PRU_SOCKADDR: 257 error = EINVAL; 258 break; 259 case PRU_PEERADDR: 260 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); 261 nam->m_len = route_src.sa_len; 262 break; 263 264 case PRU_RCVD: 265 /* 266 * If we are in a FLUSH state, check if the buffer is 267 * empty so that we can clear the flag. 268 */ 269 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 270 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == 271 rop->rop_socket->so_rcv.sb_hiwat))) 272 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 273 break; 274 275 case PRU_RCVOOB: 276 case PRU_SENDOOB: 277 error = EOPNOTSUPP; 278 break; 279 case PRU_SEND: 280 if (nam) { 281 error = EISCONN; 282 break; 283 } 284 error = (*so->so_proto->pr_output)(m, so, NULL, NULL); 285 m = NULL; 286 break; 287 default: 288 panic("route_usrreq"); 289 } 290 291 release: 292 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 293 m_freem(control); 294 m_freem(m); 295 } 296 return (error); 297 } 298 299 int 300 route_attach(struct socket *so, int proto) 301 { 302 struct rtpcb *rop; 303 int error; 304 305 error = soreserve(so, ROUTESNDQ, ROUTERCVQ); 306 if (error) 307 return (error); 308 /* 309 * use the rawcb but allocate a rtpcb, this 310 * code does not care about the additional fields 311 * and works directly on the raw socket. 
312 */ 313 rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO); 314 so->so_pcb = rop; 315 /* Init the timeout structure */ 316 timeout_set_proc(&rop->rop_timeout, rtm_senddesync_timer, so); 317 refcnt_init(&rop->rop_refcnt); 318 319 rop->rop_socket = so; 320 rop->rop_proto = proto; 321 322 rop->rop_rtableid = curproc->p_p->ps_rtableid; 323 324 soisconnected(so); 325 so->so_options |= SO_USELOOPBACK; 326 327 rw_enter(&rtptable.rtp_lk, RW_WRITE); 328 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, 329 rop_list); 330 rtptable.rtp_count++; 331 rw_exit(&rtptable.rtp_lk); 332 333 return (0); 334 } 335 336 int 337 route_detach(struct socket *so) 338 { 339 struct rtpcb *rop; 340 341 soassertlocked(so); 342 343 rop = sotortpcb(so); 344 if (rop == NULL) 345 return (EINVAL); 346 347 rw_enter(&rtptable.rtp_lk, RW_WRITE); 348 349 rtptable.rtp_count--; 350 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, 351 rop_list); 352 rw_exit(&rtptable.rtp_lk); 353 354 sounlock(so); 355 356 /* wait for all references to drop */ 357 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); 358 timeout_del_barrier(&rop->rop_timeout); 359 360 solock(so); 361 362 so->so_pcb = NULL; 363 KASSERT((so->so_state & SS_NOFDREF) == 0); 364 pool_put(&rtpcb_pool, rop); 365 366 return (0); 367 } 368 369 int 370 route_ctloutput(int op, struct socket *so, int level, int optname, 371 struct mbuf *m) 372 { 373 struct rtpcb *rop = sotortpcb(so); 374 int error = 0; 375 unsigned int tid, prio; 376 377 if (level != AF_ROUTE) 378 return (EINVAL); 379 380 switch (op) { 381 case PRCO_SETOPT: 382 switch (optname) { 383 case ROUTE_MSGFILTER: 384 if (m == NULL || m->m_len != sizeof(unsigned int)) 385 error = EINVAL; 386 else 387 rop->rop_msgfilter = *mtod(m, unsigned int *); 388 break; 389 case ROUTE_TABLEFILTER: 390 if (m == NULL || m->m_len != sizeof(unsigned int)) { 391 error = EINVAL; 392 break; 393 } 394 tid = *mtod(m, unsigned int *); 395 if (tid != RTABLE_ANY && !rtable_exists(tid)) 396 
error = ENOENT; 397 else 398 rop->rop_rtableid = tid; 399 break; 400 case ROUTE_PRIOFILTER: 401 if (m == NULL || m->m_len != sizeof(unsigned int)) { 402 error = EINVAL; 403 break; 404 } 405 prio = *mtod(m, unsigned int *); 406 if (prio > RTP_MAX) 407 error = EINVAL; 408 else 409 rop->rop_priority = prio; 410 break; 411 case ROUTE_FLAGFILTER: 412 if (m == NULL || m->m_len != sizeof(unsigned int)) 413 error = EINVAL; 414 else 415 rop->rop_flagfilter = *mtod(m, unsigned int *); 416 break; 417 default: 418 error = ENOPROTOOPT; 419 break; 420 } 421 break; 422 case PRCO_GETOPT: 423 switch (optname) { 424 case ROUTE_MSGFILTER: 425 m->m_len = sizeof(unsigned int); 426 *mtod(m, unsigned int *) = rop->rop_msgfilter; 427 break; 428 case ROUTE_TABLEFILTER: 429 m->m_len = sizeof(unsigned int); 430 *mtod(m, unsigned int *) = rop->rop_rtableid; 431 break; 432 case ROUTE_PRIOFILTER: 433 m->m_len = sizeof(unsigned int); 434 *mtod(m, unsigned int *) = rop->rop_priority; 435 break; 436 case ROUTE_FLAGFILTER: 437 m->m_len = sizeof(unsigned int); 438 *mtod(m, unsigned int *) = rop->rop_flagfilter; 439 break; 440 default: 441 error = ENOPROTOOPT; 442 break; 443 } 444 } 445 return (error); 446 } 447 448 void 449 rtm_senddesync_timer(void *xso) 450 { 451 struct socket *so = xso; 452 453 solock(so); 454 rtm_senddesync(so); 455 sounlock(so); 456 } 457 458 void 459 rtm_senddesync(struct socket *so) 460 { 461 struct rtpcb *rop = sotortpcb(so); 462 struct mbuf *desync_mbuf; 463 464 soassertlocked(so); 465 466 /* 467 * Dying socket is disconnected by upper layer and there is 468 * no reason to send packet. Also we shouldn't reschedule 469 * timeout(9), otherwise timeout_del_barrier(9) can't help us. 
470 */ 471 if ((so->so_state & SS_ISCONNECTED) == 0 || 472 (so->so_state & SS_CANTRCVMORE)) 473 return; 474 475 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 476 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 477 return; 478 479 /* 480 * If we fail to alloc memory or if sbappendaddr() 481 * fails, re-add timeout and try again. 482 */ 483 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 484 if (desync_mbuf != NULL) { 485 if (sbappendaddr(so, &so->so_rcv, &route_src, 486 desync_mbuf, NULL) != 0) { 487 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 488 sorwakeup(rop->rop_socket); 489 return; 490 } 491 m_freem(desync_mbuf); 492 } 493 /* Re-add timeout to try sending msg again */ 494 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 495 } 496 497 void 498 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 499 { 500 struct socket *so; 501 struct rtpcb *rop; 502 struct rt_msghdr *rtm; 503 struct mbuf *m = m0; 504 struct srp_ref sr; 505 506 /* ensure that we can access the rtm_type via mtod() */ 507 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 508 m_freem(m); 509 return; 510 } 511 512 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 513 /* 514 * If route socket is bound to an address family only send 515 * messages that match the address family. Address family 516 * agnostic messages are always sent. 517 */ 518 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 519 rop->rop_proto != sa_family) 520 continue; 521 522 523 so = rop->rop_socket; 524 solock(so); 525 526 /* 527 * Check to see if we don't want our own messages and 528 * if we can receive anything. 
529 */ 530 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 531 !(so->so_state & SS_ISCONNECTED) || 532 (so->so_state & SS_CANTRCVMORE)) 533 goto next; 534 535 /* filter messages that the process does not want */ 536 rtm = mtod(m, struct rt_msghdr *); 537 /* but RTM_DESYNC can't be filtered */ 538 if (rtm->rtm_type != RTM_DESYNC) { 539 if (rop->rop_msgfilter != 0 && 540 !(rop->rop_msgfilter & (1U << rtm->rtm_type))) 541 goto next; 542 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags)) 543 goto next; 544 } 545 switch (rtm->rtm_type) { 546 case RTM_IFANNOUNCE: 547 case RTM_DESYNC: 548 /* no tableid */ 549 break; 550 case RTM_RESOLVE: 551 case RTM_NEWADDR: 552 case RTM_DELADDR: 553 case RTM_IFINFO: 554 case RTM_80211INFO: 555 case RTM_BFD: 556 /* check against rdomain id */ 557 if (rop->rop_rtableid != RTABLE_ANY && 558 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 559 goto next; 560 break; 561 default: 562 if (rop->rop_priority != 0 && 563 rop->rop_priority < rtm->rtm_priority) 564 goto next; 565 /* check against rtable id */ 566 if (rop->rop_rtableid != RTABLE_ANY && 567 rop->rop_rtableid != rtm->rtm_tableid) 568 goto next; 569 break; 570 } 571 572 /* 573 * Check to see if the flush flag is set. If so, don't queue 574 * any more messages until the flag is cleared. 
575 */ 576 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 577 goto next; 578 579 rtm_sendup(so, m); 580 next: 581 sounlock(so); 582 } 583 SRPL_LEAVE(&sr); 584 585 m_freem(m); 586 } 587 588 int 589 rtm_sendup(struct socket *so, struct mbuf *m0) 590 { 591 struct rtpcb *rop = sotortpcb(so); 592 struct mbuf *m; 593 594 soassertlocked(so); 595 596 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 597 if (m == NULL) 598 return (ENOMEM); 599 600 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || 601 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { 602 /* Flag socket as desync'ed and flush required */ 603 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 604 rtm_senddesync(so); 605 m_freem(m); 606 return (ENOBUFS); 607 } 608 609 sorwakeup(so); 610 return (0); 611 } 612 613 struct rt_msghdr * 614 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 615 { 616 struct rt_msghdr *rtm; 617 struct rt_addrinfo info; 618 struct sockaddr_rtlabel sa_rl; 619 struct sockaddr_in6 sa_mask; 620 #ifdef BFD 621 struct sockaddr_bfd sa_bfd; 622 #endif 623 struct ifnet *ifp = NULL; 624 int len; 625 626 bzero(&info, sizeof(info)); 627 info.rti_info[RTAX_DST] = rt_key(rt); 628 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 629 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 630 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 631 #ifdef BFD 632 if (rt->rt_flags & RTF_BFD) { 633 KERNEL_LOCK(); 634 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 635 KERNEL_UNLOCK(); 636 } 637 #endif 638 #ifdef MPLS 639 if (rt->rt_flags & RTF_MPLS) { 640 struct sockaddr_mpls sa_mpls; 641 642 bzero(&sa_mpls, sizeof(sa_mpls)); 643 sa_mpls.smpls_family = AF_MPLS; 644 sa_mpls.smpls_len = sizeof(sa_mpls); 645 sa_mpls.smpls_label = ((struct rt_mpls *) 646 rt->rt_llinfo)->mpls_label; 647 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 648 info.rti_mpls = ((struct rt_mpls *) 649 rt->rt_llinfo)->mpls_operation; 650 } 651 #endif 652 ifp = if_get(rt->rt_ifidx); 653 if (ifp 
!= NULL) { 654 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 655 info.rti_info[RTAX_IFA] = rtable_getsource(tableid, 656 info.rti_info[RTAX_DST]->sa_family); 657 if (info.rti_info[RTAX_IFA] == NULL) 658 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 659 if (ifp->if_flags & IFF_POINTOPOINT) 660 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 661 } 662 if_put(ifp); 663 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 664 665 /* build new route message */ 666 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 667 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 668 669 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 670 rtm->rtm_type = type; 671 rtm->rtm_index = rt->rt_ifidx; 672 rtm->rtm_tableid = tableid; 673 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 674 rtm->rtm_flags = rt->rt_flags; 675 rtm->rtm_pid = curproc->p_p->ps_pid; 676 rtm->rtm_seq = seq; 677 rtm_getmetrics(rt, &rtm->rtm_rmx); 678 rtm->rtm_addrs = info.rti_addrs; 679 #ifdef MPLS 680 rtm->rtm_mpls = info.rti_mpls; 681 #endif 682 return rtm; 683 } 684 685 int 686 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 687 struct mbuf *control) 688 { 689 struct rt_msghdr *rtm = NULL; 690 struct rtentry *rt = NULL; 691 struct rt_addrinfo info; 692 struct ifnet *ifp; 693 int len, seq, useloopback, error = 0; 694 u_int tableid; 695 u_int8_t prio; 696 u_char vers, type; 697 698 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 699 (m = m_pullup(m, sizeof(int32_t))) == 0)) 700 return (ENOBUFS); 701 if ((m->m_flags & M_PKTHDR) == 0) 702 panic("route_output"); 703 704 useloopback = so->so_options & SO_USELOOPBACK; 705 706 /* 707 * The socket can't be closed concurrently because the file 708 * descriptor reference is still held. 
709 */ 710 711 sounlock(so); 712 713 len = m->m_pkthdr.len; 714 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 || 715 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 716 error = EINVAL; 717 goto fail; 718 } 719 vers = mtod(m, struct rt_msghdr *)->rtm_version; 720 switch (vers) { 721 case RTM_VERSION: 722 if (len < sizeof(struct rt_msghdr)) { 723 error = EINVAL; 724 goto fail; 725 } 726 if (len > RTM_MAXSIZE) { 727 error = EMSGSIZE; 728 goto fail; 729 } 730 rtm = malloc(len, M_RTABLE, M_WAITOK); 731 m_copydata(m, 0, len, rtm); 732 break; 733 default: 734 error = EPROTONOSUPPORT; 735 goto fail; 736 } 737 738 /* Verify that the caller is sending an appropriate message early */ 739 switch (rtm->rtm_type) { 740 case RTM_ADD: 741 case RTM_DELETE: 742 case RTM_GET: 743 case RTM_CHANGE: 744 case RTM_PROPOSAL: 745 case RTM_SOURCE: 746 break; 747 default: 748 error = EOPNOTSUPP; 749 goto fail; 750 } 751 /* 752 * Verify that the header length is valid. 753 * All messages from userland start with a struct rt_msghdr. 754 */ 755 if (rtm->rtm_hdrlen == 0) /* old client */ 756 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 757 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 758 len < rtm->rtm_hdrlen) { 759 error = EINVAL; 760 goto fail; 761 } 762 763 rtm->rtm_pid = curproc->p_p->ps_pid; 764 765 /* 766 * Verify that the caller has the appropriate privilege; RTM_GET 767 * is the only operation the non-superuser is allowed. 768 */ 769 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 770 error = EACCES; 771 goto fail; 772 } 773 tableid = rtm->rtm_tableid; 774 if (!rtable_exists(tableid)) { 775 if (rtm->rtm_type == RTM_ADD) { 776 if ((error = rtable_add(tableid)) != 0) 777 goto fail; 778 } else { 779 error = EINVAL; 780 goto fail; 781 } 782 } 783 784 /* Do not let userland play with kernel-only flags. 
*/ 785 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 786 error = EINVAL; 787 goto fail; 788 } 789 790 /* make sure that kernel-only bits are not set */ 791 rtm->rtm_priority &= RTP_MASK; 792 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 793 rtm->rtm_fmask &= RTF_FMASK; 794 795 if (rtm->rtm_priority != 0) { 796 if (rtm->rtm_priority > RTP_MAX || 797 rtm->rtm_priority == RTP_LOCAL) { 798 error = EINVAL; 799 goto fail; 800 } 801 prio = rtm->rtm_priority; 802 } else if (rtm->rtm_type != RTM_ADD) 803 prio = RTP_ANY; 804 else if (rtm->rtm_flags & RTF_STATIC) 805 prio = 0; 806 else 807 prio = RTP_DEFAULT; 808 809 bzero(&info, sizeof(info)); 810 info.rti_addrs = rtm->rtm_addrs; 811 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 812 len + (caddr_t)rtm, &info)) != 0) 813 goto fail; 814 815 info.rti_flags = rtm->rtm_flags; 816 817 if (rtm->rtm_type != RTM_SOURCE && 818 rtm->rtm_type != RTM_PROPOSAL && 819 (info.rti_info[RTAX_DST] == NULL || 820 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 821 (info.rti_info[RTAX_GATEWAY] != NULL && 822 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 823 info.rti_info[RTAX_GENMASK] != NULL)) { 824 error = EINVAL; 825 goto fail; 826 } 827 #ifdef MPLS 828 info.rti_mpls = rtm->rtm_mpls; 829 #endif 830 831 if (info.rti_info[RTAX_GATEWAY] != NULL && 832 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 833 (info.rti_flags & RTF_CLONING) == 0) { 834 info.rti_flags |= RTF_LLINFO; 835 } 836 837 /* 838 * Validate RTM_PROPOSAL and pass it along or error out. 839 */ 840 if (rtm->rtm_type == RTM_PROPOSAL) { 841 if (rtm_validate_proposal(&info) == -1) { 842 error = EINVAL; 843 goto fail; 844 } 845 /* 846 * If this is a solicitation proposal forward request to 847 * all interfaces. Most handlers will ignore it but at least 848 * umb(4) will send a response to this event. 
849 */ 850 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 851 NET_LOCK(); 852 TAILQ_FOREACH(ifp, &ifnet, if_list) { 853 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 854 } 855 NET_UNLOCK(); 856 } 857 } else if (rtm->rtm_type == RTM_SOURCE) { 858 if (info.rti_info[RTAX_IFA] == NULL) { 859 error = EINVAL; 860 goto fail; 861 } 862 if ((error = 863 rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0) 864 goto fail; 865 } else { 866 error = rtm_output(rtm, &rt, &info, prio, tableid); 867 if (!error) { 868 type = rtm->rtm_type; 869 seq = rtm->rtm_seq; 870 free(rtm, M_RTABLE, len); 871 rtm = rtm_report(rt, type, seq, tableid); 872 len = rtm->rtm_msglen; 873 } 874 } 875 876 rtfree(rt); 877 if (error) { 878 rtm->rtm_errno = error; 879 } else { 880 rtm->rtm_flags |= RTF_DONE; 881 } 882 883 /* 884 * Check to see if we don't want our own messages. 885 */ 886 if (!useloopback) { 887 if (rtptable.rtp_count == 0) { 888 /* no other listener and no loopback of messages */ 889 goto fail; 890 } 891 } 892 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 893 m_freem(m); 894 m = NULL; 895 } else if (m->m_pkthdr.len > len) 896 m_adj(m, len - m->m_pkthdr.len); 897 free(rtm, M_RTABLE, len); 898 if (m) 899 route_input(m, so, info.rti_info[RTAX_DST] ? 
900 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 901 solock(so); 902 903 return (error); 904 fail: 905 free(rtm, M_RTABLE, len); 906 m_freem(m); 907 solock(so); 908 909 return (error); 910 } 911 912 int 913 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 914 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 915 { 916 struct rtentry *rt = *prt; 917 struct ifnet *ifp = NULL; 918 int plen, newgate = 0, error = 0; 919 920 switch (rtm->rtm_type) { 921 case RTM_ADD: 922 if (info->rti_info[RTAX_GATEWAY] == NULL) { 923 error = EINVAL; 924 break; 925 } 926 927 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 928 if ((error = route_arp_conflict(rt, info))) { 929 rtfree(rt); 930 rt = NULL; 931 break; 932 } 933 934 /* 935 * We cannot go through a delete/create/insert cycle for 936 * cached route because this can lead to races in the 937 * receive path. Instead we update the L2 cache. 938 */ 939 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) { 940 ifp = if_get(rt->rt_ifidx); 941 if (ifp == NULL) { 942 rtfree(rt); 943 rt = NULL; 944 error = ESRCH; 945 break; 946 } 947 948 goto change; 949 } 950 951 rtfree(rt); 952 rt = NULL; 953 954 NET_LOCK(); 955 if ((error = rtm_getifa(info, tableid)) != 0) { 956 NET_UNLOCK(); 957 break; 958 } 959 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 960 NET_UNLOCK(); 961 if (error == 0) 962 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 963 &rt->rt_rmx); 964 break; 965 case RTM_DELETE: 966 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 967 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 968 prio); 969 if (rt == NULL) { 970 error = ESRCH; 971 break; 972 } 973 974 /* 975 * If we got multipath routes, we require users to specify 976 * a matching gateway. 
977 */ 978 if (ISSET(rt->rt_flags, RTF_MPATH) && 979 info->rti_info[RTAX_GATEWAY] == NULL) { 980 error = ESRCH; 981 break; 982 } 983 984 ifp = if_get(rt->rt_ifidx); 985 if (ifp == NULL) { 986 rtfree(rt); 987 rt = NULL; 988 error = ESRCH; 989 break; 990 } 991 992 /* 993 * Invalidate the cache of automagically created and 994 * referenced L2 entries to make sure that ``rt_gwroute'' 995 * pointer stays valid for other CPUs. 996 */ 997 if ((ISSET(rt->rt_flags, RTF_CACHED))) { 998 NET_LOCK(); 999 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 1000 /* Reset the MTU of the gateway route. */ 1001 rtable_walk(tableid, rt_key(rt)->sa_family, NULL, 1002 route_cleargateway, rt); 1003 NET_UNLOCK(); 1004 break; 1005 } 1006 1007 /* 1008 * Make sure that local routes are only modified by the 1009 * kernel. 1010 */ 1011 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1012 error = EINVAL; 1013 break; 1014 } 1015 1016 rtfree(rt); 1017 rt = NULL; 1018 1019 NET_LOCK(); 1020 error = rtrequest_delete(info, prio, ifp, &rt, tableid); 1021 NET_UNLOCK(); 1022 break; 1023 case RTM_CHANGE: 1024 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1025 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1026 prio); 1027 /* 1028 * If we got multipath routes, we require users to specify 1029 * a matching gateway. 1030 */ 1031 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 1032 (info->rti_info[RTAX_GATEWAY] == NULL)) { 1033 rtfree(rt); 1034 rt = NULL; 1035 } 1036 1037 /* 1038 * If RTAX_GATEWAY is the argument we're trying to 1039 * change, try to find a compatible route. 1040 */ 1041 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) { 1042 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1043 info->rti_info[RTAX_NETMASK], NULL, prio); 1044 /* Ensure we don't pick a multipath one. 
*/ 1045 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 1046 rtfree(rt); 1047 rt = NULL; 1048 } 1049 } 1050 1051 if (rt == NULL) { 1052 error = ESRCH; 1053 break; 1054 } 1055 1056 /* 1057 * Make sure that local routes are only modified by the 1058 * kernel. 1059 */ 1060 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1061 error = EINVAL; 1062 break; 1063 } 1064 1065 ifp = if_get(rt->rt_ifidx); 1066 if (ifp == NULL) { 1067 rtfree(rt); 1068 rt = NULL; 1069 error = ESRCH; 1070 break; 1071 } 1072 1073 /* 1074 * RTM_CHANGE needs a perfect match. 1075 */ 1076 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family, 1077 info->rti_info[RTAX_NETMASK]); 1078 if (rt_plen(rt) != plen) { 1079 error = ESRCH; 1080 break; 1081 } 1082 1083 if (info->rti_info[RTAX_GATEWAY] != NULL) 1084 if (rt->rt_gateway == NULL || 1085 bcmp(rt->rt_gateway, 1086 info->rti_info[RTAX_GATEWAY], 1087 info->rti_info[RTAX_GATEWAY]->sa_len)) { 1088 newgate = 1; 1089 } 1090 /* 1091 * Check reachable gateway before changing the route. 1092 * New gateway could require new ifaddr, ifp; 1093 * flags may also be different; ifp may be specified 1094 * by ll sockaddr when protocol address is ambiguous. 1095 */ 1096 if (newgate || info->rti_info[RTAX_IFP] != NULL || 1097 info->rti_info[RTAX_IFA] != NULL) { 1098 struct ifaddr *ifa = NULL; 1099 1100 NET_LOCK(); 1101 if ((error = rtm_getifa(info, tableid)) != 0) { 1102 NET_UNLOCK(); 1103 break; 1104 } 1105 ifa = info->rti_ifa; 1106 if (rt->rt_ifa != ifa) { 1107 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 1108 ifafree(rt->rt_ifa); 1109 1110 ifa->ifa_refcnt++; 1111 rt->rt_ifa = ifa; 1112 rt->rt_ifidx = ifa->ifa_ifp->if_index; 1113 /* recheck link state after ifp change */ 1114 rt_if_linkstate_change(rt, ifa->ifa_ifp, 1115 tableid); 1116 } 1117 NET_UNLOCK(); 1118 } 1119 change: 1120 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1121 /* When updating the gateway, make sure it is valid. 
*/ 1122 if (!newgate && rt->rt_gateway->sa_family != 1123 info->rti_info[RTAX_GATEWAY]->sa_family) { 1124 error = EINVAL; 1125 break; 1126 } 1127 1128 NET_LOCK(); 1129 error = rt_setgate(rt, 1130 info->rti_info[RTAX_GATEWAY], tableid); 1131 NET_UNLOCK(); 1132 if (error) 1133 break; 1134 } 1135 #ifdef MPLS 1136 if (rtm->rtm_flags & RTF_MPLS) { 1137 NET_LOCK(); 1138 error = rt_mpls_set(rt, 1139 info->rti_info[RTAX_SRC], info->rti_mpls); 1140 NET_UNLOCK(); 1141 if (error) 1142 break; 1143 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) { 1144 NET_LOCK(); 1145 /* if gateway changed remove MPLS information */ 1146 rt_mpls_clear(rt); 1147 NET_UNLOCK(); 1148 } 1149 #endif 1150 1151 #ifdef BFD 1152 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 1153 KERNEL_LOCK(); 1154 error = bfdset(rt); 1155 KERNEL_UNLOCK(); 1156 if (error) 1157 break; 1158 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 1159 ISSET(rtm->rtm_fmask, RTF_BFD)) { 1160 KERNEL_LOCK(); 1161 bfdclear(rt); 1162 KERNEL_UNLOCK(); 1163 } 1164 #endif 1165 1166 NET_LOCK(); 1167 /* Hack to allow some flags to be toggled */ 1168 if (rtm->rtm_fmask) { 1169 /* MPLS flag it is set by rt_mpls_set() */ 1170 rtm->rtm_fmask &= ~RTF_MPLS; 1171 rtm->rtm_flags &= ~RTF_MPLS; 1172 rt->rt_flags = 1173 (rt->rt_flags & ~rtm->rtm_fmask) | 1174 (rtm->rtm_flags & rtm->rtm_fmask); 1175 } 1176 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); 1177 1178 ifp->if_rtrequest(ifp, RTM_ADD, rt); 1179 1180 if (info->rti_info[RTAX_LABEL] != NULL) { 1181 char *rtlabel = ((struct sockaddr_rtlabel *) 1182 info->rti_info[RTAX_LABEL])->sr_label; 1183 rtlabel_unref(rt->rt_labelid); 1184 rt->rt_labelid = rtlabel_name2id(rtlabel); 1185 } 1186 if_group_routechange(info->rti_info[RTAX_DST], 1187 info->rti_info[RTAX_NETMASK]); 1188 rt->rt_locks &= ~(rtm->rtm_inits); 1189 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 1190 NET_UNLOCK(); 1191 break; 1192 case RTM_GET: 1193 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1194 
info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1195 prio); 1196 if (rt == NULL) 1197 error = ESRCH; 1198 break; 1199 } 1200 1201 if_put(ifp); 1202 *prt = rt; 1203 return (error); 1204 } 1205 1206 struct ifaddr * 1207 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway, 1208 unsigned int rtableid) 1209 { 1210 struct ifaddr *ifa; 1211 1212 if ((flags & RTF_GATEWAY) == 0) { 1213 /* 1214 * If we are adding a route to an interface, 1215 * and the interface is a pt to pt link 1216 * we should search for the destination 1217 * as our clue to the interface. Otherwise 1218 * we can use the local address. 1219 */ 1220 ifa = NULL; 1221 if (flags & RTF_HOST) 1222 ifa = ifa_ifwithdstaddr(dst, rtableid); 1223 if (ifa == NULL) 1224 ifa = ifa_ifwithaddr(gateway, rtableid); 1225 } else { 1226 /* 1227 * If we are adding a route to a remote net 1228 * or host, the gateway may still be on the 1229 * other end of a pt to pt link. 1230 */ 1231 ifa = ifa_ifwithdstaddr(gateway, rtableid); 1232 } 1233 if (ifa == NULL) { 1234 if (gateway->sa_family == AF_LINK) { 1235 struct sockaddr_dl *sdl = satosdl(gateway); 1236 struct ifnet *ifp = if_get(sdl->sdl_index); 1237 1238 if (ifp != NULL) 1239 ifa = ifaof_ifpforaddr(dst, ifp); 1240 if_put(ifp); 1241 } else { 1242 struct rtentry *rt; 1243 1244 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid)); 1245 if (rt != NULL) 1246 ifa = rt->rt_ifa; 1247 rtfree(rt); 1248 } 1249 } 1250 if (ifa == NULL) 1251 return (NULL); 1252 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1253 struct ifaddr *oifa = ifa; 1254 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1255 if (ifa == NULL) 1256 ifa = oifa; 1257 } 1258 return (ifa); 1259 } 1260 1261 int 1262 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid) 1263 { 1264 struct ifnet *ifp = NULL; 1265 1266 /* 1267 * The "returned" `ifa' is guaranteed to be alive only if 1268 * the NET_LOCK() is held. 
1269 */ 1270 NET_ASSERT_LOCKED(); 1271 1272 /* 1273 * ifp may be specified by sockaddr_dl when protocol address 1274 * is ambiguous 1275 */ 1276 if (info->rti_info[RTAX_IFP] != NULL) { 1277 struct sockaddr_dl *sdl; 1278 1279 sdl = satosdl(info->rti_info[RTAX_IFP]); 1280 ifp = if_get(sdl->sdl_index); 1281 } 1282 1283 #ifdef IPSEC 1284 /* 1285 * If the destination is a PF_KEY address, we'll look 1286 * for the existence of a encap interface number or address 1287 * in the options list of the gateway. By default, we'll return 1288 * enc0. 1289 */ 1290 if (info->rti_info[RTAX_DST] && 1291 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1292 info->rti_ifa = enc_getifa(rtid, 0); 1293 #endif 1294 1295 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1296 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1297 1298 if (info->rti_ifa == NULL) { 1299 struct sockaddr *sa; 1300 1301 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1302 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1303 sa = info->rti_info[RTAX_DST]; 1304 1305 if (sa != NULL && ifp != NULL) 1306 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1307 else if (info->rti_info[RTAX_DST] != NULL && 1308 info->rti_info[RTAX_GATEWAY] != NULL) 1309 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1310 info->rti_info[RTAX_DST], 1311 info->rti_info[RTAX_GATEWAY], 1312 rtid); 1313 else if (sa != NULL) 1314 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1315 sa, sa, rtid); 1316 } 1317 1318 if_put(ifp); 1319 1320 if (info->rti_ifa == NULL) 1321 return (ENETUNREACH); 1322 1323 return (0); 1324 } 1325 1326 int 1327 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1328 { 1329 struct rtentry *nhrt = arg; 1330 1331 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1332 !ISSET(rt->rt_locks, RTV_MTU)) 1333 rt->rt_mtu = 0; 1334 1335 return (0); 1336 } 1337 1338 /* 1339 * Check if the user request to insert an ARP entry does not conflict 1340 * with existing ones. 
1341 * 1342 * Only two entries are allowed for a given IP address: a private one 1343 * (priv) and a public one (pub). 1344 */ 1345 int 1346 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1347 { 1348 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1349 1350 if ((info->rti_flags & RTF_LLINFO) == 0 || 1351 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1352 return (0); 1353 1354 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1355 return (0); 1356 1357 /* If the entry is cached, it can be updated. */ 1358 if (ISSET(rt->rt_flags, RTF_CACHED)) 1359 return (0); 1360 1361 /* 1362 * Same destination, not cached and both "priv" or "pub" conflict. 1363 * If a second entry exists, it always conflict. 1364 */ 1365 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1366 ISSET(rt->rt_flags, RTF_MPATH)) 1367 return (EEXIST); 1368 1369 /* No conflict but an entry exist so we need to force mpath. */ 1370 info->rti_flags |= RTF_MPATH; 1371 return (0); 1372 } 1373 1374 void 1375 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1376 struct rt_kmetrics *out) 1377 { 1378 int64_t expire; 1379 1380 if (which & RTV_MTU) 1381 out->rmx_mtu = in->rmx_mtu; 1382 if (which & RTV_EXPIRE) { 1383 expire = in->rmx_expire; 1384 if (expire != 0) { 1385 expire -= gettime(); 1386 expire += getuptime(); 1387 } 1388 1389 out->rmx_expire = expire; 1390 } 1391 } 1392 1393 void 1394 rtm_getmetrics(const struct rtentry *rt, struct rt_metrics *out) 1395 { 1396 const struct rt_kmetrics *in = &rt->rt_rmx; 1397 int64_t expire; 1398 1399 expire = in->rmx_expire; 1400 if (expire == 0) 1401 expire = rt_timer_get_expire(rt); 1402 if (expire != 0) { 1403 expire -= getuptime(); 1404 expire += gettime(); 1405 } 1406 1407 bzero(out, sizeof(*out)); 1408 out->rmx_locks = in->rmx_locks; 1409 out->rmx_mtu = in->rmx_mtu; 1410 out->rmx_expire = expire; 1411 out->rmx_pksent = in->rmx_pksent; 1412 } 1413 1414 #define ROUNDUP(a) \ 1415 ((a) > 0 ? 
(1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1416 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1417 1418 int 1419 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1420 { 1421 struct sockaddr *sa; 1422 int i; 1423 1424 /* 1425 * Parse address bits, split address storage in chunks, and 1426 * set info pointers. Use sa_len for traversing the memory 1427 * and check that we stay within in the limit. 1428 */ 1429 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1430 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1431 if ((rtinfo->rti_addrs & (1U << i)) == 0) 1432 continue; 1433 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1434 return (EINVAL); 1435 sa = (struct sockaddr *)cp; 1436 if (cp + sa->sa_len > cplim) 1437 return (EINVAL); 1438 rtinfo->rti_info[i] = sa; 1439 ADVANCE(cp, sa); 1440 } 1441 /* 1442 * Check that the address family is suitable for the route address 1443 * type. Check that each address has a size that fits its family 1444 * and its length is within the size. Strings within addresses must 1445 * be NUL terminated. 1446 */ 1447 for (i = 0; i < RTAX_MAX; i++) { 1448 size_t len, maxlen, size; 1449 1450 sa = rtinfo->rti_info[i]; 1451 if (sa == NULL) 1452 continue; 1453 maxlen = size = 0; 1454 switch (i) { 1455 case RTAX_DST: 1456 case RTAX_GATEWAY: 1457 case RTAX_SRC: 1458 switch (sa->sa_family) { 1459 case AF_INET: 1460 size = sizeof(struct sockaddr_in); 1461 break; 1462 case AF_LINK: 1463 size = sizeof(struct sockaddr_dl); 1464 break; 1465 #ifdef INET6 1466 case AF_INET6: 1467 size = sizeof(struct sockaddr_in6); 1468 break; 1469 #endif 1470 #ifdef MPLS 1471 case AF_MPLS: 1472 size = sizeof(struct sockaddr_mpls); 1473 break; 1474 #endif 1475 } 1476 break; 1477 case RTAX_IFP: 1478 if (sa->sa_family != AF_LINK) 1479 return (EAFNOSUPPORT); 1480 /* 1481 * XXX Should be sizeof(struct sockaddr_dl), but 1482 * route(8) has a bug and provides less memory. 1483 * arp(8) has another bug and uses sizeof pointer. 
1484 */ 1485 size = 4; 1486 break; 1487 case RTAX_IFA: 1488 switch (sa->sa_family) { 1489 case AF_INET: 1490 size = sizeof(struct sockaddr_in); 1491 break; 1492 #ifdef INET6 1493 case AF_INET6: 1494 size = sizeof(struct sockaddr_in6); 1495 break; 1496 #endif 1497 default: 1498 return (EAFNOSUPPORT); 1499 } 1500 break; 1501 case RTAX_LABEL: 1502 sa->sa_family = AF_UNSPEC; 1503 maxlen = RTLABEL_LEN; 1504 size = sizeof(struct sockaddr_rtlabel); 1505 break; 1506 #ifdef BFD 1507 case RTAX_BFD: 1508 sa->sa_family = AF_UNSPEC; 1509 size = sizeof(struct sockaddr_bfd); 1510 break; 1511 #endif 1512 case RTAX_DNS: 1513 /* more validation in rtm_validate_proposal */ 1514 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1515 return (EINVAL); 1516 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1517 sr_dns)) 1518 return (EINVAL); 1519 switch (sa->sa_family) { 1520 case AF_INET: 1521 #ifdef INET6 1522 case AF_INET6: 1523 #endif 1524 break; 1525 default: 1526 return (EAFNOSUPPORT); 1527 } 1528 break; 1529 case RTAX_STATIC: 1530 sa->sa_family = AF_UNSPEC; 1531 maxlen = RTSTATIC_LEN; 1532 size = sizeof(struct sockaddr_rtstatic); 1533 break; 1534 case RTAX_SEARCH: 1535 sa->sa_family = AF_UNSPEC; 1536 maxlen = RTSEARCH_LEN; 1537 size = sizeof(struct sockaddr_rtsearch); 1538 break; 1539 } 1540 if (size) { 1541 /* memory for the full struct must be provided */ 1542 if (sa->sa_len < size) 1543 return (EINVAL); 1544 } 1545 if (maxlen) { 1546 /* this should not happen */ 1547 if (2 + maxlen > size) 1548 return (EINVAL); 1549 /* strings must be NUL terminated within the struct */ 1550 len = strnlen(sa->sa_data, maxlen); 1551 if (len >= maxlen || 2 + len >= sa->sa_len) 1552 return (EINVAL); 1553 break; 1554 } 1555 } 1556 return (0); 1557 } 1558 1559 struct mbuf * 1560 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1561 { 1562 struct rt_msghdr *rtm; 1563 struct mbuf *m; 1564 int i; 1565 struct sockaddr *sa; 1566 int len, dlen, hlen; 1567 1568 switch (type) { 1569 case RTM_DELADDR: 1570 case 
RTM_NEWADDR: 1571 hlen = sizeof(struct ifa_msghdr); 1572 break; 1573 case RTM_IFINFO: 1574 hlen = sizeof(struct if_msghdr); 1575 break; 1576 case RTM_IFANNOUNCE: 1577 hlen = sizeof(struct if_announcemsghdr); 1578 break; 1579 #ifdef BFD 1580 case RTM_BFD: 1581 hlen = sizeof(struct bfd_msghdr); 1582 break; 1583 #endif 1584 case RTM_80211INFO: 1585 hlen = sizeof(struct if_ieee80211_msghdr); 1586 break; 1587 default: 1588 hlen = sizeof(struct rt_msghdr); 1589 break; 1590 } 1591 len = hlen; 1592 for (i = 0; i < RTAX_MAX; i++) { 1593 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1594 continue; 1595 len += ROUNDUP(sa->sa_len); 1596 } 1597 if (len > MCLBYTES) 1598 panic("rtm_msg1"); 1599 m = m_gethdr(M_DONTWAIT, MT_DATA); 1600 if (m && len > MHLEN) { 1601 MCLGET(m, M_DONTWAIT); 1602 if ((m->m_flags & M_EXT) == 0) { 1603 m_free(m); 1604 m = NULL; 1605 } 1606 } 1607 if (m == NULL) 1608 return (m); 1609 m->m_pkthdr.len = m->m_len = len; 1610 m->m_pkthdr.ph_ifidx = 0; 1611 rtm = mtod(m, struct rt_msghdr *); 1612 bzero(rtm, len); 1613 len = hlen; 1614 for (i = 0; i < RTAX_MAX; i++) { 1615 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1616 continue; 1617 rtinfo->rti_addrs |= (1U << i); 1618 dlen = ROUNDUP(sa->sa_len); 1619 if (m_copyback(m, len, sa->sa_len, sa, M_NOWAIT)) { 1620 m_freem(m); 1621 return (NULL); 1622 } 1623 len += dlen; 1624 } 1625 rtm->rtm_msglen = len; 1626 rtm->rtm_hdrlen = hlen; 1627 rtm->rtm_version = RTM_VERSION; 1628 rtm->rtm_type = type; 1629 return (m); 1630 } 1631 1632 int 1633 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1634 struct walkarg *w) 1635 { 1636 int i; 1637 int len, dlen, hlen, second_time = 0; 1638 caddr_t cp0; 1639 1640 rtinfo->rti_addrs = 0; 1641 again: 1642 switch (type) { 1643 case RTM_DELADDR: 1644 case RTM_NEWADDR: 1645 len = sizeof(struct ifa_msghdr); 1646 break; 1647 case RTM_IFINFO: 1648 len = sizeof(struct if_msghdr); 1649 break; 1650 default: 1651 len = sizeof(struct rt_msghdr); 1652 
break; 1653 } 1654 hlen = len; 1655 if ((cp0 = cp) != NULL) 1656 cp += len; 1657 for (i = 0; i < RTAX_MAX; i++) { 1658 struct sockaddr *sa; 1659 1660 if ((sa = rtinfo->rti_info[i]) == NULL) 1661 continue; 1662 rtinfo->rti_addrs |= (1U << i); 1663 dlen = ROUNDUP(sa->sa_len); 1664 if (cp) { 1665 bcopy(sa, cp, sa->sa_len); 1666 bzero(cp + sa->sa_len, dlen - sa->sa_len); 1667 cp += dlen; 1668 } 1669 len += dlen; 1670 } 1671 /* align message length to the next natural boundary */ 1672 len = ALIGN(len); 1673 if (cp == 0 && w != NULL && !second_time) { 1674 w->w_needed += len; 1675 if (w->w_needed <= w->w_given && w->w_where) { 1676 if (w->w_tmemsize < len) { 1677 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1678 w->w_tmem = malloc(len, M_RTABLE, 1679 M_NOWAIT | M_ZERO); 1680 if (w->w_tmem) 1681 w->w_tmemsize = len; 1682 } 1683 if (w->w_tmem) { 1684 cp = w->w_tmem; 1685 second_time = 1; 1686 goto again; 1687 } else 1688 w->w_where = 0; 1689 } 1690 } 1691 if (cp && w) /* clear the message header */ 1692 bzero(cp0, hlen); 1693 1694 if (cp) { 1695 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1696 1697 rtm->rtm_version = RTM_VERSION; 1698 rtm->rtm_type = type; 1699 rtm->rtm_msglen = len; 1700 rtm->rtm_hdrlen = hlen; 1701 } 1702 return (len); 1703 } 1704 1705 void 1706 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1707 { 1708 struct rt_addrinfo info; 1709 struct ifnet *ifp; 1710 struct sockaddr_rtlabel sa_rl; 1711 struct sockaddr_in6 sa_mask; 1712 1713 memset(&info, 0, sizeof(info)); 1714 info.rti_info[RTAX_DST] = rt_key(rt); 1715 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1716 if (!ISSET(rt->rt_flags, RTF_HOST)) 1717 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1718 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1719 ifp = if_get(rt->rt_ifidx); 1720 if (ifp != NULL) { 1721 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1722 info.rti_info[RTAX_IFA] = rtable_getsource(rtableid, 1723 
info.rti_info[RTAX_DST]->sa_family); 1724 if (info.rti_info[RTAX_IFA] == NULL) 1725 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1726 } 1727 1728 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1729 rtableid); 1730 if_put(ifp); 1731 } 1732 1733 /* 1734 * This routine is called to generate a message from the routing 1735 * socket indicating that a redirect has occurred, a routing lookup 1736 * has failed, or that a protocol has detected timeouts to a particular 1737 * destination. 1738 */ 1739 void 1740 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1741 u_int ifidx, int error, u_int tableid) 1742 { 1743 struct rt_msghdr *rtm; 1744 struct mbuf *m; 1745 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1746 1747 if (rtptable.rtp_count == 0) 1748 return; 1749 m = rtm_msg1(type, rtinfo); 1750 if (m == NULL) 1751 return; 1752 rtm = mtod(m, struct rt_msghdr *); 1753 rtm->rtm_flags = RTF_DONE | flags; 1754 rtm->rtm_priority = prio; 1755 rtm->rtm_errno = error; 1756 rtm->rtm_tableid = tableid; 1757 rtm->rtm_addrs = rtinfo->rti_addrs; 1758 rtm->rtm_index = ifidx; 1759 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1760 } 1761 1762 /* 1763 * This routine is called to generate a message from the routing 1764 * socket indicating that the status of a network interface has changed. 
1765 */ 1766 void 1767 rtm_ifchg(struct ifnet *ifp) 1768 { 1769 struct rt_addrinfo info; 1770 struct if_msghdr *ifm; 1771 struct mbuf *m; 1772 1773 if (rtptable.rtp_count == 0) 1774 return; 1775 memset(&info, 0, sizeof(info)); 1776 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1777 m = rtm_msg1(RTM_IFINFO, &info); 1778 if (m == NULL) 1779 return; 1780 ifm = mtod(m, struct if_msghdr *); 1781 ifm->ifm_index = ifp->if_index; 1782 ifm->ifm_tableid = ifp->if_rdomain; 1783 ifm->ifm_flags = ifp->if_flags; 1784 ifm->ifm_xflags = ifp->if_xflags; 1785 if_getdata(ifp, &ifm->ifm_data); 1786 ifm->ifm_addrs = info.rti_addrs; 1787 route_input(m, NULL, AF_UNSPEC); 1788 } 1789 1790 /* 1791 * This is called to generate messages from the routing socket 1792 * indicating a network interface has had addresses associated with it. 1793 * if we ever reverse the logic and replace messages TO the routing 1794 * socket indicate a request to configure interfaces, then it will 1795 * be unnecessary as the routing socket will automatically generate 1796 * copies of it. 1797 */ 1798 void 1799 rtm_addr(int cmd, struct ifaddr *ifa) 1800 { 1801 struct ifnet *ifp = ifa->ifa_ifp; 1802 struct mbuf *m; 1803 struct rt_addrinfo info; 1804 struct ifa_msghdr *ifam; 1805 1806 if (rtptable.rtp_count == 0) 1807 return; 1808 1809 memset(&info, 0, sizeof(info)); 1810 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1811 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1812 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1813 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1814 if ((m = rtm_msg1(cmd, &info)) == NULL) 1815 return; 1816 ifam = mtod(m, struct ifa_msghdr *); 1817 ifam->ifam_index = ifp->if_index; 1818 ifam->ifam_metric = ifa->ifa_metric; 1819 ifam->ifam_flags = ifa->ifa_flags; 1820 ifam->ifam_addrs = info.rti_addrs; 1821 ifam->ifam_tableid = ifp->if_rdomain; 1822 1823 route_input(m, NULL, 1824 ifa->ifa_addr ? 
ifa->ifa_addr->sa_family : AF_UNSPEC); 1825 } 1826 1827 /* 1828 * This is called to generate routing socket messages indicating 1829 * network interface arrival and departure. 1830 */ 1831 void 1832 rtm_ifannounce(struct ifnet *ifp, int what) 1833 { 1834 struct if_announcemsghdr *ifan; 1835 struct mbuf *m; 1836 1837 if (rtptable.rtp_count == 0) 1838 return; 1839 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1840 if (m == NULL) 1841 return; 1842 ifan = mtod(m, struct if_announcemsghdr *); 1843 ifan->ifan_index = ifp->if_index; 1844 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1845 ifan->ifan_what = what; 1846 route_input(m, NULL, AF_UNSPEC); 1847 } 1848 1849 #ifdef BFD 1850 /* 1851 * This is used to generate routing socket messages indicating 1852 * the state of a BFD session. 1853 */ 1854 void 1855 rtm_bfd(struct bfd_config *bfd) 1856 { 1857 struct bfd_msghdr *bfdm; 1858 struct sockaddr_bfd sa_bfd; 1859 struct mbuf *m; 1860 struct rt_addrinfo info; 1861 1862 if (rtptable.rtp_count == 0) 1863 return; 1864 memset(&info, 0, sizeof(info)); 1865 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1866 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1867 1868 m = rtm_msg1(RTM_BFD, &info); 1869 if (m == NULL) 1870 return; 1871 bfdm = mtod(m, struct bfd_msghdr *); 1872 bfdm->bm_addrs = info.rti_addrs; 1873 1874 KERNEL_ASSERT_LOCKED(); 1875 bfd2sa(bfd->bc_rt, &sa_bfd); 1876 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1877 1878 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1879 } 1880 #endif /* BFD */ 1881 1882 /* 1883 * This is used to generate routing socket messages indicating 1884 * the state of an ieee80211 interface. 
1885 */ 1886 void 1887 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1888 { 1889 struct if_ieee80211_msghdr *ifim; 1890 struct mbuf *m; 1891 1892 if (rtptable.rtp_count == 0) 1893 return; 1894 m = rtm_msg1(RTM_80211INFO, NULL); 1895 if (m == NULL) 1896 return; 1897 ifim = mtod(m, struct if_ieee80211_msghdr *); 1898 ifim->ifim_index = ifp->if_index; 1899 ifim->ifim_tableid = ifp->if_rdomain; 1900 1901 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1902 route_input(m, NULL, AF_UNSPEC); 1903 } 1904 1905 /* 1906 * This is used to generate routing socket messages indicating 1907 * the address selection proposal from an interface. 1908 */ 1909 void 1910 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1911 uint8_t prio) 1912 { 1913 struct rt_msghdr *rtm; 1914 struct mbuf *m; 1915 1916 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1917 if (m == NULL) 1918 return; 1919 rtm = mtod(m, struct rt_msghdr *); 1920 rtm->rtm_flags = RTF_DONE | flags; 1921 rtm->rtm_priority = prio; 1922 rtm->rtm_tableid = ifp->if_rdomain; 1923 rtm->rtm_index = ifp->if_index; 1924 rtm->rtm_addrs = rtinfo->rti_addrs; 1925 1926 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1927 } 1928 1929 /* 1930 * This is used in dumping the kernel table via sysctl(). 
1931 */ 1932 int 1933 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1934 { 1935 struct walkarg *w = v; 1936 int error = 0, size; 1937 struct rt_addrinfo info; 1938 struct ifnet *ifp; 1939 #ifdef BFD 1940 struct sockaddr_bfd sa_bfd; 1941 #endif 1942 struct sockaddr_rtlabel sa_rl; 1943 struct sockaddr_in6 sa_mask; 1944 1945 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1946 return 0; 1947 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1948 u_int8_t prio = w->w_arg & RTP_MASK; 1949 if (w->w_arg < 0) { 1950 prio = (-w->w_arg) & RTP_MASK; 1951 /* Show all routes that are not this priority */ 1952 if (prio == (rt->rt_priority & RTP_MASK)) 1953 return 0; 1954 } else { 1955 if (prio != (rt->rt_priority & RTP_MASK) && 1956 prio != RTP_ANY) 1957 return 0; 1958 } 1959 } 1960 bzero(&info, sizeof(info)); 1961 info.rti_info[RTAX_DST] = rt_key(rt); 1962 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1963 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1964 ifp = if_get(rt->rt_ifidx); 1965 if (ifp != NULL) { 1966 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1967 info.rti_info[RTAX_IFA] = 1968 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); 1969 if (info.rti_info[RTAX_IFA] == NULL) 1970 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1971 if (ifp->if_flags & IFF_POINTOPOINT) 1972 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1973 } 1974 if_put(ifp); 1975 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1976 #ifdef BFD 1977 if (rt->rt_flags & RTF_BFD) { 1978 KERNEL_ASSERT_LOCKED(); 1979 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1980 } 1981 #endif 1982 #ifdef MPLS 1983 if (rt->rt_flags & RTF_MPLS) { 1984 struct sockaddr_mpls sa_mpls; 1985 1986 bzero(&sa_mpls, sizeof(sa_mpls)); 1987 sa_mpls.smpls_family = AF_MPLS; 1988 sa_mpls.smpls_len = sizeof(sa_mpls); 1989 sa_mpls.smpls_label = ((struct rt_mpls *) 1990 rt->rt_llinfo)->mpls_label; 1991 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1992 
info.rti_mpls = ((struct rt_mpls *) 1993 rt->rt_llinfo)->mpls_operation; 1994 } 1995 #endif 1996 1997 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1998 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { 1999 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 2000 2001 rtm->rtm_pid = curproc->p_p->ps_pid; 2002 rtm->rtm_flags = RTF_DONE | rt->rt_flags; 2003 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 2004 rtm_getmetrics(rt, &rtm->rtm_rmx); 2005 /* Do not account the routing table's reference. */ 2006 rtm->rtm_rmx.rmx_refcnt = refcnt_read(&rt->rt_refcnt) - 1; 2007 rtm->rtm_index = rt->rt_ifidx; 2008 rtm->rtm_addrs = info.rti_addrs; 2009 rtm->rtm_tableid = id; 2010 #ifdef MPLS 2011 rtm->rtm_mpls = info.rti_mpls; 2012 #endif 2013 if ((error = copyout(rtm, w->w_where, size)) != 0) 2014 w->w_where = NULL; 2015 else 2016 w->w_where += size; 2017 } 2018 return (error); 2019 } 2020 2021 int 2022 sysctl_iflist(int af, struct walkarg *w) 2023 { 2024 struct ifnet *ifp; 2025 struct ifaddr *ifa; 2026 struct rt_addrinfo info; 2027 int len, error = 0; 2028 2029 bzero(&info, sizeof(info)); 2030 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2031 if (w->w_arg && w->w_arg != ifp->if_index) 2032 continue; 2033 /* Copy the link-layer address first */ 2034 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 2035 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 2036 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { 2037 struct if_msghdr *ifm; 2038 2039 ifm = (struct if_msghdr *)w->w_tmem; 2040 ifm->ifm_index = ifp->if_index; 2041 ifm->ifm_tableid = ifp->if_rdomain; 2042 ifm->ifm_flags = ifp->if_flags; 2043 if_getdata(ifp, &ifm->ifm_data); 2044 ifm->ifm_addrs = info.rti_addrs; 2045 error = copyout(ifm, w->w_where, len); 2046 if (error) 2047 return (error); 2048 w->w_where += len; 2049 } 2050 info.rti_info[RTAX_IFP] = NULL; 2051 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 2052 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 2053 if (af && af != 
ifa->ifa_addr->sa_family) 2054 continue; 2055 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 2056 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 2057 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 2058 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 2059 if (w->w_where && w->w_tmem && 2060 w->w_needed <= w->w_given) { 2061 struct ifa_msghdr *ifam; 2062 2063 ifam = (struct ifa_msghdr *)w->w_tmem; 2064 ifam->ifam_index = ifa->ifa_ifp->if_index; 2065 ifam->ifam_flags = ifa->ifa_flags; 2066 ifam->ifam_metric = ifa->ifa_metric; 2067 ifam->ifam_addrs = info.rti_addrs; 2068 error = copyout(w->w_tmem, w->w_where, len); 2069 if (error) 2070 return (error); 2071 w->w_where += len; 2072 } 2073 } 2074 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 2075 info.rti_info[RTAX_BRD] = NULL; 2076 } 2077 return (0); 2078 } 2079 2080 int 2081 sysctl_ifnames(struct walkarg *w) 2082 { 2083 struct if_nameindex_msg ifn; 2084 struct ifnet *ifp; 2085 int error = 0; 2086 2087 /* XXX ignore tableid for now */ 2088 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2089 if (w->w_arg && w->w_arg != ifp->if_index) 2090 continue; 2091 w->w_needed += sizeof(ifn); 2092 if (w->w_where && w->w_needed <= w->w_given) { 2093 2094 memset(&ifn, 0, sizeof(ifn)); 2095 ifn.if_index = ifp->if_index; 2096 strlcpy(ifn.if_name, ifp->if_xname, 2097 sizeof(ifn.if_name)); 2098 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2099 if (error) 2100 return (error); 2101 w->w_where += sizeof(ifn); 2102 } 2103 } 2104 2105 return (0); 2106 } 2107 2108 int 2109 sysctl_source(int af, u_int tableid, struct walkarg *w) 2110 { 2111 struct sockaddr *sa; 2112 int size, error = 0; 2113 2114 sa = rtable_getsource(tableid, af); 2115 if (sa) { 2116 switch (sa->sa_family) { 2117 case AF_INET: 2118 size = sizeof(struct sockaddr_in); 2119 break; 2120 #ifdef INET6 2121 case AF_INET6: 2122 size = sizeof(struct sockaddr_in6); 2123 break; 2124 #endif 2125 default: 2126 return (0); 2127 } 2128 w->w_needed += size; 2129 if (w->w_where && w->w_needed 
<= w->w_given) { 2130 if ((error = copyout(sa, w->w_where, size))) 2131 return (error); 2132 w->w_where += size; 2133 } 2134 } 2135 return (0); 2136 } 2137 2138 int 2139 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2140 size_t newlen) 2141 { 2142 int i, error = EINVAL; 2143 u_char af; 2144 struct walkarg w; 2145 struct rt_tableinfo tableinfo; 2146 u_int tableid = 0; 2147 2148 if (new) 2149 return (EPERM); 2150 if (namelen < 3 || namelen > 4) 2151 return (EINVAL); 2152 af = name[0]; 2153 bzero(&w, sizeof(w)); 2154 w.w_where = where; 2155 w.w_given = *given; 2156 w.w_op = name[1]; 2157 w.w_arg = name[2]; 2158 2159 if (namelen == 4) { 2160 tableid = name[3]; 2161 if (!rtable_exists(tableid)) 2162 return (ENOENT); 2163 } else 2164 tableid = curproc->p_p->ps_rtableid; 2165 2166 switch (w.w_op) { 2167 case NET_RT_DUMP: 2168 case NET_RT_FLAGS: 2169 NET_LOCK(); 2170 for (i = 1; i <= AF_MAX; i++) { 2171 if (af != 0 && af != i) 2172 continue; 2173 2174 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2175 &w); 2176 if (error == EAFNOSUPPORT) 2177 error = 0; 2178 if (error) 2179 break; 2180 } 2181 NET_UNLOCK(); 2182 break; 2183 2184 case NET_RT_IFLIST: 2185 NET_LOCK(); 2186 error = sysctl_iflist(af, &w); 2187 NET_UNLOCK(); 2188 break; 2189 2190 case NET_RT_STATS: 2191 return (sysctl_rtable_rtstat(where, given, new)); 2192 case NET_RT_TABLE: 2193 tableid = w.w_arg; 2194 if (!rtable_exists(tableid)) 2195 return (ENOENT); 2196 memset(&tableinfo, 0, sizeof tableinfo); 2197 tableinfo.rti_tableid = tableid; 2198 tableinfo.rti_domainid = rtable_l2(tableid); 2199 error = sysctl_rdstruct(where, given, new, 2200 &tableinfo, sizeof(tableinfo)); 2201 return (error); 2202 case NET_RT_IFNAMES: 2203 NET_LOCK(); 2204 error = sysctl_ifnames(&w); 2205 NET_UNLOCK(); 2206 break; 2207 case NET_RT_SOURCE: 2208 tableid = w.w_arg; 2209 if (!rtable_exists(tableid)) 2210 return (ENOENT); 2211 NET_LOCK(); 2212 for (i = 1; i <= AF_MAX; i++) { 2213 if (af != 0 && 
af != i) 2214 continue; 2215 2216 error = sysctl_source(i, tableid, &w); 2217 if (error == EAFNOSUPPORT) 2218 error = 0; 2219 if (error) 2220 break; 2221 } 2222 NET_UNLOCK(); 2223 break; 2224 } 2225 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2226 if (where) { 2227 *given = w.w_where - (caddr_t)where; 2228 if (w.w_needed > w.w_given) 2229 return (ENOMEM); 2230 } else if (w.w_needed == 0) { 2231 *given = 0; 2232 } else { 2233 *given = roundup(w.w_needed + MAX(w.w_needed / 10, 1024), 2234 PAGE_SIZE); 2235 } 2236 return (error); 2237 } 2238 2239 int 2240 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2241 { 2242 extern struct cpumem *rtcounters; 2243 uint64_t counters[rts_ncounters]; 2244 struct rtstat rtstat; 2245 uint32_t *words = (uint32_t *)&rtstat; 2246 int i; 2247 2248 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2249 memset(&rtstat, 0, sizeof rtstat); 2250 counters_read(rtcounters, counters, nitems(counters)); 2251 2252 for (i = 0; i < nitems(counters); i++) 2253 words[i] = (uint32_t)counters[i]; 2254 2255 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2256 } 2257 2258 int 2259 rtm_validate_proposal(struct rt_addrinfo *info) 2260 { 2261 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2262 RTA_SEARCH)) { 2263 return -1; 2264 } 2265 2266 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2267 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2268 if (sa == NULL) 2269 return -1; 2270 switch (sa->sa_family) { 2271 case AF_INET: 2272 if (sa->sa_len != sizeof(struct sockaddr_in)) 2273 return -1; 2274 break; 2275 case AF_INET6: 2276 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2277 return -1; 2278 break; 2279 default: 2280 return -1; 2281 } 2282 } 2283 2284 if (ISSET(info->rti_addrs, RTA_IFA)) { 2285 struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2286 if (sa == NULL) 2287 return -1; 2288 switch (sa->sa_family) { 2289 case AF_INET: 2290 if (sa->sa_len != sizeof(struct sockaddr_in)) 2291 
return -1; 2292 break; 2293 case AF_INET6: 2294 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2295 return -1; 2296 break; 2297 default: 2298 return -1; 2299 } 2300 } 2301 2302 if (ISSET(info->rti_addrs, RTA_DNS)) { 2303 struct sockaddr_rtdns *rtdns = 2304 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2305 if (rtdns == NULL) 2306 return -1; 2307 if (rtdns->sr_len > sizeof(*rtdns)) 2308 return -1; 2309 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2310 return -1; 2311 switch (rtdns->sr_family) { 2312 case AF_INET: 2313 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2314 sr_dns)) % sizeof(struct in_addr) != 0) 2315 return -1; 2316 break; 2317 #ifdef INET6 2318 case AF_INET6: 2319 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2320 sr_dns)) % sizeof(struct in6_addr) != 0) 2321 return -1; 2322 break; 2323 #endif 2324 default: 2325 return -1; 2326 } 2327 } 2328 2329 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2330 struct sockaddr_rtstatic *rtstatic = 2331 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2332 if (rtstatic == NULL) 2333 return -1; 2334 if (rtstatic->sr_len > sizeof(*rtstatic)) 2335 return -1; 2336 if (rtstatic->sr_len <= 2337 offsetof(struct sockaddr_rtstatic, sr_static)) 2338 return -1; 2339 } 2340 2341 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2342 struct sockaddr_rtsearch *rtsearch = 2343 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2344 if (rtsearch == NULL) 2345 return -1; 2346 if (rtsearch->sr_len > sizeof(*rtsearch)) 2347 return -1; 2348 if (rtsearch->sr_len <= 2349 offsetof(struct sockaddr_rtsearch, sr_search)) 2350 return -1; 2351 } 2352 2353 return 0; 2354 } 2355 2356 int 2357 rt_setsource(unsigned int rtableid, struct sockaddr *src) 2358 { 2359 struct ifaddr *ifa; 2360 int error; 2361 /* 2362 * If source address is 0.0.0.0 or :: 2363 * use automatic source selection 2364 */ 2365 switch(src->sa_family) { 2366 case AF_INET: 2367 if(satosin(src)->sin_addr.s_addr == INADDR_ANY) { 2368 
rtable_setsource(rtableid, AF_INET, NULL); 2369 return (0); 2370 } 2371 break; 2372 #ifdef INET6 2373 case AF_INET6: 2374 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 2375 rtable_setsource(rtableid, AF_INET6, NULL); 2376 return (0); 2377 } 2378 break; 2379 #endif 2380 default: 2381 return (EAFNOSUPPORT); 2382 } 2383 2384 KERNEL_LOCK(); 2385 /* 2386 * Check if source address is assigned to an interface in the 2387 * same rdomain 2388 */ 2389 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) { 2390 KERNEL_UNLOCK(); 2391 return (EINVAL); 2392 } 2393 2394 error = rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr); 2395 KERNEL_UNLOCK(); 2396 2397 return (error); 2398 } 2399 2400 /* 2401 * Definitions of protocols supported in the ROUTE domain. 2402 */ 2403 2404 const struct protosw routesw[] = { 2405 { 2406 .pr_type = SOCK_RAW, 2407 .pr_domain = &routedomain, 2408 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2409 .pr_output = route_output, 2410 .pr_ctloutput = route_ctloutput, 2411 .pr_usrreq = route_usrreq, 2412 .pr_attach = route_attach, 2413 .pr_detach = route_detach, 2414 .pr_init = route_prinit, 2415 .pr_sysctl = sysctl_rtable 2416 } 2417 }; 2418 2419 const struct domain routedomain = { 2420 .dom_family = PF_ROUTE, 2421 .dom_name = "route", 2422 .dom_init = route_init, 2423 .dom_protosw = routesw, 2424 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2425 }; 2426