1 /* $OpenBSD: rtsock.c,v 1.304 2020/11/07 09:51:40 denis Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/pool.h> 73 #include <sys/protosw.h> 74 #include <sys/srp.h> 75 76 #include <net/if.h> 77 #include <net/if_dl.h> 78 #include <net/if_var.h> 79 #include <net/route.h> 80 81 #include <netinet/in.h> 82 83 #ifdef MPLS 84 #include <netmpls/mpls.h> 85 #endif 86 #ifdef IPSEC 87 #include <netinet/ip_ipsp.h> 88 #include <net/if_enc.h> 89 #endif 90 #ifdef BFD 91 #include <net/bfd.h> 92 #endif 93 94 #include <sys/stdarg.h> 95 #include <sys/kernel.h> 96 #include <sys/timeout.h> 97 98 #define ROUTESNDQ 8192 99 #define ROUTERCVQ 8192 100 101 const struct sockaddr route_src = { 2, PF_ROUTE, }; 102 103 struct walkarg { 104 int w_op, w_arg, w_given, w_needed, w_tmemsize; 105 caddr_t w_where, w_tmem; 106 }; 107 108 void route_prinit(void); 109 void rcb_ref(void *, void *); 110 void rcb_unref(void *, void *); 111 int route_output(struct mbuf *, struct socket *, struct sockaddr *, 112 struct mbuf *); 113 int route_ctloutput(int, struct socket *, int, int, struct mbuf *); 114 int route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, 115 struct mbuf *, struct proc *); 116 void route_input(struct mbuf *m0, struct socket *, sa_family_t); 117 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 118 int route_cleargateway(struct rtentry *, void *, unsigned int); 119 void rtm_senddesync_timer(void *); 120 void rtm_senddesync(struct socket *); 121 int rtm_sendup(struct socket *, struct mbuf *, int); 122 123 int rtm_getifa(struct rt_addrinfo *, unsigned int); 124 int rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *, 125 uint8_t, unsigned int); 126 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int); 127 struct mbuf *rtm_msg1(int, struct rt_addrinfo *); 128 int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t, 129 struct walkarg *); 130 int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 131 int rtm_validate_proposal(struct rt_addrinfo *); 132 void rtm_setmetrics(u_long, const struct rt_metrics *, 133 struct rt_kmetrics *); 134 void rtm_getmetrics(const struct rt_kmetrics *, 135 struct rt_metrics *); 136 137 int sysctl_iflist(int, struct walkarg *); 138 int sysctl_ifnames(struct walkarg *); 139 int sysctl_rtable_rtstat(void *, size_t *, void *); 140 141 int rt_setsource(unsigned int, struct sockaddr *); 142 143 /* 144 * Locks used to protect struct members 145 * I immutable after creation 146 * sK solock (kernel lock) 147 */ 148 struct rtpcb { 149 struct socket *rop_socket; /* [I] */ 150 151 SRPL_ENTRY(rtpcb) rop_list; 152 struct refcnt rop_refcnt; 153 struct timeout rop_timeout; 154 unsigned int rop_msgfilter; /* [sK] */ 155 unsigned int rop_flagfilter; /* [sK] */ 156 unsigned int rop_flags; /* [sK] */ 157 u_int rop_rtableid; /* [sK] */ 158 unsigned short rop_proto; /* [I] */ 159 u_char rop_priority; /* [sK] */ 160 }; 161 #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) 162 163 struct rtptable { 164 SRPL_HEAD(, rtpcb) rtp_list; 165 struct srpl_rc rtp_rc; 166 struct rwlock rtp_lk; 167 unsigned int rtp_count; 168 }; 169 170 struct pool rtpcb_pool; 171 struct rtptable rtptable; 172 173 /* 174 * These flags and timeout are used for indicating to userland (via a 175 * RTM_DESYNC msg) when the route socket has overflowed and messages 176 * have been lost. 177 */ 178 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 179 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 180 queueing more packets */ 181 182 #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ 183 184 void 185 route_prinit(void) 186 { 187 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); 188 rw_init(&rtptable.rtp_lk, "rtsock"); 189 SRPL_INIT(&rtptable.rtp_list); 190 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, 191 IPL_NONE, PR_WAITOK, "rtpcb", NULL); 192 } 193 194 void 195 rcb_ref(void *null, void *v) 196 { 197 struct rtpcb *rop = v; 198 199 refcnt_take(&rop->rop_refcnt); 200 } 201 202 void 203 rcb_unref(void *null, void *v) 204 { 205 struct rtpcb *rop = v; 206 207 refcnt_rele_wake(&rop->rop_refcnt); 208 } 209 210 int 211 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 212 struct mbuf *control, struct proc *p) 213 { 214 struct rtpcb *rop; 215 int error = 0; 216 217 if (req == PRU_CONTROL) 218 return (EOPNOTSUPP); 219 220 soassertlocked(so); 221 222 if (control && control->m_len) { 223 error = EOPNOTSUPP; 224 goto release; 225 } 226 227 rop = sotortpcb(so); 228 if (rop == NULL) { 229 error = EINVAL; 230 goto release; 231 } 232 233 switch (req) { 234 /* no connect, bind, accept. Socket is connected from the start */ 235 case PRU_CONNECT: 236 case PRU_BIND: 237 case PRU_CONNECT2: 238 case PRU_LISTEN: 239 case PRU_ACCEPT: 240 error = EOPNOTSUPP; 241 break; 242 243 case PRU_DISCONNECT: 244 case PRU_ABORT: 245 soisdisconnected(so); 246 break; 247 case PRU_SHUTDOWN: 248 socantsendmore(so); 249 break; 250 case PRU_SENSE: 251 /* stat: don't bother with a blocksize. */ 252 break; 253 254 /* minimal support, just implement a fake peer address */ 255 case PRU_SOCKADDR: 256 error = EINVAL; 257 break; 258 case PRU_PEERADDR: 259 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); 260 nam->m_len = route_src.sa_len; 261 break; 262 263 case PRU_RCVD: 264 /* 265 * If we are in a FLUSH state, check if the buffer is 266 * empty so that we can clear the flag. 267 */ 268 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 269 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == 270 rop->rop_socket->so_rcv.sb_hiwat))) 271 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 272 break; 273 274 case PRU_RCVOOB: 275 case PRU_SENDOOB: 276 error = EOPNOTSUPP; 277 break; 278 case PRU_SEND: 279 if (nam) { 280 error = EISCONN; 281 break; 282 } 283 error = (*so->so_proto->pr_output)(m, so, NULL, NULL); 284 m = NULL; 285 break; 286 default: 287 panic("route_usrreq"); 288 } 289 290 release: 291 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 292 m_freem(control); 293 m_freem(m); 294 } 295 return (error); 296 } 297 298 int 299 route_attach(struct socket *so, int proto) 300 { 301 struct rtpcb *rop; 302 int error; 303 304 /* 305 * use the rawcb but allocate a rtpcb, this 306 * code does not care about the additional fields 307 * and works directly on the raw socket. 308 */ 309 rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO); 310 so->so_pcb = rop; 311 /* Init the timeout structure */ 312 timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so); 313 refcnt_init(&rop->rop_refcnt); 314 315 if (curproc == NULL) 316 error = EACCES; 317 else 318 error = soreserve(so, ROUTESNDQ, ROUTERCVQ); 319 if (error) { 320 pool_put(&rtpcb_pool, rop); 321 return (error); 322 } 323 324 rop->rop_socket = so; 325 rop->rop_proto = proto; 326 327 rop->rop_rtableid = curproc->p_p->ps_rtableid; 328 329 soisconnected(so); 330 so->so_options |= SO_USELOOPBACK; 331 332 rw_enter(&rtptable.rtp_lk, RW_WRITE); 333 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, 334 rop_list); 335 rtptable.rtp_count++; 336 rw_exit(&rtptable.rtp_lk); 337 338 return (0); 339 } 340 341 int 342 route_detach(struct socket *so) 343 { 344 struct rtpcb *rop; 345 346 soassertlocked(so); 347 348 rop = sotortpcb(so); 349 if (rop == NULL) 350 return (EINVAL); 351 352 rw_enter(&rtptable.rtp_lk, RW_WRITE); 353 354 timeout_del(&rop->rop_timeout); 355 rtptable.rtp_count--; 356 357 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, 358 rop_list); 359 rw_exit(&rtptable.rtp_lk); 360 361 /* wait for all references to drop */ 362 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); 363 364 so->so_pcb = NULL; 365 KASSERT((so->so_state & SS_NOFDREF) == 0); 366 pool_put(&rtpcb_pool, rop); 367 368 return (0); 369 } 370 371 int 372 route_ctloutput(int op, struct socket *so, int level, int optname, 373 struct mbuf *m) 374 { 375 struct rtpcb *rop = sotortpcb(so); 376 int error = 0; 377 unsigned int tid, prio; 378 379 if (level != AF_ROUTE) 380 return (EINVAL); 381 382 switch (op) { 383 case PRCO_SETOPT: 384 switch (optname) { 385 case ROUTE_MSGFILTER: 386 if (m == NULL || m->m_len != sizeof(unsigned int)) 387 error = EINVAL; 388 else 389 rop->rop_msgfilter = *mtod(m, unsigned int *); 390 break; 391 case ROUTE_TABLEFILTER: 392 if (m == NULL || m->m_len != sizeof(unsigned int)) { 393 error = EINVAL; 394 break; 395 } 396 tid = *mtod(m, unsigned int *); 397 if (tid != RTABLE_ANY && !rtable_exists(tid)) 398 error = ENOENT; 399 else 400 rop->rop_rtableid = tid; 401 break; 402 case ROUTE_PRIOFILTER: 403 if (m == NULL || m->m_len != sizeof(unsigned int)) { 404 error = EINVAL; 405 break; 406 } 407 prio = *mtod(m, unsigned int *); 408 if (prio > RTP_MAX) 409 error = EINVAL; 410 else 411 rop->rop_priority = prio; 412 break; 413 case ROUTE_FLAGFILTER: 414 if (m == NULL || m->m_len != sizeof(unsigned int)) 415 error = EINVAL; 416 else 417 rop->rop_flagfilter = *mtod(m, unsigned int *); 418 break; 419 default: 420 error = ENOPROTOOPT; 421 break; 422 } 423 break; 424 case PRCO_GETOPT: 425 switch (optname) { 426 case ROUTE_MSGFILTER: 427 m->m_len = sizeof(unsigned int); 428 *mtod(m, unsigned int *) = rop->rop_msgfilter; 429 break; 430 case ROUTE_TABLEFILTER: 431 m->m_len = sizeof(unsigned int); 432 *mtod(m, unsigned int *) = rop->rop_rtableid; 433 break; 434 case ROUTE_PRIOFILTER: 435 m->m_len = sizeof(unsigned int); 436 *mtod(m, unsigned int *) = rop->rop_priority; 437 break; 438 case ROUTE_FLAGFILTER: 439 m->m_len = sizeof(unsigned int); 440 *mtod(m, unsigned int *) = rop->rop_flagfilter; 441 break; 442 default: 443 error = ENOPROTOOPT; 444 break; 445 } 446 } 447 return (error); 448 } 449 450 void 451 rtm_senddesync_timer(void *xso) 452 { 453 struct socket *so = xso; 454 int s; 455 456 s = solock(so); 457 rtm_senddesync(so); 458 sounlock(so, s); 459 } 460 461 void 462 rtm_senddesync(struct socket *so) 463 { 464 struct rtpcb *rop = sotortpcb(so); 465 struct mbuf *desync_mbuf; 466 467 soassertlocked(so); 468 469 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 470 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 471 return; 472 473 /* 474 * If we fail to alloc memory or if sbappendaddr() 475 * fails, re-add timeout and try again. 476 */ 477 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 478 if (desync_mbuf != NULL) { 479 if (sbappendaddr(so, &so->so_rcv, &route_src, 480 desync_mbuf, NULL) != 0) { 481 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 482 sorwakeup(rop->rop_socket); 483 return; 484 } 485 m_freem(desync_mbuf); 486 } 487 /* Re-add timeout to try sending msg again */ 488 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 489 } 490 491 void 492 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 493 { 494 struct socket *so; 495 struct rtpcb *rop; 496 struct rt_msghdr *rtm; 497 struct mbuf *m = m0; 498 struct socket *last = NULL; 499 struct srp_ref sr; 500 int s; 501 502 /* ensure that we can access the rtm_type via mtod() */ 503 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 504 m_freem(m); 505 return; 506 } 507 508 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 509 /* 510 * If route socket is bound to an address family only send 511 * messages that match the address family. Address family 512 * agnostic messages are always sent. 513 */ 514 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 515 rop->rop_proto != sa_family) 516 continue; 517 518 519 so = rop->rop_socket; 520 s = solock(so); 521 522 /* 523 * Check to see if we don't want our own messages and 524 * if we can receive anything. 525 */ 526 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 527 !(so->so_state & SS_ISCONNECTED) || 528 (so->so_state & SS_CANTRCVMORE)) { 529 next: 530 sounlock(so, s); 531 continue; 532 } 533 534 /* filter messages that the process does not want */ 535 rtm = mtod(m, struct rt_msghdr *); 536 /* but RTM_DESYNC can't be filtered */ 537 if (rtm->rtm_type != RTM_DESYNC) { 538 if (rop->rop_msgfilter != 0 && 539 !(rop->rop_msgfilter & (1 << rtm->rtm_type))) 540 goto next; 541 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags)) 542 goto next; 543 } 544 switch (rtm->rtm_type) { 545 case RTM_IFANNOUNCE: 546 case RTM_DESYNC: 547 /* no tableid */ 548 break; 549 case RTM_RESOLVE: 550 case RTM_NEWADDR: 551 case RTM_DELADDR: 552 case RTM_IFINFO: 553 case RTM_80211INFO: 554 case RTM_BFD: 555 /* check against rdomain id */ 556 if (rop->rop_rtableid != RTABLE_ANY && 557 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 558 goto next; 559 break; 560 default: 561 if (rop->rop_priority != 0 && 562 rop->rop_priority < rtm->rtm_priority) 563 goto next; 564 /* check against rtable id */ 565 if (rop->rop_rtableid != RTABLE_ANY && 566 rop->rop_rtableid != rtm->rtm_tableid) 567 goto next; 568 break; 569 } 570 571 /* 572 * Check to see if the flush flag is set. If so, don't queue 573 * any more messages until the flag is cleared. 574 */ 575 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 576 goto next; 577 sounlock(so, s); 578 579 if (last) { 580 s = solock(last); 581 rtm_sendup(last, m, 1); 582 sounlock(last, s); 583 refcnt_rele_wake(&sotortpcb(last)->rop_refcnt); 584 } 585 /* keep a reference for last */ 586 refcnt_take(&rop->rop_refcnt); 587 last = rop->rop_socket; 588 } 589 SRPL_LEAVE(&sr); 590 591 if (last) { 592 s = solock(last); 593 rtm_sendup(last, m, 0); 594 sounlock(last, s); 595 refcnt_rele_wake(&sotortpcb(last)->rop_refcnt); 596 } else 597 m_freem(m); 598 } 599 600 int 601 rtm_sendup(struct socket *so, struct mbuf *m0, int more) 602 { 603 struct rtpcb *rop = sotortpcb(so); 604 struct mbuf *m; 605 606 soassertlocked(so); 607 608 if (more) { 609 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 610 if (m == NULL) 611 return (ENOMEM); 612 } else 613 m = m0; 614 615 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || 616 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { 617 /* Flag socket as desync'ed and flush required */ 618 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 619 rtm_senddesync(so); 620 m_freem(m); 621 return (ENOBUFS); 622 } 623 624 sorwakeup(so); 625 return (0); 626 } 627 628 struct rt_msghdr * 629 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 630 { 631 struct rt_msghdr *rtm; 632 struct rt_addrinfo info; 633 struct sockaddr_rtlabel sa_rl; 634 struct sockaddr_in6 sa_mask; 635 #ifdef BFD 636 struct sockaddr_bfd sa_bfd; 637 #endif 638 struct ifnet *ifp = NULL; 639 int len; 640 641 bzero(&info, sizeof(info)); 642 info.rti_info[RTAX_DST] = rt_key(rt); 643 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 644 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 645 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 646 #ifdef BFD 647 if (rt->rt_flags & RTF_BFD) 648 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 649 #endif 650 #ifdef MPLS 651 if (rt->rt_flags & RTF_MPLS) { 652 struct sockaddr_mpls sa_mpls; 653 654 bzero(&sa_mpls, sizeof(sa_mpls)); 655 sa_mpls.smpls_family = AF_MPLS; 656 sa_mpls.smpls_len = sizeof(sa_mpls); 657 sa_mpls.smpls_label = ((struct rt_mpls *) 658 rt->rt_llinfo)->mpls_label; 659 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 660 info.rti_mpls = ((struct rt_mpls *) 661 rt->rt_llinfo)->mpls_operation; 662 } 663 #endif 664 ifp = if_get(rt->rt_ifidx); 665 if (ifp != NULL) { 666 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 667 info.rti_info[RTAX_IFA] = 668 rtable_getsource(tableid, info.rti_info[RTAX_DST]->sa_family); 669 if (info.rti_info[RTAX_IFA] == NULL) 670 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 671 if (ifp->if_flags & IFF_POINTOPOINT) 672 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 673 } 674 if_put(ifp); 675 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 676 677 /* build new route message */ 678 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 679 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 680 681 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 682 rtm->rtm_type = type; 683 rtm->rtm_index = rt->rt_ifidx; 684 rtm->rtm_tableid = tableid; 685 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 686 rtm->rtm_flags = rt->rt_flags; 687 rtm->rtm_pid = curproc->p_p->ps_pid; 688 rtm->rtm_seq = seq; 689 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 690 rtm->rtm_addrs = info.rti_addrs; 691 #ifdef MPLS 692 rtm->rtm_mpls = info.rti_mpls; 693 #endif 694 return rtm; 695 } 696 697 int 698 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 699 struct mbuf *control) 700 { 701 struct rt_msghdr *rtm = NULL; 702 struct rtentry *rt = NULL; 703 struct rt_addrinfo info; 704 struct ifnet *ifp; 705 int len, seq, error = 0; 706 u_int tableid; 707 u_int8_t prio; 708 u_char vers, type; 709 710 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 711 (m = m_pullup(m, sizeof(int32_t))) == 0)) 712 return (ENOBUFS); 713 if ((m->m_flags & M_PKTHDR) == 0) 714 panic("route_output"); 715 len = m->m_pkthdr.len; 716 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 || 717 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 718 error = EINVAL; 719 goto fail; 720 } 721 vers = mtod(m, struct rt_msghdr *)->rtm_version; 722 switch (vers) { 723 case RTM_VERSION: 724 if (len < sizeof(struct rt_msghdr)) { 725 error = EINVAL; 726 goto fail; 727 } 728 if (len > RTM_MAXSIZE) { 729 error = EMSGSIZE; 730 goto fail; 731 } 732 rtm = malloc(len, M_RTABLE, M_WAITOK); 733 m_copydata(m, 0, len, (caddr_t)rtm); 734 break; 735 default: 736 error = EPROTONOSUPPORT; 737 goto fail; 738 } 739 740 /* Verify that the caller is sending an appropriate message early */ 741 switch (rtm->rtm_type) { 742 case RTM_ADD: 743 case RTM_DELETE: 744 case RTM_GET: 745 case RTM_CHANGE: 746 case RTM_PROPOSAL: 747 case RTM_SOURCE: 748 break; 749 default: 750 error = EOPNOTSUPP; 751 goto fail; 752 } 753 /* 754 * Verify that the header length is valid. 755 * All messages from userland start with a struct rt_msghdr. 756 */ 757 if (rtm->rtm_hdrlen == 0) /* old client */ 758 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 759 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 760 len < rtm->rtm_hdrlen) { 761 error = EINVAL; 762 goto fail; 763 } 764 765 rtm->rtm_pid = curproc->p_p->ps_pid; 766 767 /* 768 * Verify that the caller has the appropriate privilege; RTM_GET 769 * is the only operation the non-superuser is allowed. 770 */ 771 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 772 error = EACCES; 773 goto fail; 774 } 775 tableid = rtm->rtm_tableid; 776 if (!rtable_exists(tableid)) { 777 if (rtm->rtm_type == RTM_ADD) { 778 if ((error = rtable_add(tableid)) != 0) 779 goto fail; 780 } else { 781 error = EINVAL; 782 goto fail; 783 } 784 } 785 786 /* Do not let userland play with kernel-only flags. */ 787 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 788 error = EINVAL; 789 goto fail; 790 } 791 792 /* make sure that kernel-only bits are not set */ 793 rtm->rtm_priority &= RTP_MASK; 794 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 795 rtm->rtm_fmask &= RTF_FMASK; 796 797 if (rtm->rtm_priority != 0) { 798 if (rtm->rtm_priority > RTP_MAX || 799 rtm->rtm_priority == RTP_LOCAL) { 800 error = EINVAL; 801 goto fail; 802 } 803 prio = rtm->rtm_priority; 804 } else if (rtm->rtm_type != RTM_ADD) 805 prio = RTP_ANY; 806 else if (rtm->rtm_flags & RTF_STATIC) 807 prio = 0; 808 else 809 prio = RTP_DEFAULT; 810 811 bzero(&info, sizeof(info)); 812 info.rti_addrs = rtm->rtm_addrs; 813 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 814 len + (caddr_t)rtm, &info)) != 0) 815 goto fail; 816 817 info.rti_flags = rtm->rtm_flags; 818 819 if (rtm->rtm_type != RTM_SOURCE && 820 rtm->rtm_type != RTM_PROPOSAL && 821 (info.rti_info[RTAX_DST] == NULL || 822 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 823 (info.rti_info[RTAX_GATEWAY] != NULL && 824 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 825 info.rti_info[RTAX_GENMASK] != NULL)) { 826 error = EINVAL; 827 goto fail; 828 } 829 #ifdef MPLS 830 info.rti_mpls = rtm->rtm_mpls; 831 #endif 832 833 if (info.rti_info[RTAX_GATEWAY] != NULL && 834 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 835 (info.rti_flags & RTF_CLONING) == 0) { 836 info.rti_flags |= RTF_LLINFO; 837 } 838 839 /* 840 * Validate RTM_PROPOSAL and pass it along or error out. 841 */ 842 if (rtm->rtm_type == RTM_PROPOSAL) { 843 if (rtm_validate_proposal(&info) == -1) { 844 error = EINVAL; 845 goto fail; 846 } 847 /* 848 * If this is a solicitation proposal forward request to 849 * all interfaces. Most handlers will ignore it but at least 850 * umb(4) will send a response to this event. 851 */ 852 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 853 NET_LOCK(); 854 TAILQ_FOREACH(ifp, &ifnet, if_list) { 855 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 856 } 857 NET_UNLOCK(); 858 } 859 } else if (rtm->rtm_type == RTM_SOURCE) { 860 if (info.rti_info[RTAX_IFA] == NULL) { 861 error = EINVAL; 862 goto fail; 863 } 864 if ((error = 865 rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0) 866 goto fail; 867 } else { 868 error = rtm_output(rtm, &rt, &info, prio, tableid); 869 if (!error) { 870 type = rtm->rtm_type; 871 seq = rtm->rtm_seq; 872 free(rtm, M_RTABLE, len); 873 rtm = rtm_report(rt, type, seq, tableid); 874 len = rtm->rtm_msglen; 875 } 876 } 877 878 rtfree(rt); 879 if (error) { 880 rtm->rtm_errno = error; 881 } else { 882 rtm->rtm_flags |= RTF_DONE; 883 } 884 885 /* 886 * Check to see if we don't want our own messages. 887 */ 888 if (!(so->so_options & SO_USELOOPBACK)) { 889 if (rtptable.rtp_count <= 1) { 890 /* no other listener and no loopback of messages */ 891 fail: 892 free(rtm, M_RTABLE, len); 893 m_freem(m); 894 return (error); 895 } 896 } 897 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 898 m_freem(m); 899 m = NULL; 900 } else if (m->m_pkthdr.len > len) 901 m_adj(m, len - m->m_pkthdr.len); 902 free(rtm, M_RTABLE, len); 903 if (m) 904 route_input(m, so, info.rti_info[RTAX_DST] ? 905 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 906 907 return (error); 908 } 909 910 int 911 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 912 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 913 { 914 struct rtentry *rt = *prt; 915 struct ifnet *ifp = NULL; 916 int plen, newgate = 0, error = 0; 917 918 switch (rtm->rtm_type) { 919 case RTM_ADD: 920 if (info->rti_info[RTAX_GATEWAY] == NULL) { 921 error = EINVAL; 922 break; 923 } 924 925 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 926 if ((error = route_arp_conflict(rt, info))) { 927 rtfree(rt); 928 rt = NULL; 929 break; 930 } 931 932 /* 933 * We cannot go through a delete/create/insert cycle for 934 * cached route because this can lead to races in the 935 * receive path. Instead we update the L2 cache. 936 */ 937 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 938 goto change; 939 940 rtfree(rt); 941 rt = NULL; 942 943 NET_LOCK(); 944 if ((error = rtm_getifa(info, tableid)) != 0) { 945 NET_UNLOCK(); 946 break; 947 } 948 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 949 NET_UNLOCK(); 950 if (error == 0) 951 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 952 &rt->rt_rmx); 953 break; 954 case RTM_DELETE: 955 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 956 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 957 prio); 958 if (rt == NULL) { 959 error = ESRCH; 960 break; 961 } 962 963 /* 964 * If we got multipath routes, we require users to specify 965 * a matching gateway. 966 */ 967 if (ISSET(rt->rt_flags, RTF_MPATH) && 968 info->rti_info[RTAX_GATEWAY] == NULL) { 969 error = ESRCH; 970 break; 971 } 972 973 /* Detaching an interface requires the KERNEL_LOCK(). */ 974 ifp = if_get(rt->rt_ifidx); 975 KASSERT(ifp != NULL); 976 977 /* 978 * Invalidate the cache of automagically created and 979 * referenced L2 entries to make sure that ``rt_gwroute'' 980 * pointer stays valid for other CPUs. 981 */ 982 if ((ISSET(rt->rt_flags, RTF_CACHED))) { 983 NET_LOCK(); 984 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 985 /* Reset the MTU of the gateway route. */ 986 rtable_walk(tableid, rt_key(rt)->sa_family, NULL, 987 route_cleargateway, rt); 988 NET_UNLOCK(); 989 if_put(ifp); 990 break; 991 } 992 993 /* 994 * Make sure that local routes are only modified by the 995 * kernel. 996 */ 997 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 998 if_put(ifp); 999 error = EINVAL; 1000 break; 1001 } 1002 1003 rtfree(rt); 1004 rt = NULL; 1005 1006 NET_LOCK(); 1007 error = rtrequest_delete(info, prio, ifp, &rt, tableid); 1008 NET_UNLOCK(); 1009 if_put(ifp); 1010 break; 1011 case RTM_CHANGE: 1012 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1013 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1014 prio); 1015 /* 1016 * If we got multipath routes, we require users to specify 1017 * a matching gateway. 1018 */ 1019 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 1020 (info->rti_info[RTAX_GATEWAY] == NULL)) { 1021 rtfree(rt); 1022 rt = NULL; 1023 } 1024 /* 1025 * If RTAX_GATEWAY is the argument we're trying to 1026 * change, try to find a compatible route. 1027 */ 1028 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) { 1029 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1030 info->rti_info[RTAX_NETMASK], NULL, prio); 1031 /* Ensure we don't pick a multipath one. */ 1032 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 1033 rtfree(rt); 1034 rt = NULL; 1035 } 1036 } 1037 1038 if (rt == NULL) { 1039 error = ESRCH; 1040 break; 1041 } 1042 1043 /* 1044 * Make sure that local routes are only modified by the 1045 * kernel. 1046 */ 1047 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1048 error = EINVAL; 1049 break; 1050 } 1051 1052 /* 1053 * RTM_CHANGE needs a perfect match. 1054 */ 1055 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family, 1056 info->rti_info[RTAX_NETMASK]); 1057 if (rt_plen(rt) != plen) { 1058 error = ESRCH; 1059 break; 1060 } 1061 1062 if (info->rti_info[RTAX_GATEWAY] != NULL) 1063 if (rt->rt_gateway == NULL || 1064 bcmp(rt->rt_gateway, 1065 info->rti_info[RTAX_GATEWAY], 1066 info->rti_info[RTAX_GATEWAY]->sa_len)) { 1067 newgate = 1; 1068 } 1069 /* 1070 * Check reachable gateway before changing the route. 1071 * New gateway could require new ifaddr, ifp; 1072 * flags may also be different; ifp may be specified 1073 * by ll sockaddr when protocol address is ambiguous. 1074 */ 1075 if (newgate || info->rti_info[RTAX_IFP] != NULL || 1076 info->rti_info[RTAX_IFA] != NULL) { 1077 struct ifaddr *ifa = NULL; 1078 1079 NET_LOCK(); 1080 if ((error = rtm_getifa(info, tableid)) != 0) { 1081 NET_UNLOCK(); 1082 break; 1083 } 1084 ifa = info->rti_ifa; 1085 if (rt->rt_ifa != ifa) { 1086 ifp = if_get(rt->rt_ifidx); 1087 KASSERT(ifp != NULL); 1088 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 1089 ifafree(rt->rt_ifa); 1090 if_put(ifp); 1091 1092 ifa->ifa_refcnt++; 1093 rt->rt_ifa = ifa; 1094 rt->rt_ifidx = ifa->ifa_ifp->if_index; 1095 /* recheck link state after ifp change */ 1096 rt_if_linkstate_change(rt, ifa->ifa_ifp, 1097 tableid); 1098 } 1099 NET_UNLOCK(); 1100 } 1101 change: 1102 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1103 /* When updating the gateway, make sure it is valid. */ 1104 if (!newgate && rt->rt_gateway->sa_family != 1105 info->rti_info[RTAX_GATEWAY]->sa_family) { 1106 error = EINVAL; 1107 break; 1108 } 1109 1110 NET_LOCK(); 1111 error = rt_setgate(rt, 1112 info->rti_info[RTAX_GATEWAY], tableid); 1113 NET_UNLOCK(); 1114 if (error) 1115 break; 1116 } 1117 #ifdef MPLS 1118 if (rtm->rtm_flags & RTF_MPLS) { 1119 NET_LOCK(); 1120 error = rt_mpls_set(rt, 1121 info->rti_info[RTAX_SRC], info->rti_mpls); 1122 NET_UNLOCK(); 1123 if (error) 1124 break; 1125 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) { 1126 NET_LOCK(); 1127 /* if gateway changed remove MPLS information */ 1128 rt_mpls_clear(rt); 1129 NET_UNLOCK(); 1130 } 1131 #endif 1132 1133 #ifdef BFD 1134 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 1135 if ((error = bfdset(rt))) 1136 break; 1137 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 1138 ISSET(rtm->rtm_fmask, RTF_BFD)) { 1139 bfdclear(rt); 1140 } 1141 #endif 1142 1143 NET_LOCK(); 1144 /* Hack to allow some flags to be toggled */ 1145 if (rtm->rtm_fmask) { 1146 /* MPLS flag it is set by rt_mpls_set() */ 1147 rtm->rtm_fmask &= ~RTF_MPLS; 1148 rtm->rtm_flags &= ~RTF_MPLS; 1149 rt->rt_flags = 1150 (rt->rt_flags & ~rtm->rtm_fmask) | 1151 (rtm->rtm_flags & rtm->rtm_fmask); 1152 } 1153 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); 1154 1155 ifp = if_get(rt->rt_ifidx); 1156 KASSERT(ifp != NULL); 1157 ifp->if_rtrequest(ifp, RTM_ADD, rt); 1158 if_put(ifp); 1159 1160 if (info->rti_info[RTAX_LABEL] != NULL) { 1161 char *rtlabel = ((struct sockaddr_rtlabel *) 1162 info->rti_info[RTAX_LABEL])->sr_label; 1163 rtlabel_unref(rt->rt_labelid); 1164 rt->rt_labelid = rtlabel_name2id(rtlabel); 1165 } 1166 if_group_routechange(info->rti_info[RTAX_DST], 1167 info->rti_info[RTAX_NETMASK]); 1168 rt->rt_locks &= ~(rtm->rtm_inits); 1169 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 1170 NET_UNLOCK(); 1171 break; 1172 case RTM_GET: 1173 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1174 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1175 prio); 1176 if (rt == NULL) 1177 error = ESRCH; 1178 break; 1179 } 1180 1181 *prt = rt; 1182 return (error); 1183 } 1184 1185 struct ifaddr * 1186 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway, 1187 unsigned int rtableid) 1188 { 1189 struct ifaddr *ifa; 1190 1191 if ((flags & RTF_GATEWAY) == 0) { 1192 /* 1193 * If we are adding a route to an interface, 1194 * and the interface is a pt to pt link 1195 * we should search for the destination 1196 * as our clue to the interface. Otherwise 1197 * we can use the local address. 1198 */ 1199 ifa = NULL; 1200 if (flags & RTF_HOST) 1201 ifa = ifa_ifwithdstaddr(dst, rtableid); 1202 if (ifa == NULL) 1203 ifa = ifa_ifwithaddr(gateway, rtableid); 1204 } else { 1205 /* 1206 * If we are adding a route to a remote net 1207 * or host, the gateway may still be on the 1208 * other end of a pt to pt link. 1209 */ 1210 ifa = ifa_ifwithdstaddr(gateway, rtableid); 1211 } 1212 if (ifa == NULL) { 1213 if (gateway->sa_family == AF_LINK) { 1214 struct sockaddr_dl *sdl = satosdl(gateway); 1215 struct ifnet *ifp = if_get(sdl->sdl_index); 1216 1217 if (ifp != NULL) 1218 ifa = ifaof_ifpforaddr(dst, ifp); 1219 if_put(ifp); 1220 } else { 1221 struct rtentry *rt; 1222 1223 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid)); 1224 if (rt != NULL) 1225 ifa = rt->rt_ifa; 1226 rtfree(rt); 1227 } 1228 } 1229 if (ifa == NULL) 1230 return (NULL); 1231 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1232 struct ifaddr *oifa = ifa; 1233 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1234 if (ifa == NULL) 1235 ifa = oifa; 1236 } 1237 return (ifa); 1238 } 1239 1240 int 1241 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid) 1242 { 1243 struct ifnet *ifp = NULL; 1244 1245 /* 1246 * The "returned" `ifa' is guaranteed to be alive only if 1247 * the NET_LOCK() is held. 1248 */ 1249 NET_ASSERT_LOCKED(); 1250 1251 /* 1252 * ifp may be specified by sockaddr_dl when protocol address 1253 * is ambiguous 1254 */ 1255 if (info->rti_info[RTAX_IFP] != NULL) { 1256 struct sockaddr_dl *sdl; 1257 1258 sdl = satosdl(info->rti_info[RTAX_IFP]); 1259 ifp = if_get(sdl->sdl_index); 1260 } 1261 1262 #ifdef IPSEC 1263 /* 1264 * If the destination is a PF_KEY address, we'll look 1265 * for the existence of a encap interface number or address 1266 * in the options list of the gateway. By default, we'll return 1267 * enc0. 1268 */ 1269 if (info->rti_info[RTAX_DST] && 1270 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1271 info->rti_ifa = enc_getifa(rtid, 0); 1272 #endif 1273 1274 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1275 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1276 1277 if (info->rti_ifa == NULL) { 1278 struct sockaddr *sa; 1279 1280 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1281 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1282 sa = info->rti_info[RTAX_DST]; 1283 1284 if (sa != NULL && ifp != NULL) 1285 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1286 else if (info->rti_info[RTAX_DST] != NULL && 1287 info->rti_info[RTAX_GATEWAY] != NULL) 1288 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1289 info->rti_info[RTAX_DST], 1290 info->rti_info[RTAX_GATEWAY], 1291 rtid); 1292 else if (sa != NULL) 1293 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1294 sa, sa, rtid); 1295 } 1296 1297 if_put(ifp); 1298 1299 if (info->rti_ifa == NULL) 1300 return (ENETUNREACH); 1301 1302 return (0); 1303 } 1304 1305 int 1306 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1307 { 1308 struct rtentry *nhrt = arg; 1309 1310 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1311 !ISSET(rt->rt_locks, RTV_MTU)) 1312 rt->rt_mtu = 0; 1313 1314 return (0); 1315 } 1316 1317 /* 1318 * Check if the user request to insert an ARP entry does not conflict 1319 * with existing ones. 1320 * 1321 * Only two entries are allowed for a given IP address: a private one 1322 * (priv) and a public one (pub). 1323 */ 1324 int 1325 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1326 { 1327 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1328 1329 if ((info->rti_flags & RTF_LLINFO) == 0 || 1330 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1331 return (0); 1332 1333 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1334 return (0); 1335 1336 /* If the entry is cached, it can be updated. */ 1337 if (ISSET(rt->rt_flags, RTF_CACHED)) 1338 return (0); 1339 1340 /* 1341 * Same destination, not cached and both "priv" or "pub" conflict. 1342 * If a second entry exists, it always conflict. 1343 */ 1344 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1345 ISSET(rt->rt_flags, RTF_MPATH)) 1346 return (EEXIST); 1347 1348 /* No conflict but an entry exist so we need to force mpath. */ 1349 info->rti_flags |= RTF_MPATH; 1350 return (0); 1351 } 1352 1353 void 1354 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1355 struct rt_kmetrics *out) 1356 { 1357 int64_t expire; 1358 1359 if (which & RTV_MTU) 1360 out->rmx_mtu = in->rmx_mtu; 1361 if (which & RTV_EXPIRE) { 1362 expire = in->rmx_expire; 1363 if (expire != 0) { 1364 expire -= gettime(); 1365 expire += getuptime(); 1366 } 1367 1368 out->rmx_expire = expire; 1369 } 1370 } 1371 1372 void 1373 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1374 { 1375 int64_t expire; 1376 1377 expire = in->rmx_expire; 1378 if (expire != 0) { 1379 expire -= getuptime(); 1380 expire += gettime(); 1381 } 1382 1383 bzero(out, sizeof(*out)); 1384 out->rmx_locks = in->rmx_locks; 1385 out->rmx_mtu = in->rmx_mtu; 1386 out->rmx_expire = expire; 1387 out->rmx_pksent = in->rmx_pksent; 1388 } 1389 1390 #define ROUNDUP(a) \ 1391 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1392 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1393 1394 int 1395 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1396 { 1397 struct sockaddr *sa; 1398 int i; 1399 1400 /* 1401 * Parse address bits, split address storage in chunks, and 1402 * set info pointers. Use sa_len for traversing the memory 1403 * and check that we stay within in the limit. 1404 */ 1405 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1406 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1407 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1408 continue; 1409 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1410 return (EINVAL); 1411 sa = (struct sockaddr *)cp; 1412 if (cp + sa->sa_len > cplim) 1413 return (EINVAL); 1414 rtinfo->rti_info[i] = sa; 1415 ADVANCE(cp, sa); 1416 } 1417 /* 1418 * Check that the address family is suitable for the route address 1419 * type. Check that each address has a size that fits its family 1420 * and its length is within the size. Strings within addresses must 1421 * be NUL terminated. 1422 */ 1423 for (i = 0; i < RTAX_MAX; i++) { 1424 size_t len, maxlen, size; 1425 1426 sa = rtinfo->rti_info[i]; 1427 if (sa == NULL) 1428 continue; 1429 maxlen = size = 0; 1430 switch (i) { 1431 case RTAX_DST: 1432 case RTAX_GATEWAY: 1433 case RTAX_SRC: 1434 switch (sa->sa_family) { 1435 case AF_INET: 1436 size = sizeof(struct sockaddr_in); 1437 break; 1438 case AF_LINK: 1439 size = sizeof(struct sockaddr_dl); 1440 break; 1441 #ifdef INET6 1442 case AF_INET6: 1443 size = sizeof(struct sockaddr_in6); 1444 break; 1445 #endif 1446 #ifdef MPLS 1447 case AF_MPLS: 1448 size = sizeof(struct sockaddr_mpls); 1449 break; 1450 #endif 1451 } 1452 break; 1453 case RTAX_IFP: 1454 if (sa->sa_family != AF_LINK) 1455 return (EAFNOSUPPORT); 1456 /* 1457 * XXX Should be sizeof(struct sockaddr_dl), but 1458 * route(8) has a bug and provides less memory. 1459 * arp(8) has another bug and uses sizeof pointer. 1460 */ 1461 size = 4; 1462 break; 1463 case RTAX_IFA: 1464 switch (sa->sa_family) { 1465 case AF_INET: 1466 size = sizeof(struct sockaddr_in); 1467 break; 1468 #ifdef INET6 1469 case AF_INET6: 1470 size = sizeof(struct sockaddr_in6); 1471 break; 1472 #endif 1473 default: 1474 return (EAFNOSUPPORT); 1475 } 1476 break; 1477 case RTAX_LABEL: 1478 sa->sa_family = AF_UNSPEC; 1479 maxlen = RTLABEL_LEN; 1480 size = sizeof(struct sockaddr_rtlabel); 1481 break; 1482 #ifdef BFD 1483 case RTAX_BFD: 1484 sa->sa_family = AF_UNSPEC; 1485 size = sizeof(struct sockaddr_bfd); 1486 break; 1487 #endif 1488 case RTAX_DNS: 1489 /* more validation in rtm_validate_proposal */ 1490 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1491 return (EINVAL); 1492 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1493 sr_dns)) 1494 return (EINVAL); 1495 switch (sa->sa_family) { 1496 case AF_INET: 1497 #ifdef INET6 1498 case AF_INET6: 1499 #endif 1500 break; 1501 default: 1502 return (EAFNOSUPPORT); 1503 } 1504 break; 1505 case RTAX_STATIC: 1506 sa->sa_family = AF_UNSPEC; 1507 maxlen = RTSTATIC_LEN; 1508 size = sizeof(struct sockaddr_rtstatic); 1509 break; 1510 case RTAX_SEARCH: 1511 sa->sa_family = AF_UNSPEC; 1512 maxlen = RTSEARCH_LEN; 1513 size = sizeof(struct sockaddr_rtsearch); 1514 break; 1515 } 1516 if (size) { 1517 /* memory for the full struct must be provided */ 1518 if (sa->sa_len < size) 1519 return (EINVAL); 1520 } 1521 if (maxlen) { 1522 /* this should not happen */ 1523 if (2 + maxlen > size) 1524 return (EINVAL); 1525 /* strings must be NUL terminated within the struct */ 1526 len = strnlen(sa->sa_data, maxlen); 1527 if (len >= maxlen || 2 + len >= sa->sa_len) 1528 return (EINVAL); 1529 break; 1530 } 1531 } 1532 return (0); 1533 } 1534 1535 struct mbuf * 1536 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1537 { 1538 struct rt_msghdr *rtm; 1539 struct mbuf *m; 1540 int i; 1541 struct sockaddr *sa; 1542 int len, dlen, hlen; 1543 1544 switch (type) { 1545 case RTM_DELADDR: 1546 case RTM_NEWADDR: 1547 len = sizeof(struct ifa_msghdr); 1548 break; 1549 case RTM_IFINFO: 1550 len = sizeof(struct if_msghdr); 1551 break; 1552 case RTM_IFANNOUNCE: 1553 len = sizeof(struct if_announcemsghdr); 1554 break; 1555 #ifdef BFD 1556 case RTM_BFD: 1557 len = sizeof(struct bfd_msghdr); 1558 break; 1559 #endif 1560 case RTM_80211INFO: 1561 len = sizeof(struct if_ieee80211_msghdr); 1562 break; 1563 default: 1564 len = sizeof(struct rt_msghdr); 1565 break; 1566 } 1567 if (len > MCLBYTES) 1568 panic("rtm_msg1"); 1569 m = m_gethdr(M_DONTWAIT, MT_DATA); 1570 if (m && len > MHLEN) { 1571 MCLGET(m, M_DONTWAIT); 1572 if ((m->m_flags & M_EXT) == 0) { 1573 m_free(m); 1574 m = NULL; 1575 } 1576 } 1577 if (m == NULL) 1578 return (m); 1579 m->m_pkthdr.len = m->m_len = hlen = len; 1580 m->m_pkthdr.ph_ifidx = 0; 1581 rtm = mtod(m, struct rt_msghdr *); 1582 bzero(rtm, len); 1583 for (i = 0; i < RTAX_MAX; i++) { 1584 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1585 continue; 1586 rtinfo->rti_addrs |= (1 << i); 1587 dlen = ROUNDUP(sa->sa_len); 1588 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1589 m_freem(m); 1590 return (NULL); 1591 } 1592 len += dlen; 1593 } 1594 rtm->rtm_msglen = len; 1595 rtm->rtm_hdrlen = hlen; 1596 rtm->rtm_version = RTM_VERSION; 1597 rtm->rtm_type = type; 1598 return (m); 1599 } 1600 1601 int 1602 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1603 struct walkarg *w) 1604 { 1605 int i; 1606 int len, dlen, hlen, second_time = 0; 1607 caddr_t cp0; 1608 1609 rtinfo->rti_addrs = 0; 1610 again: 1611 switch (type) { 1612 case RTM_DELADDR: 1613 case RTM_NEWADDR: 1614 len = sizeof(struct ifa_msghdr); 1615 break; 1616 case RTM_IFINFO: 1617 len = sizeof(struct if_msghdr); 1618 break; 1619 default: 1620 len = sizeof(struct rt_msghdr); 1621 break; 1622 } 1623 hlen = len; 1624 if ((cp0 = cp) != NULL) 1625 cp += len; 1626 for (i = 0; i < RTAX_MAX; i++) { 1627 struct sockaddr *sa; 1628 1629 if ((sa = rtinfo->rti_info[i]) == NULL) 1630 continue; 1631 rtinfo->rti_addrs |= (1 << i); 1632 dlen = ROUNDUP(sa->sa_len); 1633 if (cp) { 1634 bcopy(sa, cp, (size_t)dlen); 1635 cp += dlen; 1636 } 1637 len += dlen; 1638 } 1639 /* align message length to the next natural boundary */ 1640 len = ALIGN(len); 1641 if (cp == 0 && w != NULL && !second_time) { 1642 w->w_needed += len; 1643 if (w->w_needed <= 0 && w->w_where) { 1644 if (w->w_tmemsize < len) { 1645 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1646 w->w_tmem = malloc(len, M_RTABLE, 1647 M_NOWAIT | M_ZERO); 1648 if (w->w_tmem) 1649 w->w_tmemsize = len; 1650 } 1651 if (w->w_tmem) { 1652 cp = w->w_tmem; 1653 second_time = 1; 1654 goto again; 1655 } else 1656 w->w_where = 0; 1657 } 1658 } 1659 if (cp && w) /* clear the message header */ 1660 bzero(cp0, hlen); 1661 1662 if (cp) { 1663 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1664 1665 rtm->rtm_version = RTM_VERSION; 1666 rtm->rtm_type = type; 1667 rtm->rtm_msglen = len; 1668 rtm->rtm_hdrlen = hlen; 1669 } 1670 return (len); 1671 } 1672 1673 void 1674 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1675 { 1676 struct rt_addrinfo info; 1677 struct ifnet *ifp; 1678 struct sockaddr_rtlabel sa_rl; 1679 struct sockaddr_in6 sa_mask; 1680 1681 memset(&info, 0, sizeof(info)); 1682 info.rti_info[RTAX_DST] = rt_key(rt); 1683 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1684 if (!ISSET(rt->rt_flags, RTF_HOST)) 1685 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1686 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1687 ifp = if_get(rt->rt_ifidx); 1688 if (ifp != NULL) { 1689 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1690 info.rti_info[RTAX_IFA] = 1691 rtable_getsource(rtableid, info.rti_info[RTAX_DST]->sa_family); 1692 if (info.rti_info[RTAX_IFA] == NULL) 1693 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1694 } 1695 1696 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1697 rtableid); 1698 if_put(ifp); 1699 } 1700 1701 /* 1702 * This routine is called to generate a message from the routing 1703 * socket indicating that a redirect has occurred, a routing lookup 1704 * has failed, or that a protocol has detected timeouts to a particular 1705 * destination. 1706 */ 1707 void 1708 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1709 u_int ifidx, int error, u_int tableid) 1710 { 1711 struct rt_msghdr *rtm; 1712 struct mbuf *m; 1713 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1714 1715 if (rtptable.rtp_count == 0) 1716 return; 1717 m = rtm_msg1(type, rtinfo); 1718 if (m == NULL) 1719 return; 1720 rtm = mtod(m, struct rt_msghdr *); 1721 rtm->rtm_flags = RTF_DONE | flags; 1722 rtm->rtm_priority = prio; 1723 rtm->rtm_errno = error; 1724 rtm->rtm_tableid = tableid; 1725 rtm->rtm_addrs = rtinfo->rti_addrs; 1726 rtm->rtm_index = ifidx; 1727 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1728 } 1729 1730 /* 1731 * This routine is called to generate a message from the routing 1732 * socket indicating that the status of a network interface has changed. 1733 */ 1734 void 1735 rtm_ifchg(struct ifnet *ifp) 1736 { 1737 struct if_msghdr *ifm; 1738 struct mbuf *m; 1739 1740 if (rtptable.rtp_count == 0) 1741 return; 1742 m = rtm_msg1(RTM_IFINFO, NULL); 1743 if (m == NULL) 1744 return; 1745 ifm = mtod(m, struct if_msghdr *); 1746 ifm->ifm_index = ifp->if_index; 1747 ifm->ifm_tableid = ifp->if_rdomain; 1748 ifm->ifm_flags = ifp->if_flags; 1749 ifm->ifm_xflags = ifp->if_xflags; 1750 if_getdata(ifp, &ifm->ifm_data); 1751 ifm->ifm_addrs = 0; 1752 route_input(m, NULL, AF_UNSPEC); 1753 } 1754 1755 /* 1756 * This is called to generate messages from the routing socket 1757 * indicating a network interface has had addresses associated with it. 1758 * if we ever reverse the logic and replace messages TO the routing 1759 * socket indicate a request to configure interfaces, then it will 1760 * be unnecessary as the routing socket will automatically generate 1761 * copies of it. 1762 */ 1763 void 1764 rtm_addr(int cmd, struct ifaddr *ifa) 1765 { 1766 struct ifnet *ifp = ifa->ifa_ifp; 1767 struct mbuf *m; 1768 struct rt_addrinfo info; 1769 struct ifa_msghdr *ifam; 1770 1771 if (rtptable.rtp_count == 0) 1772 return; 1773 1774 memset(&info, 0, sizeof(info)); 1775 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1776 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1777 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1778 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1779 if ((m = rtm_msg1(cmd, &info)) == NULL) 1780 return; 1781 ifam = mtod(m, struct ifa_msghdr *); 1782 ifam->ifam_index = ifp->if_index; 1783 ifam->ifam_metric = ifa->ifa_metric; 1784 ifam->ifam_flags = ifa->ifa_flags; 1785 ifam->ifam_addrs = info.rti_addrs; 1786 ifam->ifam_tableid = ifp->if_rdomain; 1787 1788 route_input(m, NULL, 1789 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1790 } 1791 1792 /* 1793 * This is called to generate routing socket messages indicating 1794 * network interface arrival and departure. 1795 */ 1796 void 1797 rtm_ifannounce(struct ifnet *ifp, int what) 1798 { 1799 struct if_announcemsghdr *ifan; 1800 struct mbuf *m; 1801 1802 if (rtptable.rtp_count == 0) 1803 return; 1804 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1805 if (m == NULL) 1806 return; 1807 ifan = mtod(m, struct if_announcemsghdr *); 1808 ifan->ifan_index = ifp->if_index; 1809 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1810 ifan->ifan_what = what; 1811 route_input(m, NULL, AF_UNSPEC); 1812 } 1813 1814 #ifdef BFD 1815 /* 1816 * This is used to generate routing socket messages indicating 1817 * the state of a BFD session. 1818 */ 1819 void 1820 rtm_bfd(struct bfd_config *bfd) 1821 { 1822 struct bfd_msghdr *bfdm; 1823 struct sockaddr_bfd sa_bfd; 1824 struct mbuf *m; 1825 struct rt_addrinfo info; 1826 1827 if (rtptable.rtp_count == 0) 1828 return; 1829 memset(&info, 0, sizeof(info)); 1830 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1831 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1832 1833 m = rtm_msg1(RTM_BFD, &info); 1834 if (m == NULL) 1835 return; 1836 bfdm = mtod(m, struct bfd_msghdr *); 1837 bfdm->bm_addrs = info.rti_addrs; 1838 1839 bfd2sa(bfd->bc_rt, &sa_bfd); 1840 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1841 1842 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1843 } 1844 #endif /* BFD */ 1845 1846 /* 1847 * This is used to generate routing socket messages indicating 1848 * the state of an ieee80211 interface. 1849 */ 1850 void 1851 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1852 { 1853 struct if_ieee80211_msghdr *ifim; 1854 struct mbuf *m; 1855 1856 if (rtptable.rtp_count == 0) 1857 return; 1858 m = rtm_msg1(RTM_80211INFO, NULL); 1859 if (m == NULL) 1860 return; 1861 ifim = mtod(m, struct if_ieee80211_msghdr *); 1862 ifim->ifim_index = ifp->if_index; 1863 ifim->ifim_tableid = ifp->if_rdomain; 1864 1865 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1866 route_input(m, NULL, AF_UNSPEC); 1867 } 1868 1869 /* 1870 * This is used to generate routing socket messages indicating 1871 * the address selection proposal from an interface. 1872 */ 1873 void 1874 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1875 uint8_t prio) 1876 { 1877 struct rt_msghdr *rtm; 1878 struct mbuf *m; 1879 1880 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1881 if (m == NULL) 1882 return; 1883 rtm = mtod(m, struct rt_msghdr *); 1884 rtm->rtm_flags = RTF_DONE | flags; 1885 rtm->rtm_priority = prio; 1886 rtm->rtm_tableid = ifp->if_rdomain; 1887 rtm->rtm_index = ifp->if_index; 1888 rtm->rtm_addrs = rtinfo->rti_addrs; 1889 1890 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1891 } 1892 1893 /* 1894 * This is used in dumping the kernel table via sysctl(). 1895 */ 1896 int 1897 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1898 { 1899 struct walkarg *w = v; 1900 int error = 0, size; 1901 struct rt_addrinfo info; 1902 struct ifnet *ifp; 1903 #ifdef BFD 1904 struct sockaddr_bfd sa_bfd; 1905 #endif 1906 struct sockaddr_rtlabel sa_rl; 1907 struct sockaddr_in6 sa_mask; 1908 1909 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1910 return 0; 1911 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1912 u_int8_t prio = w->w_arg & RTP_MASK; 1913 if (w->w_arg < 0) { 1914 prio = (-w->w_arg) & RTP_MASK; 1915 /* Show all routes that are not this priority */ 1916 if (prio == (rt->rt_priority & RTP_MASK)) 1917 return 0; 1918 } else { 1919 if (prio != (rt->rt_priority & RTP_MASK) && 1920 prio != RTP_ANY) 1921 return 0; 1922 } 1923 } 1924 bzero(&info, sizeof(info)); 1925 info.rti_info[RTAX_DST] = rt_key(rt); 1926 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1927 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1928 ifp = if_get(rt->rt_ifidx); 1929 if (ifp != NULL) { 1930 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1931 info.rti_info[RTAX_IFA] = 1932 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); 1933 if (info.rti_info[RTAX_IFA] == NULL) 1934 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1935 if (ifp->if_flags & IFF_POINTOPOINT) 1936 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1937 } 1938 if_put(ifp); 1939 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1940 #ifdef BFD 1941 if (rt->rt_flags & RTF_BFD) 1942 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1943 #endif 1944 #ifdef MPLS 1945 if (rt->rt_flags & RTF_MPLS) { 1946 struct sockaddr_mpls sa_mpls; 1947 1948 bzero(&sa_mpls, sizeof(sa_mpls)); 1949 sa_mpls.smpls_family = AF_MPLS; 1950 sa_mpls.smpls_len = sizeof(sa_mpls); 1951 sa_mpls.smpls_label = ((struct rt_mpls *) 1952 rt->rt_llinfo)->mpls_label; 1953 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1954 info.rti_mpls = ((struct rt_mpls *) 1955 rt->rt_llinfo)->mpls_operation; 1956 } 1957 #endif 1958 1959 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1960 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1961 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1962 1963 rtm->rtm_pid = curproc->p_p->ps_pid; 1964 rtm->rtm_flags = rt->rt_flags; 1965 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1966 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1967 /* Do not account the routing table's reference. */ 1968 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1969 rtm->rtm_index = rt->rt_ifidx; 1970 rtm->rtm_addrs = info.rti_addrs; 1971 rtm->rtm_tableid = id; 1972 #ifdef MPLS 1973 rtm->rtm_mpls = info.rti_mpls; 1974 #endif 1975 if ((error = copyout(rtm, w->w_where, size)) != 0) 1976 w->w_where = NULL; 1977 else 1978 w->w_where += size; 1979 } 1980 return (error); 1981 } 1982 1983 int 1984 sysctl_iflist(int af, struct walkarg *w) 1985 { 1986 struct ifnet *ifp; 1987 struct ifaddr *ifa; 1988 struct rt_addrinfo info; 1989 int len, error = 0; 1990 1991 bzero(&info, sizeof(info)); 1992 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1993 if (w->w_arg && w->w_arg != ifp->if_index) 1994 continue; 1995 /* Copy the link-layer address first */ 1996 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1997 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 1998 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1999 struct if_msghdr *ifm; 2000 2001 ifm = (struct if_msghdr *)w->w_tmem; 2002 ifm->ifm_index = ifp->if_index; 2003 ifm->ifm_tableid = ifp->if_rdomain; 2004 ifm->ifm_flags = ifp->if_flags; 2005 if_getdata(ifp, &ifm->ifm_data); 2006 ifm->ifm_addrs = info.rti_addrs; 2007 error = copyout(ifm, w->w_where, len); 2008 if (error) 2009 return (error); 2010 w->w_where += len; 2011 } 2012 info.rti_info[RTAX_IFP] = NULL; 2013 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 2014 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 2015 if (af && af != ifa->ifa_addr->sa_family) 2016 continue; 2017 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 2018 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 2019 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 2020 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 2021 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 2022 struct ifa_msghdr *ifam; 2023 2024 ifam = (struct ifa_msghdr *)w->w_tmem; 2025 ifam->ifam_index = ifa->ifa_ifp->if_index; 2026 ifam->ifam_flags = ifa->ifa_flags; 2027 ifam->ifam_metric = ifa->ifa_metric; 2028 ifam->ifam_addrs = info.rti_addrs; 2029 error = copyout(w->w_tmem, w->w_where, len); 2030 if (error) 2031 return (error); 2032 w->w_where += len; 2033 } 2034 } 2035 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 2036 info.rti_info[RTAX_BRD] = NULL; 2037 } 2038 return (0); 2039 } 2040 2041 int 2042 sysctl_ifnames(struct walkarg *w) 2043 { 2044 struct if_nameindex_msg ifn; 2045 struct ifnet *ifp; 2046 int error = 0; 2047 2048 /* XXX ignore tableid for now */ 2049 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2050 if (w->w_arg && w->w_arg != ifp->if_index) 2051 continue; 2052 w->w_needed += sizeof(ifn); 2053 if (w->w_where && w->w_needed <= 0) { 2054 2055 memset(&ifn, 0, sizeof(ifn)); 2056 ifn.if_index = ifp->if_index; 2057 strlcpy(ifn.if_name, ifp->if_xname, 2058 sizeof(ifn.if_name)); 2059 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2060 if (error) 2061 return (error); 2062 w->w_where += sizeof(ifn); 2063 } 2064 } 2065 2066 return (0); 2067 } 2068 2069 int 2070 sysctl_source(int af, u_int tableid, struct walkarg *w) 2071 { 2072 struct sockaddr *sa; 2073 int size, error = 0; 2074 2075 sa = rtable_getsource(tableid, af); 2076 if (sa) { 2077 switch (sa->sa_family) { 2078 case AF_INET: 2079 size = sizeof(struct sockaddr_in); 2080 break; 2081 #ifdef INET6 2082 case AF_INET6: 2083 size = sizeof(struct sockaddr_in6); 2084 break; 2085 #endif 2086 default: 2087 return (0); 2088 } 2089 w->w_needed += size; 2090 if (w->w_where && w->w_needed <= 0) { 2091 if ((error = copyout(sa, w->w_where, size))) 2092 return (error); 2093 w->w_where += size; 2094 } 2095 } 2096 return (0); 2097 } 2098 2099 int 2100 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2101 size_t newlen) 2102 { 2103 int i, error = EINVAL; 2104 u_char af; 2105 struct walkarg w; 2106 struct rt_tableinfo tableinfo; 2107 u_int tableid = 0; 2108 2109 if (new) 2110 return (EPERM); 2111 if (namelen < 3 || namelen > 4) 2112 return (EINVAL); 2113 af = name[0]; 2114 bzero(&w, sizeof(w)); 2115 w.w_where = where; 2116 w.w_given = *given; 2117 w.w_needed = 0 - w.w_given; 2118 w.w_op = name[1]; 2119 w.w_arg = name[2]; 2120 2121 if (namelen == 4) { 2122 tableid = name[3]; 2123 if (!rtable_exists(tableid)) 2124 return (ENOENT); 2125 } else 2126 tableid = curproc->p_p->ps_rtableid; 2127 2128 switch (w.w_op) { 2129 case NET_RT_DUMP: 2130 case NET_RT_FLAGS: 2131 NET_LOCK(); 2132 for (i = 1; i <= AF_MAX; i++) { 2133 if (af != 0 && af != i) 2134 continue; 2135 2136 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2137 &w); 2138 if (error == EAFNOSUPPORT) 2139 error = 0; 2140 if (error) 2141 break; 2142 } 2143 NET_UNLOCK(); 2144 break; 2145 2146 case NET_RT_IFLIST: 2147 NET_LOCK(); 2148 error = sysctl_iflist(af, &w); 2149 NET_UNLOCK(); 2150 break; 2151 2152 case NET_RT_STATS: 2153 return (sysctl_rtable_rtstat(where, given, new)); 2154 case NET_RT_TABLE: 2155 tableid = w.w_arg; 2156 if (!rtable_exists(tableid)) 2157 return (ENOENT); 2158 memset(&tableinfo, 0, sizeof tableinfo); 2159 tableinfo.rti_tableid = tableid; 2160 tableinfo.rti_domainid = rtable_l2(tableid); 2161 error = sysctl_rdstruct(where, given, new, 2162 &tableinfo, sizeof(tableinfo)); 2163 return (error); 2164 case NET_RT_IFNAMES: 2165 NET_LOCK(); 2166 error = sysctl_ifnames(&w); 2167 NET_UNLOCK(); 2168 break; 2169 case NET_RT_SOURCE: 2170 tableid = w.w_arg; 2171 if (!rtable_exists(tableid)) 2172 return (ENOENT); 2173 NET_LOCK(); 2174 for (i = 1; i <= AF_MAX; i++) { 2175 if (af != 0 && af != i) 2176 continue; 2177 2178 error = sysctl_source(i, tableid, &w); 2179 if (error == EAFNOSUPPORT) 2180 error = 0; 2181 if (error) 2182 break; 2183 } 2184 NET_UNLOCK(); 2185 break; 2186 } 2187 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2188 w.w_needed += w.w_given; 2189 if (where) { 2190 *given = w.w_where - (caddr_t)where; 2191 if (*given < w.w_needed) 2192 return (ENOMEM); 2193 } else 2194 *given = (11 * w.w_needed) / 10; 2195 2196 return (error); 2197 } 2198 2199 int 2200 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2201 { 2202 extern struct cpumem *rtcounters; 2203 uint64_t counters[rts_ncounters]; 2204 struct rtstat rtstat; 2205 uint32_t *words = (uint32_t *)&rtstat; 2206 int i; 2207 2208 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2209 memset(&rtstat, 0, sizeof rtstat); 2210 counters_read(rtcounters, counters, nitems(counters)); 2211 2212 for (i = 0; i < nitems(counters); i++) 2213 words[i] = (uint32_t)counters[i]; 2214 2215 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2216 } 2217 2218 int 2219 rtm_validate_proposal(struct rt_addrinfo *info) 2220 { 2221 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2222 RTA_SEARCH)) { 2223 return -1; 2224 } 2225 2226 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2227 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2228 if (sa == NULL) 2229 return -1; 2230 switch (sa->sa_family) { 2231 case AF_INET: 2232 if (sa->sa_len != sizeof(struct sockaddr_in)) 2233 return -1; 2234 break; 2235 case AF_INET6: 2236 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2237 return -1; 2238 break; 2239 default: 2240 return -1; 2241 } 2242 } 2243 2244 if (ISSET(info->rti_addrs, RTA_IFA)) { 2245 struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2246 if (sa == NULL) 2247 return -1; 2248 switch (sa->sa_family) { 2249 case AF_INET: 2250 if (sa->sa_len != sizeof(struct sockaddr_in)) 2251 return -1; 2252 break; 2253 case AF_INET6: 2254 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2255 return -1; 2256 break; 2257 default: 2258 return -1; 2259 } 2260 } 2261 2262 if (ISSET(info->rti_addrs, RTA_DNS)) { 2263 struct sockaddr_rtdns *rtdns = 2264 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2265 if (rtdns == NULL) 2266 return -1; 2267 if (rtdns->sr_len > sizeof(*rtdns)) 2268 return -1; 2269 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2270 return -1; 2271 switch (rtdns->sr_family) { 2272 case AF_INET: 2273 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2274 sr_dns)) % sizeof(struct in_addr) != 0) 2275 return -1; 2276 break; 2277 #ifdef INET6 2278 case AF_INET6: 2279 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2280 sr_dns)) % sizeof(struct in6_addr) != 0) 2281 return -1; 2282 break; 2283 #endif 2284 default: 2285 return -1; 2286 } 2287 } 2288 2289 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2290 struct sockaddr_rtstatic *rtstatic = 2291 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2292 if (rtstatic == NULL) 2293 return -1; 2294 if (rtstatic->sr_len > sizeof(*rtstatic)) 2295 return -1; 2296 if (rtstatic->sr_len <= 2297 offsetof(struct sockaddr_rtstatic, sr_static)) 2298 return -1; 2299 } 2300 2301 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2302 struct sockaddr_rtsearch *rtsearch = 2303 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2304 if (rtsearch == NULL) 2305 return -1; 2306 if (rtsearch->sr_len > sizeof(*rtsearch)) 2307 return -1; 2308 if (rtsearch->sr_len <= 2309 offsetof(struct sockaddr_rtsearch, sr_search)) 2310 return -1; 2311 } 2312 2313 return 0; 2314 } 2315 2316 int 2317 rt_setsource(unsigned int rtableid, struct sockaddr *src) 2318 { 2319 struct ifaddr *ifa; 2320 /* 2321 * If source address is 0.0.0.0 or :: 2322 * use automatic source selection 2323 */ 2324 switch(src->sa_family) { 2325 case AF_INET: 2326 if(satosin(src)->sin_addr.s_addr == INADDR_ANY) { 2327 rtable_setsource(rtableid, AF_INET, NULL); 2328 return (0); 2329 } 2330 break; 2331 #ifdef INET6 2332 case AF_INET6: 2333 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 2334 rtable_setsource(rtableid, AF_INET6, NULL); 2335 return (0); 2336 } 2337 break; 2338 #endif 2339 default: 2340 return (EAFNOSUPPORT); 2341 } 2342 2343 /* 2344 * Check if source address is assigned to an interface in the 2345 * same rdomain 2346 */ 2347 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) 2348 return (EINVAL); 2349 2350 return (rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr)); 2351 } 2352 2353 /* 2354 * Definitions of protocols supported in the ROUTE domain. 2355 */ 2356 2357 struct domain routedomain; 2358 2359 struct protosw routesw[] = { 2360 { 2361 .pr_type = SOCK_RAW, 2362 .pr_domain = &routedomain, 2363 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2364 .pr_output = route_output, 2365 .pr_ctloutput = route_ctloutput, 2366 .pr_usrreq = route_usrreq, 2367 .pr_attach = route_attach, 2368 .pr_detach = route_detach, 2369 .pr_init = route_prinit, 2370 .pr_sysctl = sysctl_rtable 2371 } 2372 }; 2373 2374 struct domain routedomain = { 2375 .dom_family = PF_ROUTE, 2376 .dom_name = "route", 2377 .dom_init = route_init, 2378 .dom_protosw = routesw, 2379 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2380 }; 2381