1 /* $OpenBSD: rtsock.c,v 1.302 2020/09/23 17:52:58 mvs Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/pool.h> 73 #include <sys/protosw.h> 74 #include <sys/srp.h> 75 76 #include <net/if.h> 77 #include <net/if_dl.h> 78 #include <net/if_var.h> 79 #include <net/route.h> 80 81 #include <netinet/in.h> 82 83 #ifdef MPLS 84 #include <netmpls/mpls.h> 85 #endif 86 #ifdef IPSEC 87 #include <netinet/ip_ipsp.h> 88 #include <net/if_enc.h> 89 #endif 90 #ifdef BFD 91 #include <net/bfd.h> 92 #endif 93 94 #include <sys/stdarg.h> 95 #include <sys/kernel.h> 96 #include <sys/timeout.h> 97 98 #define ROUTESNDQ 8192 99 #define ROUTERCVQ 8192 100 101 const struct sockaddr route_src = { 2, PF_ROUTE, }; 102 103 struct walkarg { 104 int w_op, w_arg, w_given, w_needed, w_tmemsize; 105 caddr_t w_where, w_tmem; 106 }; 107 108 void route_prinit(void); 109 void rcb_ref(void *, void *); 110 void rcb_unref(void *, void *); 111 int route_output(struct mbuf *, struct socket *, struct sockaddr *, 112 struct mbuf *); 113 int route_ctloutput(int, struct socket *, int, int, struct mbuf *); 114 int route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, 115 struct mbuf *, struct proc *); 116 void route_input(struct mbuf *m0, struct socket *, sa_family_t); 117 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 118 int route_cleargateway(struct rtentry *, void *, unsigned int); 119 void rtm_senddesync_timer(void *); 120 void rtm_senddesync(struct socket *); 121 int rtm_sendup(struct socket *, struct mbuf *, int); 122 123 int rtm_getifa(struct rt_addrinfo *, unsigned int); 124 int rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *, 125 uint8_t, unsigned int); 126 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int); 127 struct mbuf *rtm_msg1(int, struct rt_addrinfo *); 128 int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t, 129 struct walkarg *); 130 int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 131 int rtm_validate_proposal(struct rt_addrinfo *); 132 void rtm_setmetrics(u_long, const struct rt_metrics *, 133 struct rt_kmetrics *); 134 void rtm_getmetrics(const struct rt_kmetrics *, 135 struct rt_metrics *); 136 137 int sysctl_iflist(int, struct walkarg *); 138 int sysctl_ifnames(struct walkarg *); 139 int sysctl_rtable_rtstat(void *, size_t *, void *); 140 141 /* 142 * Locks used to protect struct members 143 * I immutable after creation 144 * sK solock (kernel lock) 145 */ 146 struct rtpcb { 147 struct socket *rop_socket; /* [I] */ 148 149 SRPL_ENTRY(rtpcb) rop_list; 150 struct refcnt rop_refcnt; 151 struct timeout rop_timeout; 152 unsigned int rop_msgfilter; /* [sK] */ 153 unsigned int rop_flagfilter; /* [sK] */ 154 unsigned int rop_flags; /* [sK] */ 155 u_int rop_rtableid; /* [sK] */ 156 unsigned short rop_proto; /* [I] */ 157 u_char rop_priority; /* [sK] */ 158 }; 159 #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) 160 161 struct rtptable { 162 SRPL_HEAD(, rtpcb) rtp_list; 163 struct srpl_rc rtp_rc; 164 struct rwlock rtp_lk; 165 unsigned int rtp_count; 166 }; 167 168 struct pool rtpcb_pool; 169 struct rtptable rtptable; 170 171 /* 172 * These flags and timeout are used for indicating to userland (via a 173 * RTM_DESYNC msg) when the route socket has overflowed and messages 174 * have been lost. 175 */ 176 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 177 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 178 queueing more packets */ 179 180 #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ 181 182 void 183 route_prinit(void) 184 { 185 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); 186 rw_init(&rtptable.rtp_lk, "rtsock"); 187 SRPL_INIT(&rtptable.rtp_list); 188 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, 189 IPL_NONE, PR_WAITOK, "rtpcb", NULL); 190 } 191 192 void 193 rcb_ref(void *null, void *v) 194 { 195 struct rtpcb *rop = v; 196 197 refcnt_take(&rop->rop_refcnt); 198 } 199 200 void 201 rcb_unref(void *null, void *v) 202 { 203 struct rtpcb *rop = v; 204 205 refcnt_rele_wake(&rop->rop_refcnt); 206 } 207 208 int 209 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 210 struct mbuf *control, struct proc *p) 211 { 212 struct rtpcb *rop; 213 int error = 0; 214 215 if (req == PRU_CONTROL) 216 return (EOPNOTSUPP); 217 218 soassertlocked(so); 219 220 if (control && control->m_len) { 221 error = EOPNOTSUPP; 222 goto release; 223 } 224 225 rop = sotortpcb(so); 226 if (rop == NULL) { 227 error = EINVAL; 228 goto release; 229 } 230 231 switch (req) { 232 /* no connect, bind, accept. Socket is connected from the start */ 233 case PRU_CONNECT: 234 case PRU_BIND: 235 case PRU_CONNECT2: 236 case PRU_LISTEN: 237 case PRU_ACCEPT: 238 error = EOPNOTSUPP; 239 break; 240 241 case PRU_DISCONNECT: 242 case PRU_ABORT: 243 soisdisconnected(so); 244 break; 245 case PRU_SHUTDOWN: 246 socantsendmore(so); 247 break; 248 case PRU_SENSE: 249 /* stat: don't bother with a blocksize. */ 250 break; 251 252 /* minimal support, just implement a fake peer address */ 253 case PRU_SOCKADDR: 254 error = EINVAL; 255 break; 256 case PRU_PEERADDR: 257 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); 258 nam->m_len = route_src.sa_len; 259 break; 260 261 case PRU_RCVD: 262 /* 263 * If we are in a FLUSH state, check if the buffer is 264 * empty so that we can clear the flag. 265 */ 266 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 267 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == 268 rop->rop_socket->so_rcv.sb_hiwat))) 269 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 270 break; 271 272 case PRU_RCVOOB: 273 case PRU_SENDOOB: 274 error = EOPNOTSUPP; 275 break; 276 case PRU_SEND: 277 if (nam) { 278 error = EISCONN; 279 break; 280 } 281 error = (*so->so_proto->pr_output)(m, so, NULL, NULL); 282 m = NULL; 283 break; 284 default: 285 panic("route_usrreq"); 286 } 287 288 release: 289 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 290 m_freem(control); 291 m_freem(m); 292 } 293 return (error); 294 } 295 296 int 297 route_attach(struct socket *so, int proto) 298 { 299 struct rtpcb *rop; 300 int error; 301 302 /* 303 * use the rawcb but allocate a rtpcb, this 304 * code does not care about the additional fields 305 * and works directly on the raw socket. 306 */ 307 rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO); 308 so->so_pcb = rop; 309 /* Init the timeout structure */ 310 timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so); 311 refcnt_init(&rop->rop_refcnt); 312 313 if (curproc == NULL) 314 error = EACCES; 315 else 316 error = soreserve(so, ROUTESNDQ, ROUTERCVQ); 317 if (error) { 318 pool_put(&rtpcb_pool, rop); 319 return (error); 320 } 321 322 rop->rop_socket = so; 323 rop->rop_proto = proto; 324 325 rop->rop_rtableid = curproc->p_p->ps_rtableid; 326 327 soisconnected(so); 328 so->so_options |= SO_USELOOPBACK; 329 330 rw_enter(&rtptable.rtp_lk, RW_WRITE); 331 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, 332 rop_list); 333 rtptable.rtp_count++; 334 rw_exit(&rtptable.rtp_lk); 335 336 return (0); 337 } 338 339 int 340 route_detach(struct socket *so) 341 { 342 struct rtpcb *rop; 343 344 soassertlocked(so); 345 346 rop = sotortpcb(so); 347 if (rop == NULL) 348 return (EINVAL); 349 350 rw_enter(&rtptable.rtp_lk, RW_WRITE); 351 352 timeout_del(&rop->rop_timeout); 353 rtptable.rtp_count--; 354 355 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, 356 rop_list); 357 rw_exit(&rtptable.rtp_lk); 358 359 /* wait for all references to drop */ 360 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); 361 362 so->so_pcb = NULL; 363 KASSERT((so->so_state & SS_NOFDREF) == 0); 364 pool_put(&rtpcb_pool, rop); 365 366 return (0); 367 } 368 369 int 370 route_ctloutput(int op, struct socket *so, int level, int optname, 371 struct mbuf *m) 372 { 373 struct rtpcb *rop = sotortpcb(so); 374 int error = 0; 375 unsigned int tid, prio; 376 377 if (level != AF_ROUTE) 378 return (EINVAL); 379 380 switch (op) { 381 case PRCO_SETOPT: 382 switch (optname) { 383 case ROUTE_MSGFILTER: 384 if (m == NULL || m->m_len != sizeof(unsigned int)) 385 error = EINVAL; 386 else 387 rop->rop_msgfilter = *mtod(m, unsigned int *); 388 break; 389 case ROUTE_TABLEFILTER: 390 if (m == NULL || m->m_len != sizeof(unsigned int)) { 391 error = EINVAL; 392 break; 393 } 394 tid = *mtod(m, unsigned int *); 395 if (tid != RTABLE_ANY && !rtable_exists(tid)) 396 error = ENOENT; 397 else 398 rop->rop_rtableid = tid; 399 break; 400 case ROUTE_PRIOFILTER: 401 if (m == NULL || m->m_len != sizeof(unsigned int)) { 402 error = EINVAL; 403 break; 404 } 405 prio = *mtod(m, unsigned int *); 406 if (prio > RTP_MAX) 407 error = EINVAL; 408 else 409 rop->rop_priority = prio; 410 break; 411 case ROUTE_FLAGFILTER: 412 if (m == NULL || m->m_len != sizeof(unsigned int)) 413 error = EINVAL; 414 else 415 rop->rop_flagfilter = *mtod(m, unsigned int *); 416 break; 417 default: 418 error = ENOPROTOOPT; 419 break; 420 } 421 break; 422 case PRCO_GETOPT: 423 switch (optname) { 424 case ROUTE_MSGFILTER: 425 m->m_len = sizeof(unsigned int); 426 *mtod(m, unsigned int *) = rop->rop_msgfilter; 427 break; 428 case ROUTE_TABLEFILTER: 429 m->m_len = sizeof(unsigned int); 430 *mtod(m, unsigned int *) = rop->rop_rtableid; 431 break; 432 case ROUTE_PRIOFILTER: 433 m->m_len = sizeof(unsigned int); 434 *mtod(m, unsigned int *) = rop->rop_priority; 435 break; 436 case ROUTE_FLAGFILTER: 437 m->m_len = sizeof(unsigned int); 438 *mtod(m, unsigned int *) = rop->rop_flagfilter; 439 break; 440 default: 441 error = ENOPROTOOPT; 442 break; 443 } 444 } 445 return (error); 446 } 447 448 void 449 rtm_senddesync_timer(void *xso) 450 { 451 struct socket *so = xso; 452 int s; 453 454 s = solock(so); 455 rtm_senddesync(so); 456 sounlock(so, s); 457 } 458 459 void 460 rtm_senddesync(struct socket *so) 461 { 462 struct rtpcb *rop = sotortpcb(so); 463 struct mbuf *desync_mbuf; 464 465 soassertlocked(so); 466 467 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 468 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 469 return; 470 471 /* 472 * If we fail to alloc memory or if sbappendaddr() 473 * fails, re-add timeout and try again. 474 */ 475 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 476 if (desync_mbuf != NULL) { 477 if (sbappendaddr(so, &so->so_rcv, &route_src, 478 desync_mbuf, NULL) != 0) { 479 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 480 sorwakeup(rop->rop_socket); 481 return; 482 } 483 m_freem(desync_mbuf); 484 } 485 /* Re-add timeout to try sending msg again */ 486 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 487 } 488 489 void 490 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 491 { 492 struct socket *so; 493 struct rtpcb *rop; 494 struct rt_msghdr *rtm; 495 struct mbuf *m = m0; 496 struct socket *last = NULL; 497 struct srp_ref sr; 498 int s; 499 500 /* ensure that we can access the rtm_type via mtod() */ 501 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 502 m_freem(m); 503 return; 504 } 505 506 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 507 /* 508 * If route socket is bound to an address family only send 509 * messages that match the address family. Address family 510 * agnostic messages are always sent. 511 */ 512 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 513 rop->rop_proto != sa_family) 514 continue; 515 516 517 so = rop->rop_socket; 518 s = solock(so); 519 520 /* 521 * Check to see if we don't want our own messages and 522 * if we can receive anything. 523 */ 524 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 525 !(so->so_state & SS_ISCONNECTED) || 526 (so->so_state & SS_CANTRCVMORE)) { 527 next: 528 sounlock(so, s); 529 continue; 530 } 531 532 /* filter messages that the process does not want */ 533 rtm = mtod(m, struct rt_msghdr *); 534 /* but RTM_DESYNC can't be filtered */ 535 if (rtm->rtm_type != RTM_DESYNC) { 536 if (rop->rop_msgfilter != 0 && 537 !(rop->rop_msgfilter & (1 << rtm->rtm_type))) 538 goto next; 539 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags)) 540 goto next; 541 } 542 switch (rtm->rtm_type) { 543 case RTM_IFANNOUNCE: 544 case RTM_DESYNC: 545 /* no tableid */ 546 break; 547 case RTM_RESOLVE: 548 case RTM_NEWADDR: 549 case RTM_DELADDR: 550 case RTM_IFINFO: 551 case RTM_80211INFO: 552 case RTM_BFD: 553 /* check against rdomain id */ 554 if (rop->rop_rtableid != RTABLE_ANY && 555 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 556 goto next; 557 break; 558 default: 559 if (rop->rop_priority != 0 && 560 rop->rop_priority < rtm->rtm_priority) 561 goto next; 562 /* check against rtable id */ 563 if (rop->rop_rtableid != RTABLE_ANY && 564 rop->rop_rtableid != rtm->rtm_tableid) 565 goto next; 566 break; 567 } 568 569 /* 570 * Check to see if the flush flag is set. If so, don't queue 571 * any more messages until the flag is cleared. 572 */ 573 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 574 goto next; 575 sounlock(so, s); 576 577 if (last) { 578 s = solock(last); 579 rtm_sendup(last, m, 1); 580 sounlock(last, s); 581 refcnt_rele_wake(&sotortpcb(last)->rop_refcnt); 582 } 583 /* keep a reference for last */ 584 refcnt_take(&rop->rop_refcnt); 585 last = rop->rop_socket; 586 } 587 SRPL_LEAVE(&sr); 588 589 if (last) { 590 s = solock(last); 591 rtm_sendup(last, m, 0); 592 sounlock(last, s); 593 refcnt_rele_wake(&sotortpcb(last)->rop_refcnt); 594 } else 595 m_freem(m); 596 } 597 598 int 599 rtm_sendup(struct socket *so, struct mbuf *m0, int more) 600 { 601 struct rtpcb *rop = sotortpcb(so); 602 struct mbuf *m; 603 604 soassertlocked(so); 605 606 if (more) { 607 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 608 if (m == NULL) 609 return (ENOMEM); 610 } else 611 m = m0; 612 613 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || 614 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { 615 /* Flag socket as desync'ed and flush required */ 616 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 617 rtm_senddesync(so); 618 m_freem(m); 619 return (ENOBUFS); 620 } 621 622 sorwakeup(so); 623 return (0); 624 } 625 626 struct rt_msghdr * 627 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 628 { 629 struct rt_msghdr *rtm; 630 struct rt_addrinfo info; 631 struct sockaddr_rtlabel sa_rl; 632 struct sockaddr_in6 sa_mask; 633 #ifdef BFD 634 struct sockaddr_bfd sa_bfd; 635 #endif 636 struct ifnet *ifp = NULL; 637 int len; 638 639 bzero(&info, sizeof(info)); 640 info.rti_info[RTAX_DST] = rt_key(rt); 641 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 642 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 643 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 644 #ifdef BFD 645 if (rt->rt_flags & RTF_BFD) 646 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 647 #endif 648 #ifdef MPLS 649 if (rt->rt_flags & RTF_MPLS) { 650 struct sockaddr_mpls sa_mpls; 651 652 bzero(&sa_mpls, sizeof(sa_mpls)); 653 sa_mpls.smpls_family = AF_MPLS; 654 sa_mpls.smpls_len = sizeof(sa_mpls); 655 sa_mpls.smpls_label = ((struct rt_mpls *) 656 rt->rt_llinfo)->mpls_label; 657 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 658 info.rti_mpls = ((struct rt_mpls *) 659 rt->rt_llinfo)->mpls_operation; 660 } 661 #endif 662 ifp = if_get(rt->rt_ifidx); 663 if (ifp != NULL) { 664 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 665 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 666 if (ifp->if_flags & IFF_POINTOPOINT) 667 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 668 } 669 if_put(ifp); 670 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 671 672 /* build new route message */ 673 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 674 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 675 676 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 677 rtm->rtm_type = type; 678 rtm->rtm_index = rt->rt_ifidx; 679 rtm->rtm_tableid = tableid; 680 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 681 rtm->rtm_flags = rt->rt_flags; 682 rtm->rtm_pid = curproc->p_p->ps_pid; 683 rtm->rtm_seq = seq; 684 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 685 rtm->rtm_addrs = info.rti_addrs; 686 #ifdef MPLS 687 rtm->rtm_mpls = info.rti_mpls; 688 #endif 689 return rtm; 690 } 691 692 int 693 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 694 struct mbuf *control) 695 { 696 struct rt_msghdr *rtm = NULL; 697 struct rtentry *rt = NULL; 698 struct rt_addrinfo info; 699 int len, seq, error = 0; 700 u_int tableid; 701 u_int8_t prio; 702 u_char vers, type; 703 704 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 705 (m = m_pullup(m, sizeof(int32_t))) == 0)) 706 return (ENOBUFS); 707 if ((m->m_flags & M_PKTHDR) == 0) 708 panic("route_output"); 709 len = m->m_pkthdr.len; 710 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 || 711 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 712 error = EINVAL; 713 goto fail; 714 } 715 vers = mtod(m, struct rt_msghdr *)->rtm_version; 716 switch (vers) { 717 case RTM_VERSION: 718 if (len < sizeof(struct rt_msghdr)) { 719 error = EINVAL; 720 goto fail; 721 } 722 if (len > RTM_MAXSIZE) { 723 error = EMSGSIZE; 724 goto fail; 725 } 726 rtm = malloc(len, M_RTABLE, M_WAITOK); 727 m_copydata(m, 0, len, (caddr_t)rtm); 728 break; 729 default: 730 error = EPROTONOSUPPORT; 731 goto fail; 732 } 733 734 /* Verify that the caller is sending an appropriate message early */ 735 switch (rtm->rtm_type) { 736 case RTM_ADD: 737 case RTM_DELETE: 738 case RTM_GET: 739 case RTM_CHANGE: 740 case RTM_PROPOSAL: 741 break; 742 default: 743 error = EOPNOTSUPP; 744 goto fail; 745 } 746 /* 747 * Verify that the header length is valid. 748 * All messages from userland start with a struct rt_msghdr. 749 */ 750 if (rtm->rtm_hdrlen == 0) /* old client */ 751 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 752 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 753 len < rtm->rtm_hdrlen) { 754 error = EINVAL; 755 goto fail; 756 } 757 758 rtm->rtm_pid = curproc->p_p->ps_pid; 759 760 /* 761 * Verify that the caller has the appropriate privilege; RTM_GET 762 * is the only operation the non-superuser is allowed. 763 */ 764 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 765 error = EACCES; 766 goto fail; 767 } 768 tableid = rtm->rtm_tableid; 769 if (!rtable_exists(tableid)) { 770 if (rtm->rtm_type == RTM_ADD) { 771 if ((error = rtable_add(tableid)) != 0) 772 goto fail; 773 } else { 774 error = EINVAL; 775 goto fail; 776 } 777 } 778 779 780 /* Do not let userland play with kernel-only flags. */ 781 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 782 error = EINVAL; 783 goto fail; 784 } 785 786 /* make sure that kernel-only bits are not set */ 787 rtm->rtm_priority &= RTP_MASK; 788 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 789 rtm->rtm_fmask &= RTF_FMASK; 790 791 if (rtm->rtm_priority != 0) { 792 if (rtm->rtm_priority > RTP_MAX || 793 rtm->rtm_priority == RTP_LOCAL) { 794 error = EINVAL; 795 goto fail; 796 } 797 prio = rtm->rtm_priority; 798 } else if (rtm->rtm_type != RTM_ADD) 799 prio = RTP_ANY; 800 else if (rtm->rtm_flags & RTF_STATIC) 801 prio = 0; 802 else 803 prio = RTP_DEFAULT; 804 805 bzero(&info, sizeof(info)); 806 info.rti_addrs = rtm->rtm_addrs; 807 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 808 len + (caddr_t)rtm, &info)) != 0) 809 goto fail; 810 info.rti_flags = rtm->rtm_flags; 811 if (rtm->rtm_type != RTM_PROPOSAL && 812 (info.rti_info[RTAX_DST] == NULL || 813 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 814 (info.rti_info[RTAX_GATEWAY] != NULL && 815 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 816 info.rti_info[RTAX_GENMASK] != NULL)) { 817 error = EINVAL; 818 goto fail; 819 } 820 #ifdef MPLS 821 info.rti_mpls = rtm->rtm_mpls; 822 #endif 823 824 if (info.rti_info[RTAX_GATEWAY] != NULL && 825 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 826 (info.rti_flags & RTF_CLONING) == 0) { 827 info.rti_flags |= RTF_LLINFO; 828 } 829 830 /* 831 * Validate RTM_PROPOSAL and pass it along or error out. 832 */ 833 if (rtm->rtm_type == RTM_PROPOSAL) { 834 if (rtm_validate_proposal(&info) == -1) { 835 error = EINVAL; 836 goto fail; 837 } 838 /* 839 * If this is a solicitation proposal forward request to 840 * all interfaces. Most handlers will ignore it but at least 841 * umb(4) will send a response to this event. 842 */ 843 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 844 struct ifnet *ifp; 845 NET_LOCK(); 846 TAILQ_FOREACH(ifp, &ifnet, if_list) { 847 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 848 } 849 NET_UNLOCK(); 850 } 851 } else { 852 error = rtm_output(rtm, &rt, &info, prio, tableid); 853 if (!error) { 854 type = rtm->rtm_type; 855 seq = rtm->rtm_seq; 856 free(rtm, M_RTABLE, len); 857 rtm = rtm_report(rt, type, seq, tableid); 858 len = rtm->rtm_msglen; 859 } 860 } 861 862 rtfree(rt); 863 if (error) { 864 rtm->rtm_errno = error; 865 } else { 866 rtm->rtm_flags |= RTF_DONE; 867 } 868 869 /* 870 * Check to see if we don't want our own messages. 871 */ 872 if (!(so->so_options & SO_USELOOPBACK)) { 873 if (rtptable.rtp_count <= 1) { 874 /* no other listener and no loopback of messages */ 875 fail: 876 free(rtm, M_RTABLE, len); 877 m_freem(m); 878 return (error); 879 } 880 } 881 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 882 m_freem(m); 883 m = NULL; 884 } else if (m->m_pkthdr.len > len) 885 m_adj(m, len - m->m_pkthdr.len); 886 free(rtm, M_RTABLE, len); 887 if (m) 888 route_input(m, so, info.rti_info[RTAX_DST] ? 889 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 890 891 return (error); 892 } 893 894 int 895 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 896 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 897 { 898 struct rtentry *rt = *prt; 899 struct ifnet *ifp = NULL; 900 int plen, newgate = 0, error = 0; 901 902 switch (rtm->rtm_type) { 903 case RTM_ADD: 904 if (info->rti_info[RTAX_GATEWAY] == NULL) { 905 error = EINVAL; 906 break; 907 } 908 909 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 910 if ((error = route_arp_conflict(rt, info))) { 911 rtfree(rt); 912 rt = NULL; 913 break; 914 } 915 916 /* 917 * We cannot go through a delete/create/insert cycle for 918 * cached route because this can lead to races in the 919 * receive path. Instead we update the L2 cache. 920 */ 921 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 922 goto change; 923 924 rtfree(rt); 925 rt = NULL; 926 927 NET_LOCK(); 928 if ((error = rtm_getifa(info, tableid)) != 0) { 929 NET_UNLOCK(); 930 break; 931 } 932 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 933 NET_UNLOCK(); 934 if (error == 0) 935 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 936 &rt->rt_rmx); 937 break; 938 case RTM_DELETE: 939 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 940 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 941 prio); 942 if (rt == NULL) { 943 error = ESRCH; 944 break; 945 } 946 947 /* 948 * If we got multipath routes, we require users to specify 949 * a matching gateway. 950 */ 951 if (ISSET(rt->rt_flags, RTF_MPATH) && 952 info->rti_info[RTAX_GATEWAY] == NULL) { 953 error = ESRCH; 954 break; 955 } 956 957 /* Detaching an interface requires the KERNEL_LOCK(). */ 958 ifp = if_get(rt->rt_ifidx); 959 KASSERT(ifp != NULL); 960 961 /* 962 * Invalidate the cache of automagically created and 963 * referenced L2 entries to make sure that ``rt_gwroute'' 964 * pointer stays valid for other CPUs. 965 */ 966 if ((ISSET(rt->rt_flags, RTF_CACHED))) { 967 NET_LOCK(); 968 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 969 /* Reset the MTU of the gateway route. */ 970 rtable_walk(tableid, rt_key(rt)->sa_family, NULL, 971 route_cleargateway, rt); 972 NET_UNLOCK(); 973 if_put(ifp); 974 break; 975 } 976 977 /* 978 * Make sure that local routes are only modified by the 979 * kernel. 980 */ 981 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 982 if_put(ifp); 983 error = EINVAL; 984 break; 985 } 986 987 rtfree(rt); 988 rt = NULL; 989 990 NET_LOCK(); 991 error = rtrequest_delete(info, prio, ifp, &rt, tableid); 992 NET_UNLOCK(); 993 if_put(ifp); 994 break; 995 case RTM_CHANGE: 996 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 997 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 998 prio); 999 /* 1000 * If we got multipath routes, we require users to specify 1001 * a matching gateway. 1002 */ 1003 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 1004 (info->rti_info[RTAX_GATEWAY] == NULL)) { 1005 rtfree(rt); 1006 rt = NULL; 1007 } 1008 /* 1009 * If RTAX_GATEWAY is the argument we're trying to 1010 * change, try to find a compatible route. 1011 */ 1012 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) { 1013 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1014 info->rti_info[RTAX_NETMASK], NULL, prio); 1015 /* Ensure we don't pick a multipath one. */ 1016 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 1017 rtfree(rt); 1018 rt = NULL; 1019 } 1020 } 1021 1022 if (rt == NULL) { 1023 error = ESRCH; 1024 break; 1025 } 1026 1027 /* 1028 * Make sure that local routes are only modified by the 1029 * kernel. 1030 */ 1031 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1032 error = EINVAL; 1033 break; 1034 } 1035 1036 /* 1037 * RTM_CHANGE needs a perfect match. 1038 */ 1039 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family, 1040 info->rti_info[RTAX_NETMASK]); 1041 if (rt_plen(rt) != plen) { 1042 error = ESRCH; 1043 break; 1044 } 1045 1046 if (info->rti_info[RTAX_GATEWAY] != NULL) 1047 if (rt->rt_gateway == NULL || 1048 bcmp(rt->rt_gateway, 1049 info->rti_info[RTAX_GATEWAY], 1050 info->rti_info[RTAX_GATEWAY]->sa_len)) { 1051 newgate = 1; 1052 } 1053 /* 1054 * Check reachable gateway before changing the route. 1055 * New gateway could require new ifaddr, ifp; 1056 * flags may also be different; ifp may be specified 1057 * by ll sockaddr when protocol address is ambiguous. 1058 */ 1059 if (newgate || info->rti_info[RTAX_IFP] != NULL || 1060 info->rti_info[RTAX_IFA] != NULL) { 1061 struct ifaddr *ifa = NULL; 1062 1063 NET_LOCK(); 1064 if ((error = rtm_getifa(info, tableid)) != 0) { 1065 NET_UNLOCK(); 1066 break; 1067 } 1068 ifa = info->rti_ifa; 1069 if (rt->rt_ifa != ifa) { 1070 ifp = if_get(rt->rt_ifidx); 1071 KASSERT(ifp != NULL); 1072 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 1073 ifafree(rt->rt_ifa); 1074 if_put(ifp); 1075 1076 ifa->ifa_refcnt++; 1077 rt->rt_ifa = ifa; 1078 rt->rt_ifidx = ifa->ifa_ifp->if_index; 1079 /* recheck link state after ifp change */ 1080 rt_if_linkstate_change(rt, ifa->ifa_ifp, 1081 tableid); 1082 } 1083 NET_UNLOCK(); 1084 } 1085 change: 1086 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1087 /* When updating the gateway, make sure it is valid. */ 1088 if (!newgate && rt->rt_gateway->sa_family != 1089 info->rti_info[RTAX_GATEWAY]->sa_family) { 1090 error = EINVAL; 1091 break; 1092 } 1093 1094 NET_LOCK(); 1095 error = rt_setgate(rt, 1096 info->rti_info[RTAX_GATEWAY], tableid); 1097 NET_UNLOCK(); 1098 if (error) 1099 break; 1100 } 1101 #ifdef MPLS 1102 if (rtm->rtm_flags & RTF_MPLS) { 1103 NET_LOCK(); 1104 error = rt_mpls_set(rt, 1105 info->rti_info[RTAX_SRC], info->rti_mpls); 1106 NET_UNLOCK(); 1107 if (error) 1108 break; 1109 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) { 1110 NET_LOCK(); 1111 /* if gateway changed remove MPLS information */ 1112 rt_mpls_clear(rt); 1113 NET_UNLOCK(); 1114 } 1115 #endif 1116 1117 #ifdef BFD 1118 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 1119 if ((error = bfdset(rt))) 1120 break; 1121 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 1122 ISSET(rtm->rtm_fmask, RTF_BFD)) { 1123 bfdclear(rt); 1124 } 1125 #endif 1126 1127 NET_LOCK(); 1128 /* Hack to allow some flags to be toggled */ 1129 if (rtm->rtm_fmask) { 1130 /* MPLS flag it is set by rt_mpls_set() */ 1131 rtm->rtm_fmask &= ~RTF_MPLS; 1132 rtm->rtm_flags &= ~RTF_MPLS; 1133 rt->rt_flags = 1134 (rt->rt_flags & ~rtm->rtm_fmask) | 1135 (rtm->rtm_flags & rtm->rtm_fmask); 1136 } 1137 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); 1138 1139 ifp = if_get(rt->rt_ifidx); 1140 KASSERT(ifp != NULL); 1141 ifp->if_rtrequest(ifp, RTM_ADD, rt); 1142 if_put(ifp); 1143 1144 if (info->rti_info[RTAX_LABEL] != NULL) { 1145 char *rtlabel = ((struct sockaddr_rtlabel *) 1146 info->rti_info[RTAX_LABEL])->sr_label; 1147 rtlabel_unref(rt->rt_labelid); 1148 rt->rt_labelid = rtlabel_name2id(rtlabel); 1149 } 1150 if_group_routechange(info->rti_info[RTAX_DST], 1151 info->rti_info[RTAX_NETMASK]); 1152 rt->rt_locks &= ~(rtm->rtm_inits); 1153 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 1154 NET_UNLOCK(); 1155 break; 1156 case RTM_GET: 1157 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1158 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1159 prio); 1160 if (rt == NULL) 1161 error = ESRCH; 1162 break; 1163 } 1164 1165 *prt = rt; 1166 return (error); 1167 } 1168 1169 struct ifaddr * 1170 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway, 1171 unsigned int rtableid) 1172 { 1173 struct ifaddr *ifa; 1174 1175 if ((flags & RTF_GATEWAY) == 0) { 1176 /* 1177 * If we are adding a route to an interface, 1178 * and the interface is a pt to pt link 1179 * we should search for the destination 1180 * as our clue to the interface. Otherwise 1181 * we can use the local address. 1182 */ 1183 ifa = NULL; 1184 if (flags & RTF_HOST) 1185 ifa = ifa_ifwithdstaddr(dst, rtableid); 1186 if (ifa == NULL) 1187 ifa = ifa_ifwithaddr(gateway, rtableid); 1188 } else { 1189 /* 1190 * If we are adding a route to a remote net 1191 * or host, the gateway may still be on the 1192 * other end of a pt to pt link. 1193 */ 1194 ifa = ifa_ifwithdstaddr(gateway, rtableid); 1195 } 1196 if (ifa == NULL) { 1197 if (gateway->sa_family == AF_LINK) { 1198 struct sockaddr_dl *sdl = satosdl(gateway); 1199 struct ifnet *ifp = if_get(sdl->sdl_index); 1200 1201 if (ifp != NULL) 1202 ifa = ifaof_ifpforaddr(dst, ifp); 1203 if_put(ifp); 1204 } else { 1205 struct rtentry *rt; 1206 1207 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid)); 1208 if (rt != NULL) 1209 ifa = rt->rt_ifa; 1210 rtfree(rt); 1211 } 1212 } 1213 if (ifa == NULL) 1214 return (NULL); 1215 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1216 struct ifaddr *oifa = ifa; 1217 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1218 if (ifa == NULL) 1219 ifa = oifa; 1220 } 1221 return (ifa); 1222 } 1223 1224 int 1225 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid) 1226 { 1227 struct ifnet *ifp = NULL; 1228 1229 /* 1230 * The "returned" `ifa' is guaranteed to be alive only if 1231 * the NET_LOCK() is held. 1232 */ 1233 NET_ASSERT_LOCKED(); 1234 1235 /* 1236 * ifp may be specified by sockaddr_dl when protocol address 1237 * is ambiguous 1238 */ 1239 if (info->rti_info[RTAX_IFP] != NULL) { 1240 struct sockaddr_dl *sdl; 1241 1242 sdl = satosdl(info->rti_info[RTAX_IFP]); 1243 ifp = if_get(sdl->sdl_index); 1244 } 1245 1246 #ifdef IPSEC 1247 /* 1248 * If the destination is a PF_KEY address, we'll look 1249 * for the existence of a encap interface number or address 1250 * in the options list of the gateway. By default, we'll return 1251 * enc0. 1252 */ 1253 if (info->rti_info[RTAX_DST] && 1254 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1255 info->rti_ifa = enc_getifa(rtid, 0); 1256 #endif 1257 1258 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1259 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1260 1261 if (info->rti_ifa == NULL) { 1262 struct sockaddr *sa; 1263 1264 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1265 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1266 sa = info->rti_info[RTAX_DST]; 1267 1268 if (sa != NULL && ifp != NULL) 1269 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1270 else if (info->rti_info[RTAX_DST] != NULL && 1271 info->rti_info[RTAX_GATEWAY] != NULL) 1272 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1273 info->rti_info[RTAX_DST], 1274 info->rti_info[RTAX_GATEWAY], 1275 rtid); 1276 else if (sa != NULL) 1277 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1278 sa, sa, rtid); 1279 } 1280 1281 if_put(ifp); 1282 1283 if (info->rti_ifa == NULL) 1284 return (ENETUNREACH); 1285 1286 return (0); 1287 } 1288 1289 int 1290 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1291 { 1292 struct rtentry *nhrt = arg; 1293 1294 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1295 !ISSET(rt->rt_locks, RTV_MTU)) 1296 rt->rt_mtu = 0; 1297 1298 return (0); 1299 } 1300 1301 /* 1302 * Check if the user request to insert an ARP entry does not conflict 1303 * with existing ones. 1304 * 1305 * Only two entries are allowed for a given IP address: a private one 1306 * (priv) and a public one (pub). 1307 */ 1308 int 1309 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1310 { 1311 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1312 1313 if ((info->rti_flags & RTF_LLINFO) == 0 || 1314 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1315 return (0); 1316 1317 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1318 return (0); 1319 1320 /* If the entry is cached, it can be updated. */ 1321 if (ISSET(rt->rt_flags, RTF_CACHED)) 1322 return (0); 1323 1324 /* 1325 * Same destination, not cached and both "priv" or "pub" conflict. 1326 * If a second entry exists, it always conflict. 1327 */ 1328 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1329 ISSET(rt->rt_flags, RTF_MPATH)) 1330 return (EEXIST); 1331 1332 /* No conflict but an entry exist so we need to force mpath. */ 1333 info->rti_flags |= RTF_MPATH; 1334 return (0); 1335 } 1336 1337 void 1338 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1339 struct rt_kmetrics *out) 1340 { 1341 int64_t expire; 1342 1343 if (which & RTV_MTU) 1344 out->rmx_mtu = in->rmx_mtu; 1345 if (which & RTV_EXPIRE) { 1346 expire = in->rmx_expire; 1347 if (expire != 0) { 1348 expire -= gettime(); 1349 expire += getuptime(); 1350 } 1351 1352 out->rmx_expire = expire; 1353 } 1354 } 1355 1356 void 1357 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1358 { 1359 int64_t expire; 1360 1361 expire = in->rmx_expire; 1362 if (expire != 0) { 1363 expire -= getuptime(); 1364 expire += gettime(); 1365 } 1366 1367 bzero(out, sizeof(*out)); 1368 out->rmx_locks = in->rmx_locks; 1369 out->rmx_mtu = in->rmx_mtu; 1370 out->rmx_expire = expire; 1371 out->rmx_pksent = in->rmx_pksent; 1372 } 1373 1374 #define ROUNDUP(a) \ 1375 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1376 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1377 1378 int 1379 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1380 { 1381 struct sockaddr *sa; 1382 int i; 1383 1384 /* 1385 * Parse address bits, split address storage in chunks, and 1386 * set info pointers. Use sa_len for traversing the memory 1387 * and check that we stay within in the limit. 1388 */ 1389 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1390 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1391 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1392 continue; 1393 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1394 return (EINVAL); 1395 sa = (struct sockaddr *)cp; 1396 if (cp + sa->sa_len > cplim) 1397 return (EINVAL); 1398 rtinfo->rti_info[i] = sa; 1399 ADVANCE(cp, sa); 1400 } 1401 /* 1402 * Check that the address family is suitable for the route address 1403 * type. Check that each address has a size that fits its family 1404 * and its length is within the size. Strings within addresses must 1405 * be NUL terminated. 1406 */ 1407 for (i = 0; i < RTAX_MAX; i++) { 1408 size_t len, maxlen, size; 1409 1410 sa = rtinfo->rti_info[i]; 1411 if (sa == NULL) 1412 continue; 1413 maxlen = size = 0; 1414 switch (i) { 1415 case RTAX_DST: 1416 case RTAX_GATEWAY: 1417 case RTAX_SRC: 1418 switch (sa->sa_family) { 1419 case AF_INET: 1420 size = sizeof(struct sockaddr_in); 1421 break; 1422 case AF_LINK: 1423 size = sizeof(struct sockaddr_dl); 1424 break; 1425 #ifdef INET6 1426 case AF_INET6: 1427 size = sizeof(struct sockaddr_in6); 1428 break; 1429 #endif 1430 #ifdef MPLS 1431 case AF_MPLS: 1432 size = sizeof(struct sockaddr_mpls); 1433 break; 1434 #endif 1435 } 1436 break; 1437 case RTAX_IFP: 1438 if (sa->sa_family != AF_LINK) 1439 return (EAFNOSUPPORT); 1440 /* 1441 * XXX Should be sizeof(struct sockaddr_dl), but 1442 * route(8) has a bug and provides less memory. 1443 * arp(8) has another bug and uses sizeof pointer. 1444 */ 1445 size = 4; 1446 break; 1447 case RTAX_IFA: 1448 switch (sa->sa_family) { 1449 case AF_INET: 1450 size = sizeof(struct sockaddr_in); 1451 break; 1452 #ifdef INET6 1453 case AF_INET6: 1454 size = sizeof(struct sockaddr_in6); 1455 break; 1456 #endif 1457 default: 1458 return (EAFNOSUPPORT); 1459 } 1460 break; 1461 case RTAX_LABEL: 1462 sa->sa_family = AF_UNSPEC; 1463 maxlen = RTLABEL_LEN; 1464 size = sizeof(struct sockaddr_rtlabel); 1465 break; 1466 #ifdef BFD 1467 case RTAX_BFD: 1468 sa->sa_family = AF_UNSPEC; 1469 size = sizeof(struct sockaddr_bfd); 1470 break; 1471 #endif 1472 case RTAX_DNS: 1473 /* more validation in rtm_validate_proposal */ 1474 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1475 return (EINVAL); 1476 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1477 sr_dns)) 1478 return (EINVAL); 1479 switch (sa->sa_family) { 1480 case AF_INET: 1481 #ifdef INET6 1482 case AF_INET6: 1483 #endif 1484 break; 1485 default: 1486 return (EAFNOSUPPORT); 1487 } 1488 break; 1489 case RTAX_STATIC: 1490 sa->sa_family = AF_UNSPEC; 1491 maxlen = RTSTATIC_LEN; 1492 size = sizeof(struct sockaddr_rtstatic); 1493 break; 1494 case RTAX_SEARCH: 1495 sa->sa_family = AF_UNSPEC; 1496 maxlen = RTSEARCH_LEN; 1497 size = sizeof(struct sockaddr_rtsearch); 1498 break; 1499 } 1500 if (size) { 1501 /* memory for the full struct must be provided */ 1502 if (sa->sa_len < size) 1503 return (EINVAL); 1504 } 1505 if (maxlen) { 1506 /* this should not happen */ 1507 if (2 + maxlen > size) 1508 return (EINVAL); 1509 /* strings must be NUL terminated within the struct */ 1510 len = strnlen(sa->sa_data, maxlen); 1511 if (len >= maxlen || 2 + len >= sa->sa_len) 1512 return (EINVAL); 1513 break; 1514 } 1515 } 1516 return (0); 1517 } 1518 1519 struct mbuf * 1520 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1521 { 1522 struct rt_msghdr *rtm; 1523 struct mbuf *m; 1524 int i; 1525 struct sockaddr *sa; 1526 int len, dlen, hlen; 1527 1528 switch (type) { 1529 case RTM_DELADDR: 1530 case RTM_NEWADDR: 1531 len = sizeof(struct ifa_msghdr); 1532 break; 1533 case RTM_IFINFO: 1534 len = sizeof(struct if_msghdr); 1535 break; 1536 case RTM_IFANNOUNCE: 1537 len = sizeof(struct if_announcemsghdr); 1538 break; 1539 #ifdef BFD 1540 case RTM_BFD: 1541 len = sizeof(struct bfd_msghdr); 1542 break; 1543 #endif 1544 case RTM_80211INFO: 1545 len = sizeof(struct if_ieee80211_msghdr); 1546 break; 1547 default: 1548 len = sizeof(struct rt_msghdr); 1549 break; 1550 } 1551 if (len > MCLBYTES) 1552 panic("rtm_msg1"); 1553 m = m_gethdr(M_DONTWAIT, MT_DATA); 1554 if (m && len > MHLEN) { 1555 MCLGET(m, M_DONTWAIT); 1556 if ((m->m_flags & M_EXT) == 0) { 1557 m_free(m); 1558 m = NULL; 1559 } 1560 } 1561 if (m == NULL) 1562 return (m); 1563 m->m_pkthdr.len = m->m_len = hlen = len; 1564 m->m_pkthdr.ph_ifidx = 0; 1565 rtm = mtod(m, struct rt_msghdr *); 1566 bzero(rtm, len); 1567 for (i = 0; i < RTAX_MAX; i++) { 1568 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1569 continue; 1570 rtinfo->rti_addrs |= (1 << i); 1571 dlen = ROUNDUP(sa->sa_len); 1572 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1573 m_freem(m); 1574 return (NULL); 1575 } 1576 len += dlen; 1577 } 1578 rtm->rtm_msglen = len; 1579 rtm->rtm_hdrlen = hlen; 1580 rtm->rtm_version = RTM_VERSION; 1581 rtm->rtm_type = type; 1582 return (m); 1583 } 1584 1585 int 1586 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1587 struct walkarg *w) 1588 { 1589 int i; 1590 int len, dlen, hlen, second_time = 0; 1591 caddr_t cp0; 1592 1593 rtinfo->rti_addrs = 0; 1594 again: 1595 switch (type) { 1596 case RTM_DELADDR: 1597 case RTM_NEWADDR: 1598 len = sizeof(struct ifa_msghdr); 1599 break; 1600 case RTM_IFINFO: 1601 len = sizeof(struct if_msghdr); 1602 break; 1603 default: 1604 len = sizeof(struct rt_msghdr); 1605 break; 1606 } 1607 hlen = len; 1608 if ((cp0 = cp) != NULL) 1609 cp += len; 1610 for (i = 0; i < RTAX_MAX; i++) { 1611 struct sockaddr *sa; 1612 1613 if ((sa = rtinfo->rti_info[i]) == NULL) 1614 continue; 1615 rtinfo->rti_addrs |= (1 << i); 1616 dlen = ROUNDUP(sa->sa_len); 1617 if (cp) { 1618 bcopy(sa, cp, (size_t)dlen); 1619 cp += dlen; 1620 } 1621 len += dlen; 1622 } 1623 /* align message length to the next natural boundary */ 1624 len = ALIGN(len); 1625 if (cp == 0 && w != NULL && !second_time) { 1626 w->w_needed += len; 1627 if (w->w_needed <= 0 && w->w_where) { 1628 if (w->w_tmemsize < len) { 1629 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1630 w->w_tmem = malloc(len, M_RTABLE, 1631 M_NOWAIT | M_ZERO); 1632 if (w->w_tmem) 1633 w->w_tmemsize = len; 1634 } 1635 if (w->w_tmem) { 1636 cp = w->w_tmem; 1637 second_time = 1; 1638 goto again; 1639 } else 1640 w->w_where = 0; 1641 } 1642 } 1643 if (cp && w) /* clear the message header */ 1644 bzero(cp0, hlen); 1645 1646 if (cp) { 1647 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1648 1649 rtm->rtm_version = RTM_VERSION; 1650 rtm->rtm_type = type; 1651 rtm->rtm_msglen = len; 1652 rtm->rtm_hdrlen = hlen; 1653 } 1654 return (len); 1655 } 1656 1657 void 1658 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1659 { 1660 struct rt_addrinfo info; 1661 struct ifnet *ifp; 1662 struct sockaddr_rtlabel sa_rl; 1663 struct sockaddr_in6 sa_mask; 1664 1665 memset(&info, 0, sizeof(info)); 1666 info.rti_info[RTAX_DST] = rt_key(rt); 1667 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1668 if (!ISSET(rt->rt_flags, RTF_HOST)) 1669 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1670 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1671 ifp = if_get(rt->rt_ifidx); 1672 if (ifp != NULL) { 1673 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1674 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1675 } 1676 1677 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1678 rtableid); 1679 if_put(ifp); 1680 } 1681 1682 /* 1683 * This routine is called to generate a message from the routing 1684 * socket indicating that a redirect has occurred, a routing lookup 1685 * has failed, or that a protocol has detected timeouts to a particular 1686 * destination. 1687 */ 1688 void 1689 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1690 u_int ifidx, int error, u_int tableid) 1691 { 1692 struct rt_msghdr *rtm; 1693 struct mbuf *m; 1694 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1695 1696 if (rtptable.rtp_count == 0) 1697 return; 1698 m = rtm_msg1(type, rtinfo); 1699 if (m == NULL) 1700 return; 1701 rtm = mtod(m, struct rt_msghdr *); 1702 rtm->rtm_flags = RTF_DONE | flags; 1703 rtm->rtm_priority = prio; 1704 rtm->rtm_errno = error; 1705 rtm->rtm_tableid = tableid; 1706 rtm->rtm_addrs = rtinfo->rti_addrs; 1707 rtm->rtm_index = ifidx; 1708 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1709 } 1710 1711 /* 1712 * This routine is called to generate a message from the routing 1713 * socket indicating that the status of a network interface has changed. 1714 */ 1715 void 1716 rtm_ifchg(struct ifnet *ifp) 1717 { 1718 struct if_msghdr *ifm; 1719 struct mbuf *m; 1720 1721 if (rtptable.rtp_count == 0) 1722 return; 1723 m = rtm_msg1(RTM_IFINFO, NULL); 1724 if (m == NULL) 1725 return; 1726 ifm = mtod(m, struct if_msghdr *); 1727 ifm->ifm_index = ifp->if_index; 1728 ifm->ifm_tableid = ifp->if_rdomain; 1729 ifm->ifm_flags = ifp->if_flags; 1730 ifm->ifm_xflags = ifp->if_xflags; 1731 if_getdata(ifp, &ifm->ifm_data); 1732 ifm->ifm_addrs = 0; 1733 route_input(m, NULL, AF_UNSPEC); 1734 } 1735 1736 /* 1737 * This is called to generate messages from the routing socket 1738 * indicating a network interface has had addresses associated with it. 1739 * if we ever reverse the logic and replace messages TO the routing 1740 * socket indicate a request to configure interfaces, then it will 1741 * be unnecessary as the routing socket will automatically generate 1742 * copies of it. 1743 */ 1744 void 1745 rtm_addr(int cmd, struct ifaddr *ifa) 1746 { 1747 struct ifnet *ifp = ifa->ifa_ifp; 1748 struct mbuf *m; 1749 struct rt_addrinfo info; 1750 struct ifa_msghdr *ifam; 1751 1752 if (rtptable.rtp_count == 0) 1753 return; 1754 1755 memset(&info, 0, sizeof(info)); 1756 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1757 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1758 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1759 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1760 if ((m = rtm_msg1(cmd, &info)) == NULL) 1761 return; 1762 ifam = mtod(m, struct ifa_msghdr *); 1763 ifam->ifam_index = ifp->if_index; 1764 ifam->ifam_metric = ifa->ifa_metric; 1765 ifam->ifam_flags = ifa->ifa_flags; 1766 ifam->ifam_addrs = info.rti_addrs; 1767 ifam->ifam_tableid = ifp->if_rdomain; 1768 1769 route_input(m, NULL, 1770 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1771 } 1772 1773 /* 1774 * This is called to generate routing socket messages indicating 1775 * network interface arrival and departure. 1776 */ 1777 void 1778 rtm_ifannounce(struct ifnet *ifp, int what) 1779 { 1780 struct if_announcemsghdr *ifan; 1781 struct mbuf *m; 1782 1783 if (rtptable.rtp_count == 0) 1784 return; 1785 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1786 if (m == NULL) 1787 return; 1788 ifan = mtod(m, struct if_announcemsghdr *); 1789 ifan->ifan_index = ifp->if_index; 1790 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1791 ifan->ifan_what = what; 1792 route_input(m, NULL, AF_UNSPEC); 1793 } 1794 1795 #ifdef BFD 1796 /* 1797 * This is used to generate routing socket messages indicating 1798 * the state of a BFD session. 1799 */ 1800 void 1801 rtm_bfd(struct bfd_config *bfd) 1802 { 1803 struct bfd_msghdr *bfdm; 1804 struct sockaddr_bfd sa_bfd; 1805 struct mbuf *m; 1806 struct rt_addrinfo info; 1807 1808 if (rtptable.rtp_count == 0) 1809 return; 1810 memset(&info, 0, sizeof(info)); 1811 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1812 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1813 1814 m = rtm_msg1(RTM_BFD, &info); 1815 if (m == NULL) 1816 return; 1817 bfdm = mtod(m, struct bfd_msghdr *); 1818 bfdm->bm_addrs = info.rti_addrs; 1819 1820 bfd2sa(bfd->bc_rt, &sa_bfd); 1821 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1822 1823 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1824 } 1825 #endif /* BFD */ 1826 1827 /* 1828 * This is used to generate routing socket messages indicating 1829 * the state of an ieee80211 interface. 1830 */ 1831 void 1832 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1833 { 1834 struct if_ieee80211_msghdr *ifim; 1835 struct mbuf *m; 1836 1837 if (rtptable.rtp_count == 0) 1838 return; 1839 m = rtm_msg1(RTM_80211INFO, NULL); 1840 if (m == NULL) 1841 return; 1842 ifim = mtod(m, struct if_ieee80211_msghdr *); 1843 ifim->ifim_index = ifp->if_index; 1844 ifim->ifim_tableid = ifp->if_rdomain; 1845 1846 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1847 route_input(m, NULL, AF_UNSPEC); 1848 } 1849 1850 /* 1851 * This is used to generate routing socket messages indicating 1852 * the address selection proposal from an interface. 1853 */ 1854 void 1855 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1856 uint8_t prio) 1857 { 1858 struct rt_msghdr *rtm; 1859 struct mbuf *m; 1860 1861 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1862 if (m == NULL) 1863 return; 1864 rtm = mtod(m, struct rt_msghdr *); 1865 rtm->rtm_flags = RTF_DONE | flags; 1866 rtm->rtm_priority = prio; 1867 rtm->rtm_tableid = ifp->if_rdomain; 1868 rtm->rtm_index = ifp->if_index; 1869 rtm->rtm_addrs = rtinfo->rti_addrs; 1870 1871 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1872 } 1873 1874 /* 1875 * This is used in dumping the kernel table via sysctl(). 1876 */ 1877 int 1878 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1879 { 1880 struct walkarg *w = v; 1881 int error = 0, size; 1882 struct rt_addrinfo info; 1883 struct ifnet *ifp; 1884 #ifdef BFD 1885 struct sockaddr_bfd sa_bfd; 1886 #endif 1887 struct sockaddr_rtlabel sa_rl; 1888 struct sockaddr_in6 sa_mask; 1889 1890 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1891 return 0; 1892 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1893 u_int8_t prio = w->w_arg & RTP_MASK; 1894 if (w->w_arg < 0) { 1895 prio = (-w->w_arg) & RTP_MASK; 1896 /* Show all routes that are not this priority */ 1897 if (prio == (rt->rt_priority & RTP_MASK)) 1898 return 0; 1899 } else { 1900 if (prio != (rt->rt_priority & RTP_MASK) && 1901 prio != RTP_ANY) 1902 return 0; 1903 } 1904 } 1905 bzero(&info, sizeof(info)); 1906 info.rti_info[RTAX_DST] = rt_key(rt); 1907 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1908 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1909 ifp = if_get(rt->rt_ifidx); 1910 if (ifp != NULL) { 1911 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1912 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1913 if (ifp->if_flags & IFF_POINTOPOINT) 1914 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1915 } 1916 if_put(ifp); 1917 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1918 #ifdef BFD 1919 if (rt->rt_flags & RTF_BFD) 1920 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1921 #endif 1922 #ifdef MPLS 1923 if (rt->rt_flags & RTF_MPLS) { 1924 struct sockaddr_mpls sa_mpls; 1925 1926 bzero(&sa_mpls, sizeof(sa_mpls)); 1927 sa_mpls.smpls_family = AF_MPLS; 1928 sa_mpls.smpls_len = sizeof(sa_mpls); 1929 sa_mpls.smpls_label = ((struct rt_mpls *) 1930 rt->rt_llinfo)->mpls_label; 1931 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1932 info.rti_mpls = ((struct rt_mpls *) 1933 rt->rt_llinfo)->mpls_operation; 1934 } 1935 #endif 1936 1937 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1938 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1939 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1940 1941 rtm->rtm_pid = curproc->p_p->ps_pid; 1942 rtm->rtm_flags = rt->rt_flags; 1943 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1944 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1945 /* Do not account the routing table's reference. */ 1946 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1947 rtm->rtm_index = rt->rt_ifidx; 1948 rtm->rtm_addrs = info.rti_addrs; 1949 rtm->rtm_tableid = id; 1950 #ifdef MPLS 1951 rtm->rtm_mpls = info.rti_mpls; 1952 #endif 1953 if ((error = copyout(rtm, w->w_where, size)) != 0) 1954 w->w_where = NULL; 1955 else 1956 w->w_where += size; 1957 } 1958 return (error); 1959 } 1960 1961 int 1962 sysctl_iflist(int af, struct walkarg *w) 1963 { 1964 struct ifnet *ifp; 1965 struct ifaddr *ifa; 1966 struct rt_addrinfo info; 1967 int len, error = 0; 1968 1969 bzero(&info, sizeof(info)); 1970 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1971 if (w->w_arg && w->w_arg != ifp->if_index) 1972 continue; 1973 /* Copy the link-layer address first */ 1974 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1975 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 1976 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1977 struct if_msghdr *ifm; 1978 1979 ifm = (struct if_msghdr *)w->w_tmem; 1980 ifm->ifm_index = ifp->if_index; 1981 ifm->ifm_tableid = ifp->if_rdomain; 1982 ifm->ifm_flags = ifp->if_flags; 1983 if_getdata(ifp, &ifm->ifm_data); 1984 ifm->ifm_addrs = info.rti_addrs; 1985 error = copyout(ifm, w->w_where, len); 1986 if (error) 1987 return (error); 1988 w->w_where += len; 1989 } 1990 info.rti_info[RTAX_IFP] = NULL; 1991 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1992 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 1993 if (af && af != ifa->ifa_addr->sa_family) 1994 continue; 1995 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1996 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1997 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1998 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 1999 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 2000 struct ifa_msghdr *ifam; 2001 2002 ifam = (struct ifa_msghdr *)w->w_tmem; 2003 ifam->ifam_index = ifa->ifa_ifp->if_index; 2004 ifam->ifam_flags = ifa->ifa_flags; 2005 ifam->ifam_metric = ifa->ifa_metric; 2006 ifam->ifam_addrs = info.rti_addrs; 2007 error = copyout(w->w_tmem, w->w_where, len); 2008 if (error) 2009 return (error); 2010 w->w_where += len; 2011 } 2012 } 2013 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 2014 info.rti_info[RTAX_BRD] = NULL; 2015 } 2016 return (0); 2017 } 2018 2019 int 2020 sysctl_ifnames(struct walkarg *w) 2021 { 2022 struct if_nameindex_msg ifn; 2023 struct ifnet *ifp; 2024 int error = 0; 2025 2026 /* XXX ignore tableid for now */ 2027 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2028 if (w->w_arg && w->w_arg != ifp->if_index) 2029 continue; 2030 w->w_needed += sizeof(ifn); 2031 if (w->w_where && w->w_needed <= 0) { 2032 2033 memset(&ifn, 0, sizeof(ifn)); 2034 ifn.if_index = ifp->if_index; 2035 strlcpy(ifn.if_name, ifp->if_xname, 2036 sizeof(ifn.if_name)); 2037 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2038 if (error) 2039 return (error); 2040 w->w_where += sizeof(ifn); 2041 } 2042 } 2043 2044 return (0); 2045 } 2046 2047 int 2048 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2049 size_t newlen) 2050 { 2051 int i, error = EINVAL; 2052 u_char af; 2053 struct walkarg w; 2054 struct rt_tableinfo tableinfo; 2055 u_int tableid = 0; 2056 2057 if (new) 2058 return (EPERM); 2059 if (namelen < 3 || namelen > 4) 2060 return (EINVAL); 2061 af = name[0]; 2062 bzero(&w, sizeof(w)); 2063 w.w_where = where; 2064 w.w_given = *given; 2065 w.w_needed = 0 - w.w_given; 2066 w.w_op = name[1]; 2067 w.w_arg = name[2]; 2068 2069 if (namelen == 4) { 2070 tableid = name[3]; 2071 if (!rtable_exists(tableid)) 2072 return (ENOENT); 2073 } else 2074 tableid = curproc->p_p->ps_rtableid; 2075 2076 switch (w.w_op) { 2077 case NET_RT_DUMP: 2078 case NET_RT_FLAGS: 2079 NET_LOCK(); 2080 for (i = 1; i <= AF_MAX; i++) { 2081 if (af != 0 && af != i) 2082 continue; 2083 2084 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2085 &w); 2086 if (error == EAFNOSUPPORT) 2087 error = 0; 2088 if (error) 2089 break; 2090 } 2091 NET_UNLOCK(); 2092 break; 2093 2094 case NET_RT_IFLIST: 2095 NET_LOCK(); 2096 error = sysctl_iflist(af, &w); 2097 NET_UNLOCK(); 2098 break; 2099 2100 case NET_RT_STATS: 2101 return (sysctl_rtable_rtstat(where, given, new)); 2102 case NET_RT_TABLE: 2103 tableid = w.w_arg; 2104 if (!rtable_exists(tableid)) 2105 return (ENOENT); 2106 memset(&tableinfo, 0, sizeof tableinfo); 2107 tableinfo.rti_tableid = tableid; 2108 tableinfo.rti_domainid = rtable_l2(tableid); 2109 error = sysctl_rdstruct(where, given, new, 2110 &tableinfo, sizeof(tableinfo)); 2111 return (error); 2112 case NET_RT_IFNAMES: 2113 NET_LOCK(); 2114 error = sysctl_ifnames(&w); 2115 NET_UNLOCK(); 2116 break; 2117 } 2118 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2119 w.w_needed += w.w_given; 2120 if (where) { 2121 *given = w.w_where - (caddr_t)where; 2122 if (*given < w.w_needed) 2123 return (ENOMEM); 2124 } else 2125 *given = (11 * w.w_needed) / 10; 2126 2127 return (error); 2128 } 2129 2130 int 2131 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2132 { 2133 extern struct cpumem *rtcounters; 2134 uint64_t counters[rts_ncounters]; 2135 struct rtstat rtstat; 2136 uint32_t *words = (uint32_t *)&rtstat; 2137 int i; 2138 2139 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2140 memset(&rtstat, 0, sizeof rtstat); 2141 counters_read(rtcounters, counters, nitems(counters)); 2142 2143 for (i = 0; i < nitems(counters); i++) 2144 words[i] = (uint32_t)counters[i]; 2145 2146 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2147 } 2148 2149 int 2150 rtm_validate_proposal(struct rt_addrinfo *info) 2151 { 2152 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2153 RTA_SEARCH)) { 2154 return -1; 2155 } 2156 2157 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2158 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2159 if (sa == NULL) 2160 return -1; 2161 switch (sa->sa_family) { 2162 case AF_INET: 2163 if (sa->sa_len != sizeof(struct sockaddr_in)) 2164 return -1; 2165 break; 2166 case AF_INET6: 2167 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2168 return -1; 2169 break; 2170 default: 2171 return -1; 2172 } 2173 } 2174 2175 if (ISSET(info->rti_addrs, RTA_IFA)) { 2176 struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2177 if (sa == NULL) 2178 return -1; 2179 switch (sa->sa_family) { 2180 case AF_INET: 2181 if (sa->sa_len != sizeof(struct sockaddr_in)) 2182 return -1; 2183 break; 2184 case AF_INET6: 2185 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2186 return -1; 2187 break; 2188 default: 2189 return -1; 2190 } 2191 } 2192 2193 if (ISSET(info->rti_addrs, RTA_DNS)) { 2194 struct sockaddr_rtdns *rtdns = 2195 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2196 if (rtdns == NULL) 2197 return -1; 2198 if (rtdns->sr_len > sizeof(*rtdns)) 2199 return -1; 2200 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2201 return -1; 2202 switch (rtdns->sr_family) { 2203 case AF_INET: 2204 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2205 sr_dns)) % sizeof(struct in_addr) != 0) 2206 return -1; 2207 break; 2208 #ifdef INET6 2209 case AF_INET6: 2210 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2211 sr_dns)) % sizeof(struct in6_addr) != 0) 2212 return -1; 2213 break; 2214 #endif 2215 default: 2216 return -1; 2217 } 2218 } 2219 2220 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2221 struct sockaddr_rtstatic *rtstatic = 2222 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2223 if (rtstatic == NULL) 2224 return -1; 2225 if (rtstatic->sr_len > sizeof(*rtstatic)) 2226 return -1; 2227 if (rtstatic->sr_len <= 2228 offsetof(struct sockaddr_rtstatic, sr_static)) 2229 return -1; 2230 } 2231 2232 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2233 struct sockaddr_rtsearch *rtsearch = 2234 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2235 if (rtsearch == NULL) 2236 return -1; 2237 if (rtsearch->sr_len > sizeof(*rtsearch)) 2238 return -1; 2239 if (rtsearch->sr_len <= 2240 offsetof(struct sockaddr_rtsearch, sr_search)) 2241 return -1; 2242 } 2243 2244 return 0; 2245 } 2246 2247 /* 2248 * Definitions of protocols supported in the ROUTE domain. 2249 */ 2250 2251 struct domain routedomain; 2252 2253 struct protosw routesw[] = { 2254 { 2255 .pr_type = SOCK_RAW, 2256 .pr_domain = &routedomain, 2257 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2258 .pr_output = route_output, 2259 .pr_ctloutput = route_ctloutput, 2260 .pr_usrreq = route_usrreq, 2261 .pr_attach = route_attach, 2262 .pr_detach = route_detach, 2263 .pr_init = route_prinit, 2264 .pr_sysctl = sysctl_rtable 2265 } 2266 }; 2267 2268 struct domain routedomain = { 2269 .dom_family = PF_ROUTE, 2270 .dom_name = "route", 2271 .dom_init = route_init, 2272 .dom_protosw = routesw, 2273 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2274 }; 2275