1 /* $OpenBSD: rtsock.c,v 1.297 2019/11/24 07:56:03 claudio Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/pool.h> 73 #include <sys/protosw.h> 74 #include <sys/srp.h> 75 76 #include <net/if.h> 77 #include <net/if_dl.h> 78 #include <net/if_var.h> 79 #include <net/route.h> 80 81 #include <netinet/in.h> 82 83 #ifdef MPLS 84 #include <netmpls/mpls.h> 85 #endif 86 #ifdef IPSEC 87 #include <netinet/ip_ipsp.h> 88 #include <net/if_enc.h> 89 #endif 90 #ifdef BFD 91 #include <net/bfd.h> 92 #endif 93 94 #include <sys/stdarg.h> 95 #include <sys/kernel.h> 96 #include <sys/timeout.h> 97 98 #define ROUTESNDQ 8192 99 #define ROUTERCVQ 8192 100 101 const struct sockaddr route_src = { 2, PF_ROUTE, }; 102 103 struct walkarg { 104 int w_op, w_arg, w_given, w_needed, w_tmemsize; 105 caddr_t w_where, w_tmem; 106 }; 107 108 void route_prinit(void); 109 void rcb_ref(void *, void *); 110 void rcb_unref(void *, void *); 111 int route_output(struct mbuf *, struct socket *, struct sockaddr *, 112 struct mbuf *); 113 int route_ctloutput(int, struct socket *, int, int, struct mbuf *); 114 int route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, 115 struct mbuf *, struct proc *); 116 void route_input(struct mbuf *m0, struct socket *, sa_family_t); 117 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 118 int route_cleargateway(struct rtentry *, void *, unsigned int); 119 void rtm_senddesync_timer(void *); 120 void rtm_senddesync(struct socket *); 121 int rtm_sendup(struct socket *, struct mbuf *, int); 122 123 int rtm_getifa(struct rt_addrinfo *, unsigned int); 124 int rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *, 125 uint8_t, unsigned int); 126 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int); 127 struct mbuf *rtm_msg1(int, struct rt_addrinfo *); 128 int rtm_msg2(int, int, struct rt_addrinfo *, caddr_t, 129 struct walkarg *); 130 int rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 131 int rtm_validate_proposal(struct rt_addrinfo *); 132 void rtm_setmetrics(u_long, const struct rt_metrics *, 133 struct rt_kmetrics *); 134 void rtm_getmetrics(const struct rt_kmetrics *, 135 struct rt_metrics *); 136 137 int sysctl_iflist(int, struct walkarg *); 138 int sysctl_ifnames(struct walkarg *); 139 int sysctl_rtable_rtstat(void *, size_t *, void *); 140 141 struct rtpcb { 142 struct socket *rop_socket; 143 144 SRPL_ENTRY(rtpcb) rop_list; 145 struct refcnt rop_refcnt; 146 struct timeout rop_timeout; 147 unsigned int rop_msgfilter; 148 unsigned int rop_flags; 149 u_int rop_rtableid; 150 unsigned short rop_proto; 151 u_char rop_priority; 152 }; 153 #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) 154 155 struct rtptable { 156 SRPL_HEAD(, rtpcb) rtp_list; 157 struct srpl_rc rtp_rc; 158 struct rwlock rtp_lk; 159 unsigned int rtp_count; 160 }; 161 162 struct pool rtpcb_pool; 163 struct rtptable rtptable; 164 165 /* 166 * These flags and timeout are used for indicating to userland (via a 167 * RTM_DESYNC msg) when the route socket has overflowed and messages 168 * have been lost. 169 */ 170 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 171 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 172 queueing more packets */ 173 174 #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ 175 176 void 177 route_prinit(void) 178 { 179 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); 180 rw_init(&rtptable.rtp_lk, "rtsock"); 181 SRPL_INIT(&rtptable.rtp_list); 182 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, 183 IPL_NONE, PR_WAITOK, "rtpcb", NULL); 184 } 185 186 void 187 rcb_ref(void *null, void *v) 188 { 189 struct rtpcb *rop = v; 190 191 refcnt_take(&rop->rop_refcnt); 192 } 193 194 void 195 rcb_unref(void *null, void *v) 196 { 197 struct rtpcb *rop = v; 198 199 refcnt_rele_wake(&rop->rop_refcnt); 200 } 201 202 int 203 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 204 struct mbuf *control, struct proc *p) 205 { 206 struct rtpcb *rop; 207 int error = 0; 208 209 if (req == PRU_CONTROL) 210 return (EOPNOTSUPP); 211 212 soassertlocked(so); 213 214 if (control && control->m_len) { 215 error = EOPNOTSUPP; 216 goto release; 217 } 218 219 rop = sotortpcb(so); 220 if (rop == NULL) { 221 error = EINVAL; 222 goto release; 223 } 224 225 switch (req) { 226 /* no connect, bind, accept. Socket is connected from the start */ 227 case PRU_CONNECT: 228 case PRU_BIND: 229 case PRU_CONNECT2: 230 case PRU_LISTEN: 231 case PRU_ACCEPT: 232 error = EOPNOTSUPP; 233 break; 234 235 case PRU_DISCONNECT: 236 case PRU_ABORT: 237 soisdisconnected(so); 238 break; 239 case PRU_SHUTDOWN: 240 socantsendmore(so); 241 break; 242 case PRU_SENSE: 243 /* stat: don't bother with a blocksize. */ 244 break; 245 246 /* minimal support, just implement a fake peer address */ 247 case PRU_SOCKADDR: 248 error = EINVAL; 249 break; 250 case PRU_PEERADDR: 251 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); 252 nam->m_len = route_src.sa_len; 253 break; 254 255 case PRU_RCVD: 256 /* 257 * If we are in a FLUSH state, check if the buffer is 258 * empty so that we can clear the flag. 259 */ 260 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 261 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == 262 rop->rop_socket->so_rcv.sb_hiwat))) 263 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 264 break; 265 266 case PRU_RCVOOB: 267 case PRU_SENDOOB: 268 error = EOPNOTSUPP; 269 break; 270 case PRU_SEND: 271 if (nam) { 272 error = EISCONN; 273 break; 274 } 275 error = (*so->so_proto->pr_output)(m, so, NULL, NULL); 276 m = NULL; 277 break; 278 default: 279 panic("route_usrreq"); 280 } 281 282 release: 283 if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) { 284 m_freem(control); 285 m_freem(m); 286 } 287 return (error); 288 } 289 290 int 291 route_attach(struct socket *so, int proto) 292 { 293 struct rtpcb *rop; 294 int error; 295 296 /* 297 * use the rawcb but allocate a rtpcb, this 298 * code does not care about the additional fields 299 * and works directly on the raw socket. 300 */ 301 rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO); 302 so->so_pcb = rop; 303 /* Init the timeout structure */ 304 timeout_set(&rop->rop_timeout, rtm_senddesync_timer, so); 305 refcnt_init(&rop->rop_refcnt); 306 307 if (curproc == NULL) 308 error = EACCES; 309 else 310 error = soreserve(so, ROUTESNDQ, ROUTERCVQ); 311 if (error) { 312 pool_put(&rtpcb_pool, rop); 313 return (error); 314 } 315 316 rop->rop_socket = so; 317 rop->rop_proto = proto; 318 319 rop->rop_rtableid = curproc->p_p->ps_rtableid; 320 321 soisconnected(so); 322 so->so_options |= SO_USELOOPBACK; 323 324 rw_enter(&rtptable.rtp_lk, RW_WRITE); 325 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, 326 rop_list); 327 rtptable.rtp_count++; 328 rw_exit(&rtptable.rtp_lk); 329 330 return (0); 331 } 332 333 int 334 route_detach(struct socket *so) 335 { 336 struct rtpcb *rop; 337 338 soassertlocked(so); 339 340 rop = sotortpcb(so); 341 if (rop == NULL) 342 return (EINVAL); 343 344 rw_enter(&rtptable.rtp_lk, RW_WRITE); 345 346 timeout_del(&rop->rop_timeout); 347 rtptable.rtp_count--; 348 349 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, 350 rop_list); 351 rw_exit(&rtptable.rtp_lk); 352 353 /* wait for all references to drop */ 354 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); 355 356 so->so_pcb = NULL; 357 KASSERT((so->so_state & SS_NOFDREF) == 0); 358 pool_put(&rtpcb_pool, rop); 359 360 return (0); 361 } 362 363 int 364 route_ctloutput(int op, struct socket *so, int level, int optname, 365 struct mbuf *m) 366 { 367 struct rtpcb *rop = sotortpcb(so); 368 int error = 0; 369 unsigned int tid, prio; 370 371 if (level != AF_ROUTE) 372 return (EINVAL); 373 374 switch (op) { 375 case PRCO_SETOPT: 376 switch (optname) { 377 case ROUTE_MSGFILTER: 378 if (m == NULL || m->m_len != sizeof(unsigned int)) 379 error = EINVAL; 380 else 381 rop->rop_msgfilter = *mtod(m, unsigned int *); 382 break; 383 case ROUTE_TABLEFILTER: 384 if (m == NULL || m->m_len != sizeof(unsigned int)) { 385 error = EINVAL; 386 break; 387 } 388 tid = *mtod(m, unsigned int *); 389 if (tid != RTABLE_ANY && !rtable_exists(tid)) 390 error = ENOENT; 391 else 392 rop->rop_rtableid = tid; 393 break; 394 case ROUTE_PRIOFILTER: 395 if (m == NULL || m->m_len != sizeof(unsigned int)) { 396 error = EINVAL; 397 break; 398 } 399 prio = *mtod(m, unsigned int *); 400 if (prio > RTP_MAX) 401 error = EINVAL; 402 else 403 rop->rop_priority = prio; 404 break; 405 default: 406 error = ENOPROTOOPT; 407 break; 408 } 409 break; 410 case PRCO_GETOPT: 411 switch (optname) { 412 case ROUTE_MSGFILTER: 413 m->m_len = sizeof(unsigned int); 414 *mtod(m, unsigned int *) = rop->rop_msgfilter; 415 break; 416 case ROUTE_TABLEFILTER: 417 m->m_len = sizeof(unsigned int); 418 *mtod(m, unsigned int *) = rop->rop_rtableid; 419 break; 420 case ROUTE_PRIOFILTER: 421 m->m_len = sizeof(unsigned int); 422 *mtod(m, unsigned int *) = rop->rop_priority; 423 break; 424 default: 425 error = ENOPROTOOPT; 426 break; 427 } 428 } 429 return (error); 430 } 431 432 void 433 rtm_senddesync_timer(void *xso) 434 { 435 struct socket *so = xso; 436 int s; 437 438 s = solock(so); 439 rtm_senddesync(so); 440 sounlock(so, s); 441 } 442 443 void 444 rtm_senddesync(struct socket *so) 445 { 446 struct rtpcb *rop = sotortpcb(so); 447 struct mbuf *desync_mbuf; 448 449 soassertlocked(so); 450 451 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 452 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 453 return; 454 455 /* 456 * If we fail to alloc memory or if sbappendaddr() 457 * fails, re-add timeout and try again. 458 */ 459 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 460 if (desync_mbuf != NULL) { 461 if (sbappendaddr(so, &so->so_rcv, &route_src, 462 desync_mbuf, NULL) != 0) { 463 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 464 sorwakeup(rop->rop_socket); 465 return; 466 } 467 m_freem(desync_mbuf); 468 } 469 /* Re-add timeout to try sending msg again */ 470 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 471 } 472 473 void 474 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 475 { 476 struct socket *so; 477 struct rtpcb *rop; 478 struct rt_msghdr *rtm; 479 struct mbuf *m = m0; 480 struct socket *last = NULL; 481 struct srp_ref sr; 482 int s; 483 484 /* ensure that we can access the rtm_type via mtod() */ 485 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 486 m_freem(m); 487 return; 488 } 489 490 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 491 /* 492 * If route socket is bound to an address family only send 493 * messages that match the address family. Address family 494 * agnostic messages are always sent. 495 */ 496 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 497 rop->rop_proto != sa_family) 498 continue; 499 500 501 so = rop->rop_socket; 502 s = solock(so); 503 504 /* 505 * Check to see if we don't want our own messages and 506 * if we can receive anything. 507 */ 508 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 509 !(so->so_state & SS_ISCONNECTED) || 510 (so->so_state & SS_CANTRCVMORE)) { 511 next: 512 sounlock(so, s); 513 continue; 514 } 515 516 /* filter messages that the process does not want */ 517 rtm = mtod(m, struct rt_msghdr *); 518 /* but RTM_DESYNC can't be filtered */ 519 if (rtm->rtm_type != RTM_DESYNC && rop->rop_msgfilter != 0 && 520 !(rop->rop_msgfilter & (1 << rtm->rtm_type))) 521 goto next; 522 switch (rtm->rtm_type) { 523 case RTM_IFANNOUNCE: 524 case RTM_DESYNC: 525 /* no tableid */ 526 break; 527 case RTM_RESOLVE: 528 case RTM_NEWADDR: 529 case RTM_DELADDR: 530 case RTM_IFINFO: 531 case RTM_80211INFO: 532 case RTM_BFD: 533 /* check against rdomain id */ 534 if (rop->rop_rtableid != RTABLE_ANY && 535 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 536 goto next; 537 break; 538 default: 539 if (rop->rop_priority != 0 && 540 rop->rop_priority < rtm->rtm_priority) 541 goto next; 542 /* check against rtable id */ 543 if (rop->rop_rtableid != RTABLE_ANY && 544 rop->rop_rtableid != rtm->rtm_tableid) 545 goto next; 546 break; 547 } 548 549 /* 550 * Check to see if the flush flag is set. If so, don't queue 551 * any more messages until the flag is cleared. 552 */ 553 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 554 goto next; 555 sounlock(so, s); 556 557 if (last) { 558 s = solock(last); 559 rtm_sendup(last, m, 1); 560 sounlock(last, s); 561 refcnt_rele_wake(&sotortpcb(last)->rop_refcnt); 562 } 563 /* keep a reference for last */ 564 refcnt_take(&rop->rop_refcnt); 565 last = rop->rop_socket; 566 } 567 SRPL_LEAVE(&sr); 568 569 if (last) { 570 s = solock(last); 571 rtm_sendup(last, m, 0); 572 sounlock(last, s); 573 refcnt_rele_wake(&sotortpcb(last)->rop_refcnt); 574 } else 575 m_freem(m); 576 } 577 578 int 579 rtm_sendup(struct socket *so, struct mbuf *m0, int more) 580 { 581 struct rtpcb *rop = sotortpcb(so); 582 struct mbuf *m; 583 584 soassertlocked(so); 585 586 if (more) { 587 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 588 if (m == NULL) 589 return (ENOMEM); 590 } else 591 m = m0; 592 593 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || 594 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { 595 /* Flag socket as desync'ed and flush required */ 596 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 597 rtm_senddesync(so); 598 m_freem(m); 599 return (ENOBUFS); 600 } 601 602 sorwakeup(so); 603 return (0); 604 } 605 606 struct rt_msghdr * 607 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 608 { 609 struct rt_msghdr *rtm; 610 struct rt_addrinfo info; 611 struct sockaddr_rtlabel sa_rl; 612 struct sockaddr_in6 sa_mask; 613 #ifdef BFD 614 struct sockaddr_bfd sa_bfd; 615 #endif 616 struct ifnet *ifp = NULL; 617 int len; 618 619 bzero(&info, sizeof(info)); 620 info.rti_info[RTAX_DST] = rt_key(rt); 621 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 622 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 623 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 624 #ifdef BFD 625 if (rt->rt_flags & RTF_BFD) 626 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 627 #endif 628 #ifdef MPLS 629 if (rt->rt_flags & RTF_MPLS) { 630 struct sockaddr_mpls sa_mpls; 631 632 bzero(&sa_mpls, sizeof(sa_mpls)); 633 sa_mpls.smpls_family = AF_MPLS; 634 sa_mpls.smpls_len = sizeof(sa_mpls); 635 sa_mpls.smpls_label = ((struct rt_mpls *) 636 rt->rt_llinfo)->mpls_label; 637 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 638 info.rti_mpls = ((struct rt_mpls *) 639 rt->rt_llinfo)->mpls_operation; 640 } 641 #endif 642 ifp = if_get(rt->rt_ifidx); 643 if (ifp != NULL) { 644 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 645 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 646 if (ifp->if_flags & IFF_POINTOPOINT) 647 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 648 } 649 if_put(ifp); 650 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 651 652 /* build new route message */ 653 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 654 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 655 656 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 657 rtm->rtm_type = type; 658 rtm->rtm_index = rt->rt_ifidx; 659 rtm->rtm_tableid = tableid; 660 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 661 rtm->rtm_flags = rt->rt_flags; 662 rtm->rtm_pid = curproc->p_p->ps_pid; 663 rtm->rtm_seq = seq; 664 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 665 rtm->rtm_addrs = info.rti_addrs; 666 #ifdef MPLS 667 rtm->rtm_mpls = info.rti_mpls; 668 #endif 669 return rtm; 670 } 671 672 int 673 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 674 struct mbuf *control) 675 { 676 struct rt_msghdr *rtm = NULL; 677 struct rtentry *rt = NULL; 678 struct rt_addrinfo info; 679 int len, seq, error = 0; 680 u_int tableid; 681 u_int8_t prio; 682 u_char vers, type; 683 684 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 685 (m = m_pullup(m, sizeof(int32_t))) == 0)) 686 return (ENOBUFS); 687 if ((m->m_flags & M_PKTHDR) == 0) 688 panic("route_output"); 689 len = m->m_pkthdr.len; 690 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 || 691 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 692 error = EINVAL; 693 goto fail; 694 } 695 vers = mtod(m, struct rt_msghdr *)->rtm_version; 696 switch (vers) { 697 case RTM_VERSION: 698 if (len < sizeof(struct rt_msghdr)) { 699 error = EINVAL; 700 goto fail; 701 } 702 if (len > RTM_MAXSIZE) { 703 error = EMSGSIZE; 704 goto fail; 705 } 706 rtm = malloc(len, M_RTABLE, M_WAITOK); 707 m_copydata(m, 0, len, (caddr_t)rtm); 708 break; 709 default: 710 error = EPROTONOSUPPORT; 711 goto fail; 712 } 713 714 /* Verify that the caller is sending an appropriate message early */ 715 switch (rtm->rtm_type) { 716 case RTM_ADD: 717 case RTM_DELETE: 718 case RTM_GET: 719 case RTM_CHANGE: 720 case RTM_PROPOSAL: 721 break; 722 default: 723 error = EOPNOTSUPP; 724 goto fail; 725 } 726 /* 727 * Verify that the header length is valid. 728 * All messages from userland start with a struct rt_msghdr. 729 */ 730 if (rtm->rtm_hdrlen == 0) /* old client */ 731 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 732 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 733 len < rtm->rtm_hdrlen) { 734 error = EINVAL; 735 goto fail; 736 } 737 738 rtm->rtm_pid = curproc->p_p->ps_pid; 739 740 /* 741 * Verify that the caller has the appropriate privilege; RTM_GET 742 * is the only operation the non-superuser is allowed. 743 */ 744 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 745 error = EACCES; 746 goto fail; 747 } 748 tableid = rtm->rtm_tableid; 749 if (!rtable_exists(tableid)) { 750 if (rtm->rtm_type == RTM_ADD) { 751 if ((error = rtable_add(tableid)) != 0) 752 goto fail; 753 } else { 754 error = EINVAL; 755 goto fail; 756 } 757 } 758 759 760 /* Do not let userland play with kernel-only flags. */ 761 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 762 error = EINVAL; 763 goto fail; 764 } 765 766 /* make sure that kernel-only bits are not set */ 767 rtm->rtm_priority &= RTP_MASK; 768 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 769 rtm->rtm_fmask &= RTF_FMASK; 770 771 if (rtm->rtm_priority != 0) { 772 if (rtm->rtm_priority > RTP_MAX || 773 rtm->rtm_priority == RTP_LOCAL) { 774 error = EINVAL; 775 goto fail; 776 } 777 prio = rtm->rtm_priority; 778 } else if (rtm->rtm_type != RTM_ADD) 779 prio = RTP_ANY; 780 else if (rtm->rtm_flags & RTF_STATIC) 781 prio = 0; 782 else 783 prio = RTP_DEFAULT; 784 785 bzero(&info, sizeof(info)); 786 info.rti_addrs = rtm->rtm_addrs; 787 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 788 len + (caddr_t)rtm, &info)) != 0) 789 goto fail; 790 info.rti_flags = rtm->rtm_flags; 791 if (rtm->rtm_type != RTM_PROPOSAL && 792 (info.rti_info[RTAX_DST] == NULL || 793 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 794 (info.rti_info[RTAX_GATEWAY] != NULL && 795 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 796 info.rti_info[RTAX_GENMASK] != NULL)) { 797 error = EINVAL; 798 goto fail; 799 } 800 #ifdef MPLS 801 info.rti_mpls = rtm->rtm_mpls; 802 #endif 803 804 if (info.rti_info[RTAX_GATEWAY] != NULL && 805 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 806 (info.rti_flags & RTF_CLONING) == 0) { 807 info.rti_flags |= RTF_LLINFO; 808 } 809 810 /* 811 * Validate RTM_PROPOSAL and pass it along or error out. 812 */ 813 if (rtm->rtm_type == RTM_PROPOSAL) { 814 if (rtm_validate_proposal(&info) == -1) { 815 error = EINVAL; 816 goto fail; 817 } 818 /* 819 * If this is a solicitation proposal forward request to 820 * all interfaces. Most handlers will ignore it but at least 821 * umb(4) will send a response to this event. 822 */ 823 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 824 struct ifnet *ifp; 825 NET_LOCK(); 826 TAILQ_FOREACH(ifp, &ifnet, if_list) { 827 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 828 } 829 NET_UNLOCK(); 830 } 831 } else { 832 error = rtm_output(rtm, &rt, &info, prio, tableid); 833 if (!error) { 834 type = rtm->rtm_type; 835 seq = rtm->rtm_seq; 836 free(rtm, M_RTABLE, len); 837 rtm = rtm_report(rt, type, seq, tableid); 838 len = rtm->rtm_msglen; 839 } 840 } 841 842 rtfree(rt); 843 if (error) { 844 rtm->rtm_errno = error; 845 } else { 846 rtm->rtm_flags |= RTF_DONE; 847 } 848 849 /* 850 * Check to see if we don't want our own messages. 851 */ 852 if (!(so->so_options & SO_USELOOPBACK)) { 853 if (rtptable.rtp_count <= 1) { 854 /* no other listener and no loopback of messages */ 855 fail: 856 free(rtm, M_RTABLE, len); 857 m_freem(m); 858 return (error); 859 } 860 } 861 if (rtm) { 862 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 863 m_freem(m); 864 m = NULL; 865 } else if (m->m_pkthdr.len > len) 866 m_adj(m, len - m->m_pkthdr.len); 867 free(rtm, M_RTABLE, len); 868 } 869 if (m) 870 route_input(m, so, info.rti_info[RTAX_DST] ? 871 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 872 873 return (error); 874 } 875 876 int 877 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 878 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 879 { 880 struct rtentry *rt = *prt; 881 struct ifnet *ifp = NULL; 882 int plen, newgate = 0, error = 0; 883 884 switch (rtm->rtm_type) { 885 case RTM_ADD: 886 if (info->rti_info[RTAX_GATEWAY] == NULL) { 887 error = EINVAL; 888 break; 889 } 890 891 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 892 if ((error = route_arp_conflict(rt, info))) { 893 rtfree(rt); 894 rt = NULL; 895 break; 896 } 897 898 /* 899 * We cannot go through a delete/create/insert cycle for 900 * cached route because this can lead to races in the 901 * receive path. Instead we update the L2 cache. 902 */ 903 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 904 goto change; 905 906 rtfree(rt); 907 rt = NULL; 908 909 NET_LOCK(); 910 if ((error = rtm_getifa(info, tableid)) != 0) { 911 NET_UNLOCK(); 912 break; 913 } 914 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 915 NET_UNLOCK(); 916 if (error == 0) 917 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 918 &rt->rt_rmx); 919 break; 920 case RTM_DELETE: 921 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 922 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 923 prio); 924 if (rt == NULL) { 925 error = ESRCH; 926 break; 927 } 928 929 /* 930 * If we got multipath routes, we require users to specify 931 * a matching gateway. 932 */ 933 if (ISSET(rt->rt_flags, RTF_MPATH) && 934 info->rti_info[RTAX_GATEWAY] == NULL) { 935 error = ESRCH; 936 break; 937 } 938 939 /* Detaching an interface requires the KERNEL_LOCK(). */ 940 ifp = if_get(rt->rt_ifidx); 941 KASSERT(ifp != NULL); 942 943 /* 944 * Invalidate the cache of automagically created and 945 * referenced L2 entries to make sure that ``rt_gwroute'' 946 * pointer stays valid for other CPUs. 947 */ 948 if ((ISSET(rt->rt_flags, RTF_CACHED))) { 949 NET_LOCK(); 950 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 951 /* Reset the MTU of the gateway route. */ 952 rtable_walk(tableid, rt_key(rt)->sa_family, NULL, 953 route_cleargateway, rt); 954 NET_UNLOCK(); 955 if_put(ifp); 956 break; 957 } 958 959 /* 960 * Make sure that local routes are only modified by the 961 * kernel. 962 */ 963 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 964 if_put(ifp); 965 error = EINVAL; 966 break; 967 } 968 969 rtfree(rt); 970 rt = NULL; 971 972 NET_LOCK(); 973 error = rtrequest_delete(info, prio, ifp, &rt, tableid); 974 NET_UNLOCK(); 975 if_put(ifp); 976 break; 977 case RTM_CHANGE: 978 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 979 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 980 prio); 981 /* 982 * If we got multipath routes, we require users to specify 983 * a matching gateway. 984 */ 985 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 986 (info->rti_info[RTAX_GATEWAY] == NULL)) { 987 rtfree(rt); 988 rt = NULL; 989 } 990 /* 991 * If RTAX_GATEWAY is the argument we're trying to 992 * change, try to find a compatible route. 993 */ 994 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) { 995 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 996 info->rti_info[RTAX_NETMASK], NULL, prio); 997 /* Ensure we don't pick a multipath one. */ 998 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 999 rtfree(rt); 1000 rt = NULL; 1001 } 1002 } 1003 1004 if (rt == NULL) { 1005 error = ESRCH; 1006 break; 1007 } 1008 1009 /* 1010 * Make sure that local routes are only modified by the 1011 * kernel. 1012 */ 1013 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1014 error = EINVAL; 1015 break; 1016 } 1017 1018 /* 1019 * RTM_CHANGE needs a perfect match. 1020 */ 1021 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family, 1022 info->rti_info[RTAX_NETMASK]); 1023 if (rt_plen(rt) != plen) { 1024 error = ESRCH; 1025 break; 1026 } 1027 1028 if (info->rti_info[RTAX_GATEWAY] != NULL) 1029 if (rt->rt_gateway == NULL || 1030 bcmp(rt->rt_gateway, 1031 info->rti_info[RTAX_GATEWAY], 1032 info->rti_info[RTAX_GATEWAY]->sa_len)) { 1033 newgate = 1; 1034 } 1035 /* 1036 * Check reachable gateway before changing the route. 1037 * New gateway could require new ifaddr, ifp; 1038 * flags may also be different; ifp may be specified 1039 * by ll sockaddr when protocol address is ambiguous. 1040 */ 1041 if (newgate || info->rti_info[RTAX_IFP] != NULL || 1042 info->rti_info[RTAX_IFA] != NULL) { 1043 struct ifaddr *ifa = NULL; 1044 1045 NET_LOCK(); 1046 if ((error = rtm_getifa(info, tableid)) != 0) { 1047 NET_UNLOCK(); 1048 break; 1049 } 1050 ifa = info->rti_ifa; 1051 if (rt->rt_ifa != ifa) { 1052 ifp = if_get(rt->rt_ifidx); 1053 KASSERT(ifp != NULL); 1054 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 1055 ifafree(rt->rt_ifa); 1056 if_put(ifp); 1057 1058 ifa->ifa_refcnt++; 1059 rt->rt_ifa = ifa; 1060 rt->rt_ifidx = ifa->ifa_ifp->if_index; 1061 /* recheck link state after ifp change */ 1062 rt_if_linkstate_change(rt, ifa->ifa_ifp, 1063 tableid); 1064 } 1065 NET_UNLOCK(); 1066 } 1067 change: 1068 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1069 /* When updating the gateway, make sure it is valid. */ 1070 if (!newgate && rt->rt_gateway->sa_family != 1071 info->rti_info[RTAX_GATEWAY]->sa_family) { 1072 error = EINVAL; 1073 break; 1074 } 1075 1076 NET_LOCK(); 1077 error = rt_setgate(rt, 1078 info->rti_info[RTAX_GATEWAY], tableid); 1079 NET_UNLOCK(); 1080 if (error) 1081 break; 1082 } 1083 #ifdef MPLS 1084 if (rtm->rtm_flags & RTF_MPLS) { 1085 NET_LOCK(); 1086 error = rt_mpls_set(rt, 1087 info->rti_info[RTAX_SRC], info->rti_mpls); 1088 NET_UNLOCK(); 1089 if (error) 1090 break; 1091 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) { 1092 NET_LOCK(); 1093 /* if gateway changed remove MPLS information */ 1094 rt_mpls_clear(rt); 1095 NET_UNLOCK(); 1096 } 1097 #endif 1098 1099 #ifdef BFD 1100 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 1101 if ((error = bfdset(rt))) 1102 break; 1103 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 1104 ISSET(rtm->rtm_fmask, RTF_BFD)) { 1105 bfdclear(rt); 1106 } 1107 #endif 1108 1109 NET_LOCK(); 1110 /* Hack to allow some flags to be toggled */ 1111 if (rtm->rtm_fmask) { 1112 /* MPLS flag it is set by rt_mpls_set() */ 1113 rtm->rtm_fmask &= ~RTF_MPLS; 1114 rtm->rtm_flags &= ~RTF_MPLS; 1115 rt->rt_flags = 1116 (rt->rt_flags & ~rtm->rtm_fmask) | 1117 (rtm->rtm_flags & rtm->rtm_fmask); 1118 } 1119 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); 1120 1121 ifp = if_get(rt->rt_ifidx); 1122 KASSERT(ifp != NULL); 1123 ifp->if_rtrequest(ifp, RTM_ADD, rt); 1124 if_put(ifp); 1125 1126 if (info->rti_info[RTAX_LABEL] != NULL) { 1127 char *rtlabel = ((struct sockaddr_rtlabel *) 1128 info->rti_info[RTAX_LABEL])->sr_label; 1129 rtlabel_unref(rt->rt_labelid); 1130 rt->rt_labelid = rtlabel_name2id(rtlabel); 1131 } 1132 if_group_routechange(info->rti_info[RTAX_DST], 1133 info->rti_info[RTAX_NETMASK]); 1134 rt->rt_locks &= ~(rtm->rtm_inits); 1135 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 1136 NET_UNLOCK(); 1137 break; 1138 case RTM_GET: 1139 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1140 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1141 prio); 1142 if (rt == NULL) 1143 error = ESRCH; 1144 break; 1145 } 1146 1147 *prt = rt; 1148 return (error); 1149 } 1150 1151 struct ifaddr * 1152 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway, 1153 unsigned int rtableid) 1154 { 1155 struct ifaddr *ifa; 1156 1157 if ((flags & RTF_GATEWAY) == 0) { 1158 /* 1159 * If we are adding a route to an interface, 1160 * and the interface is a pt to pt link 1161 * we should search for the destination 1162 * as our clue to the interface. Otherwise 1163 * we can use the local address. 1164 */ 1165 ifa = NULL; 1166 if (flags & RTF_HOST) 1167 ifa = ifa_ifwithdstaddr(dst, rtableid); 1168 if (ifa == NULL) 1169 ifa = ifa_ifwithaddr(gateway, rtableid); 1170 } else { 1171 /* 1172 * If we are adding a route to a remote net 1173 * or host, the gateway may still be on the 1174 * other end of a pt to pt link. 1175 */ 1176 ifa = ifa_ifwithdstaddr(gateway, rtableid); 1177 } 1178 if (ifa == NULL) { 1179 if (gateway->sa_family == AF_LINK) { 1180 struct sockaddr_dl *sdl = satosdl(gateway); 1181 struct ifnet *ifp = if_get(sdl->sdl_index); 1182 1183 if (ifp != NULL) 1184 ifa = ifaof_ifpforaddr(dst, ifp); 1185 if_put(ifp); 1186 } else { 1187 struct rtentry *rt; 1188 1189 rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid)); 1190 if (rt != NULL) 1191 ifa = rt->rt_ifa; 1192 rtfree(rt); 1193 } 1194 } 1195 if (ifa == NULL) 1196 return (NULL); 1197 if (ifa->ifa_addr->sa_family != dst->sa_family) { 1198 struct ifaddr *oifa = ifa; 1199 ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 1200 if (ifa == NULL) 1201 ifa = oifa; 1202 } 1203 return (ifa); 1204 } 1205 1206 int 1207 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid) 1208 { 1209 struct ifnet *ifp = NULL; 1210 1211 /* 1212 * The "returned" `ifa' is guaranteed to be alive only if 1213 * the NET_LOCK() is held. 1214 */ 1215 NET_ASSERT_LOCKED(); 1216 1217 /* 1218 * ifp may be specified by sockaddr_dl when protocol address 1219 * is ambiguous 1220 */ 1221 if (info->rti_info[RTAX_IFP] != NULL) { 1222 struct sockaddr_dl *sdl; 1223 1224 sdl = satosdl(info->rti_info[RTAX_IFP]); 1225 ifp = if_get(sdl->sdl_index); 1226 } 1227 1228 #ifdef IPSEC 1229 /* 1230 * If the destination is a PF_KEY address, we'll look 1231 * for the existence of a encap interface number or address 1232 * in the options list of the gateway. By default, we'll return 1233 * enc0. 1234 */ 1235 if (info->rti_info[RTAX_DST] && 1236 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1237 info->rti_ifa = enc_getifa(rtid, 0); 1238 #endif 1239 1240 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1241 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1242 1243 if (info->rti_ifa == NULL) { 1244 struct sockaddr *sa; 1245 1246 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1247 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1248 sa = info->rti_info[RTAX_DST]; 1249 1250 if (sa != NULL && ifp != NULL) 1251 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1252 else if (info->rti_info[RTAX_DST] != NULL && 1253 info->rti_info[RTAX_GATEWAY] != NULL) 1254 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1255 info->rti_info[RTAX_DST], 1256 info->rti_info[RTAX_GATEWAY], 1257 rtid); 1258 else if (sa != NULL) 1259 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1260 sa, sa, rtid); 1261 } 1262 1263 if_put(ifp); 1264 1265 if (info->rti_ifa == NULL) 1266 return (ENETUNREACH); 1267 1268 return (0); 1269 } 1270 1271 int 1272 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1273 { 1274 struct rtentry *nhrt = arg; 1275 1276 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1277 !ISSET(rt->rt_locks, RTV_MTU)) 1278 rt->rt_mtu = 0; 1279 1280 return (0); 1281 } 1282 1283 /* 1284 * Check if the user request to insert an ARP entry does not conflict 1285 * with existing ones. 1286 * 1287 * Only two entries are allowed for a given IP address: a private one 1288 * (priv) and a public one (pub). 1289 */ 1290 int 1291 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1292 { 1293 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1294 1295 if ((info->rti_flags & RTF_LLINFO) == 0 || 1296 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1297 return (0); 1298 1299 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1300 return (0); 1301 1302 /* If the entry is cached, it can be updated. */ 1303 if (ISSET(rt->rt_flags, RTF_CACHED)) 1304 return (0); 1305 1306 /* 1307 * Same destination, not cached and both "priv" or "pub" conflict. 1308 * If a second entry exists, it always conflict. 1309 */ 1310 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1311 ISSET(rt->rt_flags, RTF_MPATH)) 1312 return (EEXIST); 1313 1314 /* No conflict but an entry exist so we need to force mpath. */ 1315 info->rti_flags |= RTF_MPATH; 1316 return (0); 1317 } 1318 1319 void 1320 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1321 struct rt_kmetrics *out) 1322 { 1323 int64_t expire; 1324 1325 if (which & RTV_MTU) 1326 out->rmx_mtu = in->rmx_mtu; 1327 if (which & RTV_EXPIRE) { 1328 expire = in->rmx_expire; 1329 if (expire != 0) { 1330 expire -= time_second; 1331 expire += time_uptime; 1332 } 1333 1334 out->rmx_expire = expire; 1335 } 1336 } 1337 1338 void 1339 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1340 { 1341 int64_t expire; 1342 1343 expire = in->rmx_expire; 1344 if (expire != 0) { 1345 expire -= time_uptime; 1346 expire += time_second; 1347 } 1348 1349 bzero(out, sizeof(*out)); 1350 out->rmx_locks = in->rmx_locks; 1351 out->rmx_mtu = in->rmx_mtu; 1352 out->rmx_expire = expire; 1353 out->rmx_pksent = in->rmx_pksent; 1354 } 1355 1356 #define ROUNDUP(a) \ 1357 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1358 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1359 1360 int 1361 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1362 { 1363 struct sockaddr *sa; 1364 int i; 1365 1366 /* 1367 * Parse address bits, split address storage in chunks, and 1368 * set info pointers. Use sa_len for traversing the memory 1369 * and check that we stay within in the limit. 1370 */ 1371 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1372 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1373 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1374 continue; 1375 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1376 return (EINVAL); 1377 sa = (struct sockaddr *)cp; 1378 if (cp + sa->sa_len > cplim) 1379 return (EINVAL); 1380 rtinfo->rti_info[i] = sa; 1381 ADVANCE(cp, sa); 1382 } 1383 /* 1384 * Check that the address family is suitable for the route address 1385 * type. Check that each address has a size that fits its family 1386 * and its length is within the size. Strings within addresses must 1387 * be NUL terminated. 1388 */ 1389 for (i = 0; i < RTAX_MAX; i++) { 1390 size_t len, maxlen, size; 1391 1392 sa = rtinfo->rti_info[i]; 1393 if (sa == NULL) 1394 continue; 1395 maxlen = size = 0; 1396 switch (i) { 1397 case RTAX_DST: 1398 case RTAX_GATEWAY: 1399 case RTAX_SRC: 1400 switch (sa->sa_family) { 1401 case AF_INET: 1402 size = sizeof(struct sockaddr_in); 1403 break; 1404 case AF_LINK: 1405 size = sizeof(struct sockaddr_dl); 1406 break; 1407 #ifdef INET6 1408 case AF_INET6: 1409 size = sizeof(struct sockaddr_in6); 1410 break; 1411 #endif 1412 #ifdef MPLS 1413 case AF_MPLS: 1414 size = sizeof(struct sockaddr_mpls); 1415 break; 1416 #endif 1417 } 1418 break; 1419 case RTAX_IFP: 1420 if (sa->sa_family != AF_LINK) 1421 return (EAFNOSUPPORT); 1422 /* 1423 * XXX Should be sizeof(struct sockaddr_dl), but 1424 * route(8) has a bug and provides less memory. 1425 * arp(8) has another bug and uses sizeof pointer. 1426 */ 1427 size = 4; 1428 break; 1429 case RTAX_IFA: 1430 switch (sa->sa_family) { 1431 case AF_INET: 1432 size = sizeof(struct sockaddr_in); 1433 break; 1434 #ifdef INET6 1435 case AF_INET6: 1436 size = sizeof(struct sockaddr_in6); 1437 break; 1438 #endif 1439 default: 1440 return (EAFNOSUPPORT); 1441 } 1442 break; 1443 case RTAX_LABEL: 1444 sa->sa_family = AF_UNSPEC; 1445 maxlen = RTLABEL_LEN; 1446 size = sizeof(struct sockaddr_rtlabel); 1447 break; 1448 #ifdef BFD 1449 case RTAX_BFD: 1450 sa->sa_family = AF_UNSPEC; 1451 size = sizeof(struct sockaddr_bfd); 1452 break; 1453 #endif 1454 case RTAX_DNS: 1455 /* more validation in rtm_validate_proposal */ 1456 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1457 return (EINVAL); 1458 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1459 sr_dns)) 1460 return (EINVAL); 1461 switch (sa->sa_family) { 1462 case AF_INET: 1463 #ifdef INET6 1464 case AF_INET6: 1465 #endif 1466 break; 1467 default: 1468 return (EAFNOSUPPORT); 1469 } 1470 break; 1471 case RTAX_STATIC: 1472 sa->sa_family = AF_UNSPEC; 1473 maxlen = RTSTATIC_LEN; 1474 size = sizeof(struct sockaddr_rtstatic); 1475 break; 1476 case RTAX_SEARCH: 1477 sa->sa_family = AF_UNSPEC; 1478 maxlen = RTSEARCH_LEN; 1479 size = sizeof(struct sockaddr_rtsearch); 1480 break; 1481 } 1482 if (size) { 1483 /* memory for the full struct must be provided */ 1484 if (sa->sa_len < size) 1485 return (EINVAL); 1486 } 1487 if (maxlen) { 1488 /* this should not happen */ 1489 if (2 + maxlen > size) 1490 return (EINVAL); 1491 /* strings must be NUL terminated within the struct */ 1492 len = strnlen(sa->sa_data, maxlen); 1493 if (len >= maxlen || 2 + len >= sa->sa_len) 1494 return (EINVAL); 1495 break; 1496 } 1497 } 1498 return (0); 1499 } 1500 1501 struct mbuf * 1502 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1503 { 1504 struct rt_msghdr *rtm; 1505 struct mbuf *m; 1506 int i; 1507 struct sockaddr *sa; 1508 int len, dlen, hlen; 1509 1510 switch (type) { 1511 case RTM_DELADDR: 1512 case RTM_NEWADDR: 1513 len = sizeof(struct ifa_msghdr); 1514 break; 1515 case RTM_IFINFO: 1516 len = sizeof(struct if_msghdr); 1517 break; 1518 case RTM_IFANNOUNCE: 1519 len = sizeof(struct if_announcemsghdr); 1520 break; 1521 #ifdef BFD 1522 case RTM_BFD: 1523 len = sizeof(struct bfd_msghdr); 1524 break; 1525 #endif 1526 case RTM_80211INFO: 1527 len = sizeof(struct if_ieee80211_msghdr); 1528 break; 1529 default: 1530 len = sizeof(struct rt_msghdr); 1531 break; 1532 } 1533 if (len > MCLBYTES) 1534 panic("rtm_msg1"); 1535 m = m_gethdr(M_DONTWAIT, MT_DATA); 1536 if (m && len > MHLEN) { 1537 MCLGET(m, M_DONTWAIT); 1538 if ((m->m_flags & M_EXT) == 0) { 1539 m_free(m); 1540 m = NULL; 1541 } 1542 } 1543 if (m == NULL) 1544 return (m); 1545 m->m_pkthdr.len = m->m_len = hlen = len; 1546 m->m_pkthdr.ph_ifidx = 0; 1547 rtm = mtod(m, struct rt_msghdr *); 1548 bzero(rtm, len); 1549 for (i = 0; i < RTAX_MAX; i++) { 1550 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1551 continue; 1552 rtinfo->rti_addrs |= (1 << i); 1553 dlen = ROUNDUP(sa->sa_len); 1554 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1555 m_freem(m); 1556 return (NULL); 1557 } 1558 len += dlen; 1559 } 1560 rtm->rtm_msglen = len; 1561 rtm->rtm_hdrlen = hlen; 1562 rtm->rtm_version = RTM_VERSION; 1563 rtm->rtm_type = type; 1564 return (m); 1565 } 1566 1567 int 1568 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1569 struct walkarg *w) 1570 { 1571 int i; 1572 int len, dlen, hlen, second_time = 0; 1573 caddr_t cp0; 1574 1575 rtinfo->rti_addrs = 0; 1576 again: 1577 switch (type) { 1578 case RTM_DELADDR: 1579 case RTM_NEWADDR: 1580 len = sizeof(struct ifa_msghdr); 1581 break; 1582 case RTM_IFINFO: 1583 len = sizeof(struct if_msghdr); 1584 break; 1585 default: 1586 len = sizeof(struct rt_msghdr); 1587 break; 1588 } 1589 hlen = len; 1590 if ((cp0 = cp) != NULL) 1591 cp += len; 1592 for (i = 0; i < RTAX_MAX; i++) { 1593 struct sockaddr *sa; 1594 1595 if ((sa = rtinfo->rti_info[i]) == NULL) 1596 continue; 1597 rtinfo->rti_addrs |= (1 << i); 1598 dlen = ROUNDUP(sa->sa_len); 1599 if (cp) { 1600 bcopy(sa, cp, (size_t)dlen); 1601 cp += dlen; 1602 } 1603 len += dlen; 1604 } 1605 /* align message length to the next natural boundary */ 1606 len = ALIGN(len); 1607 if (cp == 0 && w != NULL && !second_time) { 1608 w->w_needed += len; 1609 if (w->w_needed <= 0 && w->w_where) { 1610 if (w->w_tmemsize < len) { 1611 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1612 w->w_tmem = malloc(len, M_RTABLE, 1613 M_NOWAIT | M_ZERO); 1614 if (w->w_tmem) 1615 w->w_tmemsize = len; 1616 } 1617 if (w->w_tmem) { 1618 cp = w->w_tmem; 1619 second_time = 1; 1620 goto again; 1621 } else 1622 w->w_where = 0; 1623 } 1624 } 1625 if (cp && w) /* clear the message header */ 1626 bzero(cp0, hlen); 1627 1628 if (cp) { 1629 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1630 1631 rtm->rtm_version = RTM_VERSION; 1632 rtm->rtm_type = type; 1633 rtm->rtm_msglen = len; 1634 rtm->rtm_hdrlen = hlen; 1635 } 1636 return (len); 1637 } 1638 1639 void 1640 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1641 { 1642 struct rt_addrinfo info; 1643 struct ifnet *ifp; 1644 struct sockaddr_rtlabel sa_rl; 1645 struct sockaddr_in6 sa_mask; 1646 1647 memset(&info, 0, sizeof(info)); 1648 info.rti_info[RTAX_DST] = rt_key(rt); 1649 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1650 if (!ISSET(rt->rt_flags, RTF_HOST)) 1651 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1652 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1653 ifp = if_get(rt->rt_ifidx); 1654 if (ifp != NULL) { 1655 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1656 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1657 } 1658 1659 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1660 rtableid); 1661 if_put(ifp); 1662 } 1663 1664 /* 1665 * This routine is called to generate a message from the routing 1666 * socket indicating that a redirect has occurred, a routing lookup 1667 * has failed, or that a protocol has detected timeouts to a particular 1668 * destination. 1669 */ 1670 void 1671 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1672 u_int ifidx, int error, u_int tableid) 1673 { 1674 struct rt_msghdr *rtm; 1675 struct mbuf *m; 1676 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1677 1678 if (rtptable.rtp_count == 0) 1679 return; 1680 m = rtm_msg1(type, rtinfo); 1681 if (m == NULL) 1682 return; 1683 rtm = mtod(m, struct rt_msghdr *); 1684 rtm->rtm_flags = RTF_DONE | flags; 1685 rtm->rtm_priority = prio; 1686 rtm->rtm_errno = error; 1687 rtm->rtm_tableid = tableid; 1688 rtm->rtm_addrs = rtinfo->rti_addrs; 1689 rtm->rtm_index = ifidx; 1690 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1691 } 1692 1693 /* 1694 * This routine is called to generate a message from the routing 1695 * socket indicating that the status of a network interface has changed. 1696 */ 1697 void 1698 rtm_ifchg(struct ifnet *ifp) 1699 { 1700 struct if_msghdr *ifm; 1701 struct mbuf *m; 1702 1703 if (rtptable.rtp_count == 0) 1704 return; 1705 m = rtm_msg1(RTM_IFINFO, NULL); 1706 if (m == NULL) 1707 return; 1708 ifm = mtod(m, struct if_msghdr *); 1709 ifm->ifm_index = ifp->if_index; 1710 ifm->ifm_tableid = ifp->if_rdomain; 1711 ifm->ifm_flags = ifp->if_flags; 1712 ifm->ifm_xflags = ifp->if_xflags; 1713 if_getdata(ifp, &ifm->ifm_data); 1714 ifm->ifm_addrs = 0; 1715 route_input(m, NULL, AF_UNSPEC); 1716 } 1717 1718 /* 1719 * This is called to generate messages from the routing socket 1720 * indicating a network interface has had addresses associated with it. 1721 * if we ever reverse the logic and replace messages TO the routing 1722 * socket indicate a request to configure interfaces, then it will 1723 * be unnecessary as the routing socket will automatically generate 1724 * copies of it. 1725 */ 1726 void 1727 rtm_addr(int cmd, struct ifaddr *ifa) 1728 { 1729 struct ifnet *ifp = ifa->ifa_ifp; 1730 struct mbuf *m; 1731 struct rt_addrinfo info; 1732 struct ifa_msghdr *ifam; 1733 1734 if (rtptable.rtp_count == 0) 1735 return; 1736 1737 memset(&info, 0, sizeof(info)); 1738 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1739 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1740 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1741 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1742 if ((m = rtm_msg1(cmd, &info)) == NULL) 1743 return; 1744 ifam = mtod(m, struct ifa_msghdr *); 1745 ifam->ifam_index = ifp->if_index; 1746 ifam->ifam_metric = ifa->ifa_metric; 1747 ifam->ifam_flags = ifa->ifa_flags; 1748 ifam->ifam_addrs = info.rti_addrs; 1749 ifam->ifam_tableid = ifp->if_rdomain; 1750 1751 route_input(m, NULL, 1752 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1753 } 1754 1755 /* 1756 * This is called to generate routing socket messages indicating 1757 * network interface arrival and departure. 1758 */ 1759 void 1760 rtm_ifannounce(struct ifnet *ifp, int what) 1761 { 1762 struct if_announcemsghdr *ifan; 1763 struct mbuf *m; 1764 1765 if (rtptable.rtp_count == 0) 1766 return; 1767 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1768 if (m == NULL) 1769 return; 1770 ifan = mtod(m, struct if_announcemsghdr *); 1771 ifan->ifan_index = ifp->if_index; 1772 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1773 ifan->ifan_what = what; 1774 route_input(m, NULL, AF_UNSPEC); 1775 } 1776 1777 #ifdef BFD 1778 /* 1779 * This is used to generate routing socket messages indicating 1780 * the state of a BFD session. 1781 */ 1782 void 1783 rtm_bfd(struct bfd_config *bfd) 1784 { 1785 struct bfd_msghdr *bfdm; 1786 struct sockaddr_bfd sa_bfd; 1787 struct mbuf *m; 1788 struct rt_addrinfo info; 1789 1790 if (rtptable.rtp_count == 0) 1791 return; 1792 memset(&info, 0, sizeof(info)); 1793 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1794 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1795 1796 m = rtm_msg1(RTM_BFD, &info); 1797 if (m == NULL) 1798 return; 1799 bfdm = mtod(m, struct bfd_msghdr *); 1800 bfdm->bm_addrs = info.rti_addrs; 1801 1802 bfd2sa(bfd->bc_rt, &sa_bfd); 1803 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1804 1805 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1806 } 1807 #endif /* BFD */ 1808 1809 /* 1810 * This is used to generate routing socket messages indicating 1811 * the state of an ieee80211 interface. 1812 */ 1813 void 1814 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1815 { 1816 struct if_ieee80211_msghdr *ifim; 1817 struct mbuf *m; 1818 1819 if (rtptable.rtp_count == 0) 1820 return; 1821 m = rtm_msg1(RTM_80211INFO, NULL); 1822 if (m == NULL) 1823 return; 1824 ifim = mtod(m, struct if_ieee80211_msghdr *); 1825 ifim->ifim_index = ifp->if_index; 1826 ifim->ifim_tableid = ifp->if_rdomain; 1827 1828 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1829 route_input(m, NULL, AF_UNSPEC); 1830 } 1831 1832 /* 1833 * This is used to generate routing socket messages indicating 1834 * the address selection proposal from an interface. 1835 */ 1836 void 1837 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1838 uint8_t prio) 1839 { 1840 struct rt_msghdr *rtm; 1841 struct mbuf *m; 1842 1843 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1844 if (m == NULL) 1845 return; 1846 rtm = mtod(m, struct rt_msghdr *); 1847 rtm->rtm_flags = RTF_DONE | flags; 1848 rtm->rtm_priority = prio; 1849 rtm->rtm_tableid = ifp->if_rdomain; 1850 rtm->rtm_index = ifp->if_index; 1851 rtm->rtm_addrs = rtinfo->rti_addrs; 1852 1853 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1854 } 1855 1856 /* 1857 * This is used in dumping the kernel table via sysctl(). 1858 */ 1859 int 1860 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1861 { 1862 struct walkarg *w = v; 1863 int error = 0, size; 1864 struct rt_addrinfo info; 1865 struct ifnet *ifp; 1866 #ifdef BFD 1867 struct sockaddr_bfd sa_bfd; 1868 #endif 1869 struct sockaddr_rtlabel sa_rl; 1870 struct sockaddr_in6 sa_mask; 1871 1872 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1873 return 0; 1874 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1875 u_int8_t prio = w->w_arg & RTP_MASK; 1876 if (w->w_arg < 0) { 1877 prio = (-w->w_arg) & RTP_MASK; 1878 /* Show all routes that are not this priority */ 1879 if (prio == (rt->rt_priority & RTP_MASK)) 1880 return 0; 1881 } else { 1882 if (prio != (rt->rt_priority & RTP_MASK) && 1883 prio != RTP_ANY) 1884 return 0; 1885 } 1886 } 1887 bzero(&info, sizeof(info)); 1888 info.rti_info[RTAX_DST] = rt_key(rt); 1889 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1890 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1891 ifp = if_get(rt->rt_ifidx); 1892 if (ifp != NULL) { 1893 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1894 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1895 if (ifp->if_flags & IFF_POINTOPOINT) 1896 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1897 } 1898 if_put(ifp); 1899 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1900 #ifdef BFD 1901 if (rt->rt_flags & RTF_BFD) 1902 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1903 #endif 1904 #ifdef MPLS 1905 if (rt->rt_flags & RTF_MPLS) { 1906 struct sockaddr_mpls sa_mpls; 1907 1908 bzero(&sa_mpls, sizeof(sa_mpls)); 1909 sa_mpls.smpls_family = AF_MPLS; 1910 sa_mpls.smpls_len = sizeof(sa_mpls); 1911 sa_mpls.smpls_label = ((struct rt_mpls *) 1912 rt->rt_llinfo)->mpls_label; 1913 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1914 info.rti_mpls = ((struct rt_mpls *) 1915 rt->rt_llinfo)->mpls_operation; 1916 } 1917 #endif 1918 1919 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1920 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1921 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1922 1923 rtm->rtm_pid = curproc->p_p->ps_pid; 1924 rtm->rtm_flags = rt->rt_flags; 1925 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1926 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1927 /* Do not account the routing table's reference. */ 1928 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1929 rtm->rtm_index = rt->rt_ifidx; 1930 rtm->rtm_addrs = info.rti_addrs; 1931 rtm->rtm_tableid = id; 1932 #ifdef MPLS 1933 rtm->rtm_mpls = info.rti_mpls; 1934 #endif 1935 if ((error = copyout(rtm, w->w_where, size)) != 0) 1936 w->w_where = NULL; 1937 else 1938 w->w_where += size; 1939 } 1940 return (error); 1941 } 1942 1943 int 1944 sysctl_iflist(int af, struct walkarg *w) 1945 { 1946 struct ifnet *ifp; 1947 struct ifaddr *ifa; 1948 struct rt_addrinfo info; 1949 int len, error = 0; 1950 1951 bzero(&info, sizeof(info)); 1952 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1953 if (w->w_arg && w->w_arg != ifp->if_index) 1954 continue; 1955 /* Copy the link-layer address first */ 1956 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1957 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 1958 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1959 struct if_msghdr *ifm; 1960 1961 ifm = (struct if_msghdr *)w->w_tmem; 1962 ifm->ifm_index = ifp->if_index; 1963 ifm->ifm_tableid = ifp->if_rdomain; 1964 ifm->ifm_flags = ifp->if_flags; 1965 if_getdata(ifp, &ifm->ifm_data); 1966 ifm->ifm_addrs = info.rti_addrs; 1967 error = copyout(ifm, w->w_where, len); 1968 if (error) 1969 return (error); 1970 w->w_where += len; 1971 } 1972 info.rti_info[RTAX_IFP] = NULL; 1973 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1974 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 1975 if (af && af != ifa->ifa_addr->sa_family) 1976 continue; 1977 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1978 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1979 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1980 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 1981 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1982 struct ifa_msghdr *ifam; 1983 1984 ifam = (struct ifa_msghdr *)w->w_tmem; 1985 ifam->ifam_index = ifa->ifa_ifp->if_index; 1986 ifam->ifam_flags = ifa->ifa_flags; 1987 ifam->ifam_metric = ifa->ifa_metric; 1988 ifam->ifam_addrs = info.rti_addrs; 1989 error = copyout(w->w_tmem, w->w_where, len); 1990 if (error) 1991 return (error); 1992 w->w_where += len; 1993 } 1994 } 1995 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 1996 info.rti_info[RTAX_BRD] = NULL; 1997 } 1998 return (0); 1999 } 2000 2001 int 2002 sysctl_ifnames(struct walkarg *w) 2003 { 2004 struct if_nameindex_msg ifn; 2005 struct ifnet *ifp; 2006 int error = 0; 2007 2008 /* XXX ignore tableid for now */ 2009 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2010 if (w->w_arg && w->w_arg != ifp->if_index) 2011 continue; 2012 w->w_needed += sizeof(ifn); 2013 if (w->w_where && w->w_needed <= 0) { 2014 2015 memset(&ifn, 0, sizeof(ifn)); 2016 ifn.if_index = ifp->if_index; 2017 strlcpy(ifn.if_name, ifp->if_xname, 2018 sizeof(ifn.if_name)); 2019 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2020 if (error) 2021 return (error); 2022 w->w_where += sizeof(ifn); 2023 } 2024 } 2025 2026 return (0); 2027 } 2028 2029 int 2030 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2031 size_t newlen) 2032 { 2033 int i, error = EINVAL; 2034 u_char af; 2035 struct walkarg w; 2036 struct rt_tableinfo tableinfo; 2037 u_int tableid = 0; 2038 2039 if (new) 2040 return (EPERM); 2041 if (namelen < 3 || namelen > 4) 2042 return (EINVAL); 2043 af = name[0]; 2044 bzero(&w, sizeof(w)); 2045 w.w_where = where; 2046 w.w_given = *given; 2047 w.w_needed = 0 - w.w_given; 2048 w.w_op = name[1]; 2049 w.w_arg = name[2]; 2050 2051 if (namelen == 4) { 2052 tableid = name[3]; 2053 if (!rtable_exists(tableid)) 2054 return (ENOENT); 2055 } else 2056 tableid = curproc->p_p->ps_rtableid; 2057 2058 switch (w.w_op) { 2059 case NET_RT_DUMP: 2060 case NET_RT_FLAGS: 2061 NET_LOCK(); 2062 for (i = 1; i <= AF_MAX; i++) { 2063 if (af != 0 && af != i) 2064 continue; 2065 2066 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2067 &w); 2068 if (error == EAFNOSUPPORT) 2069 error = 0; 2070 if (error) 2071 break; 2072 } 2073 NET_UNLOCK(); 2074 break; 2075 2076 case NET_RT_IFLIST: 2077 NET_LOCK(); 2078 error = sysctl_iflist(af, &w); 2079 NET_UNLOCK(); 2080 break; 2081 2082 case NET_RT_STATS: 2083 return (sysctl_rtable_rtstat(where, given, new)); 2084 case NET_RT_TABLE: 2085 tableid = w.w_arg; 2086 if (!rtable_exists(tableid)) 2087 return (ENOENT); 2088 memset(&tableinfo, 0, sizeof tableinfo); 2089 tableinfo.rti_tableid = tableid; 2090 tableinfo.rti_domainid = rtable_l2(tableid); 2091 error = sysctl_rdstruct(where, given, new, 2092 &tableinfo, sizeof(tableinfo)); 2093 return (error); 2094 case NET_RT_IFNAMES: 2095 NET_LOCK(); 2096 error = sysctl_ifnames(&w); 2097 NET_UNLOCK(); 2098 break; 2099 } 2100 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2101 w.w_needed += w.w_given; 2102 if (where) { 2103 *given = w.w_where - (caddr_t)where; 2104 if (*given < w.w_needed) 2105 return (ENOMEM); 2106 } else 2107 *given = (11 * w.w_needed) / 10; 2108 2109 return (error); 2110 } 2111 2112 int 2113 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2114 { 2115 extern struct cpumem *rtcounters; 2116 uint64_t counters[rts_ncounters]; 2117 struct rtstat rtstat; 2118 uint32_t *words = (uint32_t *)&rtstat; 2119 int i; 2120 2121 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2122 memset(&rtstat, 0, sizeof rtstat); 2123 counters_read(rtcounters, counters, nitems(counters)); 2124 2125 for (i = 0; i < nitems(counters); i++) 2126 words[i] = (uint32_t)counters[i]; 2127 2128 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2129 } 2130 2131 int 2132 rtm_validate_proposal(struct rt_addrinfo *info) 2133 { 2134 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2135 RTA_SEARCH)) { 2136 return -1; 2137 } 2138 2139 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2140 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2141 if (sa == NULL) 2142 return -1; 2143 switch (sa->sa_family) { 2144 case AF_INET: 2145 if (sa->sa_len != sizeof(struct sockaddr_in)) 2146 return -1; 2147 break; 2148 case AF_INET6: 2149 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2150 return -1; 2151 break; 2152 default: 2153 return -1; 2154 } 2155 } 2156 2157 if (ISSET(info->rti_addrs, RTA_IFA)) { 2158 struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2159 if (sa == NULL) 2160 return -1; 2161 switch (sa->sa_family) { 2162 case AF_INET: 2163 if (sa->sa_len != sizeof(struct sockaddr_in)) 2164 return -1; 2165 break; 2166 case AF_INET6: 2167 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2168 return -1; 2169 break; 2170 default: 2171 return -1; 2172 } 2173 } 2174 2175 if (ISSET(info->rti_addrs, RTA_DNS)) { 2176 struct sockaddr_rtdns *rtdns = 2177 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2178 if (rtdns == NULL) 2179 return -1; 2180 if (rtdns->sr_len > sizeof(*rtdns)) 2181 return -1; 2182 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2183 return -1; 2184 switch (rtdns->sr_family) { 2185 case AF_INET: 2186 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2187 sr_dns)) % sizeof(struct in_addr) != 0) 2188 return -1; 2189 break; 2190 #ifdef INET6 2191 case AF_INET6: 2192 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2193 sr_dns)) % sizeof(struct in6_addr) != 0) 2194 return -1; 2195 break; 2196 #endif 2197 default: 2198 return -1; 2199 } 2200 } 2201 2202 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2203 struct sockaddr_rtstatic *rtstatic = 2204 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2205 if (rtstatic == NULL) 2206 return -1; 2207 if (rtstatic->sr_len > sizeof(*rtstatic)) 2208 return -1; 2209 if (rtstatic->sr_len <= 2210 offsetof(struct sockaddr_rtstatic, sr_static)) 2211 return -1; 2212 } 2213 2214 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2215 struct sockaddr_rtsearch *rtsearch = 2216 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2217 if (rtsearch == NULL) 2218 return -1; 2219 if (rtsearch->sr_len > sizeof(*rtsearch)) 2220 return -1; 2221 if (rtsearch->sr_len <= 2222 offsetof(struct sockaddr_rtsearch, sr_search)) 2223 return -1; 2224 } 2225 2226 return 0; 2227 } 2228 2229 /* 2230 * Definitions of protocols supported in the ROUTE domain. 2231 */ 2232 2233 extern struct domain routedomain; /* or at least forward */ 2234 2235 struct protosw routesw[] = { 2236 { 2237 .pr_type = SOCK_RAW, 2238 .pr_domain = &routedomain, 2239 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2240 .pr_output = route_output, 2241 .pr_ctloutput = route_ctloutput, 2242 .pr_usrreq = route_usrreq, 2243 .pr_attach = route_attach, 2244 .pr_detach = route_detach, 2245 .pr_init = route_prinit, 2246 .pr_sysctl = sysctl_rtable 2247 } 2248 }; 2249 2250 struct domain routedomain = { 2251 .dom_family = PF_ROUTE, 2252 .dom_name = "route", 2253 .dom_init = route_init, 2254 .dom_protosw = routesw, 2255 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2256 }; 2257