1 /* $OpenBSD: rtsock.c,v 1.222 2017/02/01 20:59:47 dhill Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/protosw.h> 73 74 #include <net/if.h> 75 #include <net/if_dl.h> 76 #include <net/if_var.h> 77 #include <net/route.h> 78 #include <net/raw_cb.h> 79 80 #include <netinet/in.h> 81 82 #ifdef MPLS 83 #include <netmpls/mpls.h> 84 #endif 85 #ifdef BFD 86 #include <net/bfd.h> 87 #endif 88 89 #include <sys/stdarg.h> 90 #include <sys/kernel.h> 91 #include <sys/timeout.h> 92 93 struct sockaddr route_dst = { 2, PF_ROUTE, }; 94 struct sockaddr route_src = { 2, PF_ROUTE, }; 95 96 struct walkarg { 97 int w_op, w_arg, w_given, w_needed, w_tmemsize; 98 caddr_t w_where, w_tmem; 99 }; 100 101 int route_ctloutput(int, struct socket *, int, int, struct mbuf *); 102 void route_input(struct mbuf *m0, sa_family_t); 103 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 104 int route_cleargateway(struct rtentry *, void *, unsigned int); 105 106 struct mbuf *rt_msg1(int, struct rt_addrinfo *); 107 int rt_msg2(int, int, struct rt_addrinfo *, caddr_t, 108 struct walkarg *); 109 void rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 110 111 int sysctl_iflist(int, struct walkarg *); 112 int sysctl_ifnames(struct walkarg *); 113 int sysctl_rtable_rtstat(void *, size_t *, void *); 114 115 struct routecb { 116 struct rawcb rcb; 117 struct timeout timeout; 118 unsigned int msgfilter; 119 unsigned int flags; 120 u_int rtableid; 121 }; 122 #define sotoroutecb(so) ((struct routecb *)(so)->so_pcb) 123 124 struct route_cb { 125 int ip_count; 126 int ip6_count; 127 int mpls_count; 128 int any_count; 129 }; 130 131 struct route_cb route_cb; 132 133 /* 134 * These flags and timeout are used for indicating to userland (via a 135 * RTM_DESYNC msg) when the route socket has overflowed and messages 136 * have been lost. 137 */ 138 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 139 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 140 queueing more packets */ 141 142 #define ROUTE_DESYNC_RESEND_TIMEOUT (hz / 5) /* In hz */ 143 144 void rt_senddesync(void *); 145 146 int 147 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 148 struct mbuf *control, struct proc *p) 149 { 150 struct rawcb *rp; 151 struct routecb *rop; 152 int af; 153 int error = 0; 154 155 NET_ASSERT_LOCKED(); 156 157 rp = sotorawcb(so); 158 159 switch (req) { 160 case PRU_ATTACH: 161 /* 162 * use the rawcb but allocate a routecb, this 163 * code does not care about the additional fields 164 * and works directly on the raw socket. 165 */ 166 rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO); 167 rp = &rop->rcb; 168 so->so_pcb = rp; 169 /* Init the timeout structure */ 170 timeout_set(&((struct routecb *)rp)->timeout, rt_senddesync, rp); 171 /* 172 * Don't call raw_usrreq() in the attach case, because 173 * we want to allow non-privileged processes to listen 174 * on and send "safe" commands to the routing socket. 
175 */ 176 if (curproc == 0) 177 error = EACCES; 178 else 179 error = raw_attach(so, (int)(long)nam); 180 if (error) { 181 free(rop, M_PCB, sizeof(struct routecb)); 182 return (error); 183 } 184 rop->rtableid = curproc->p_p->ps_rtableid; 185 af = rp->rcb_proto.sp_protocol; 186 if (af == AF_INET) 187 route_cb.ip_count++; 188 else if (af == AF_INET6) 189 route_cb.ip6_count++; 190 #ifdef MPLS 191 else if (af == AF_MPLS) 192 route_cb.mpls_count++; 193 #endif 194 rp->rcb_faddr = &route_src; 195 route_cb.any_count++; 196 soisconnected(so); 197 so->so_options |= SO_USELOOPBACK; 198 break; 199 200 case PRU_RCVD: 201 rop = (struct routecb *)rp; 202 203 /* 204 * If we are in a FLUSH state, check if the buffer is 205 * empty so that we can clear the flag. 206 */ 207 if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) && 208 ((sbspace(&rp->rcb_socket->so_rcv) == 209 rp->rcb_socket->so_rcv.sb_hiwat))) 210 rop->flags &= ~ROUTECB_FLAG_FLUSH; 211 break; 212 213 case PRU_DETACH: 214 if (rp) { 215 timeout_del(&((struct routecb *)rp)->timeout); 216 af = rp->rcb_proto.sp_protocol; 217 if (af == AF_INET) 218 route_cb.ip_count--; 219 else if (af == AF_INET6) 220 route_cb.ip6_count--; 221 #ifdef MPLS 222 else if (af == AF_MPLS) 223 route_cb.mpls_count--; 224 #endif 225 route_cb.any_count--; 226 } 227 /* FALLTHROUGH */ 228 default: 229 error = raw_usrreq(so, req, m, nam, control, p); 230 } 231 232 return (error); 233 } 234 235 int 236 route_ctloutput(int op, struct socket *so, int level, int optname, 237 struct mbuf *m) 238 { 239 struct routecb *rop = sotoroutecb(so); 240 int error = 0; 241 unsigned int tid; 242 243 if (level != AF_ROUTE) { 244 error = EINVAL; 245 if (op == PRCO_SETOPT && m) 246 m_free(m); 247 return (error); 248 } 249 250 switch (op) { 251 case PRCO_SETOPT: 252 switch (optname) { 253 case ROUTE_MSGFILTER: 254 if (m == NULL || m->m_len != sizeof(unsigned int)) 255 error = EINVAL; 256 else 257 rop->msgfilter = *mtod(m, unsigned int *); 258 break; 259 case ROUTE_TABLEFILTER: 260 if (m == NULL || m->m_len != sizeof(unsigned int)) { 261 error = EINVAL; 262 break; 263 } 264 tid = *mtod(m, unsigned int *); 265 if (tid != RTABLE_ANY && !rtable_exists(tid)) 266 error = ENOENT; 267 else 268 rop->rtableid = tid; 269 break; 270 default: 271 error = ENOPROTOOPT; 272 break; 273 } 274 m_free(m); 275 break; 276 case PRCO_GETOPT: 277 switch (optname) { 278 case ROUTE_MSGFILTER: 279 m->m_len = sizeof(unsigned int); 280 *mtod(m, unsigned int *) = rop->msgfilter; 281 break; 282 case ROUTE_TABLEFILTER: 283 m->m_len = sizeof(unsigned int); 284 *mtod(m, unsigned int *) = rop->rtableid; 285 break; 286 default: 287 error = ENOPROTOOPT; 288 break; 289 } 290 } 291 return (error); 292 } 293 294 void 295 rt_senddesync(void *data) 296 { 297 struct rawcb *rp; 298 struct routecb *rop; 299 struct mbuf *desync_mbuf; 300 int s; 301 302 rp = (struct rawcb *)data; 303 rop = (struct routecb *)rp; 304 305 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 306 if ((rop->flags & ROUTECB_FLAG_DESYNC) == 0) 307 return; 308 309 /* 310 * If we fail to alloc memory or if sbappendaddr() 311 * fails, re-add timeout and try again. 
312 */ 313 desync_mbuf = rt_msg1(RTM_DESYNC, NULL); 314 if (desync_mbuf != NULL) { 315 s = splsoftnet(); 316 if (sbappendaddr(&rp->rcb_socket->so_rcv, &route_src, 317 desync_mbuf, NULL) != 0) { 318 rop->flags &= ~ROUTECB_FLAG_DESYNC; 319 sorwakeup(rp->rcb_socket); 320 splx(s); 321 return; 322 } 323 splx(s); 324 m_freem(desync_mbuf); 325 } 326 /* Re-add timeout to try sending msg again */ 327 timeout_add(&rop->timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 328 } 329 330 void 331 route_input(struct mbuf *m0, sa_family_t sa_family) 332 { 333 struct rawcb *rp; 334 struct routecb *rop; 335 struct rt_msghdr *rtm; 336 struct mbuf *m = m0; 337 int s, sockets = 0; 338 struct socket *last = NULL; 339 struct sockaddr *sosrc, *sodst; 340 341 sosrc = &route_src; 342 sodst = &route_dst; 343 344 /* ensure that we can access the rtm_type via mtod() */ 345 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 346 m_freem(m); 347 return; 348 } 349 350 LIST_FOREACH(rp, &rawcb, rcb_list) { 351 if (rp->rcb_socket->so_state & SS_CANTRCVMORE) 352 continue; 353 if (rp->rcb_proto.sp_family != PF_ROUTE) 354 continue; 355 /* 356 * If the route socket is bound to an address family, only send 357 * messages that match the address family. Address family 358 * agnostic messages are always sent. 359 */ 360 if (rp->rcb_proto.sp_protocol != AF_UNSPEC && 361 sa_family != AF_UNSPEC && 362 rp->rcb_proto.sp_protocol != sa_family) 363 continue; 364 /* 365 * We assume the lower level routines have 366 * placed the address in a canonical format 367 * suitable for a structure comparison. 368 * 369 * Note that if the lengths are not the same 370 * the comparison will fail at the first byte. 371 */ 372 #define equal(a1, a2) \ 373 (bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0) 374 if (rp->rcb_laddr && !equal(rp->rcb_laddr, sodst)) 375 continue; 376 if (rp->rcb_faddr && !equal(rp->rcb_faddr, sosrc)) 377 continue; 378 379 /* filter messages that the process does not want */ 380 rop = (struct routecb *)rp; 381 rtm = mtod(m, struct rt_msghdr *); 382 /* but RTM_DESYNC can't be filtered */ 383 if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 && 384 !(rop->msgfilter & (1 << rtm->rtm_type))) 385 continue; 386 switch (rtm->rtm_type) { 387 case RTM_IFANNOUNCE: 388 case RTM_DESYNC: 389 /* no tableid */ 390 break; 391 case RTM_RESOLVE: 392 case RTM_NEWADDR: 393 case RTM_DELADDR: 394 case RTM_IFINFO: 395 /* check against rdomain id */ 396 if (rop->rtableid != RTABLE_ANY && 397 rtable_l2(rop->rtableid) != rtm->rtm_tableid) 398 continue; 399 break; 400 default: 401 /* check against rtable id */ 402 if (rop->rtableid != RTABLE_ANY && 403 rop->rtableid != rtm->rtm_tableid) 404 continue; 405 break; 406 } 407 408 /* 409 * Check to see if the flush flag is set. If so, don't queue 410 * any more messages until the flag is cleared.
411 */ 412 if ((rop->flags & ROUTECB_FLAG_FLUSH) != 0) 413 continue; 414 415 if (last) { 416 struct mbuf *n; 417 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { 418 s = splsoftnet(); 419 if (sbspace(&last->so_rcv) < (2 * MSIZE) || 420 sbappendaddr(&last->so_rcv, sosrc, 421 n, (struct mbuf *)NULL) == 0) { 422 /* 423 * Flag socket as desync'ed and 424 * flush required 425 */ 426 sotoroutecb(last)->flags |= 427 ROUTECB_FLAG_DESYNC | 428 ROUTECB_FLAG_FLUSH; 429 rt_senddesync((void *) sotorawcb(last)); 430 m_freem(n); 431 } else { 432 sorwakeup(last); 433 sockets++; 434 } 435 splx(s); 436 } 437 } 438 last = rp->rcb_socket; 439 } 440 if (last) { 441 s = splsoftnet(); 442 if (sbspace(&last->so_rcv) < (2 * MSIZE) || 443 sbappendaddr(&last->so_rcv, sosrc, 444 m, (struct mbuf *)NULL) == 0) { 445 /* Flag socket as desync'ed and flush required */ 446 sotoroutecb(last)->flags |= 447 ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 448 rt_senddesync((void *) sotorawcb(last)); 449 m_freem(m); 450 } else { 451 sorwakeup(last); 452 sockets++; 453 } 454 splx(s); 455 } else 456 m_freem(m); 457 } 458 459 struct rt_msghdr * 460 rt_report(struct rtentry *rt, u_char type, int seq, int tableid) 461 { 462 struct rt_msghdr *rtm; 463 struct rt_addrinfo info; 464 struct sockaddr_rtlabel sa_rl; 465 struct sockaddr_in6 sa_mask; 466 #ifdef BFD 467 struct sockaddr_bfd sa_bfd; 468 #endif 469 #ifdef MPLS 470 struct sockaddr_mpls sa_mpls; 471 #endif 472 struct ifnet *ifp = NULL; 473 int len; 474 475 bzero(&info, sizeof(info)); 476 info.rti_info[RTAX_DST] = rt_key(rt); 477 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 478 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 479 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 480 #ifdef BFD 481 if (rt->rt_flags & RTF_BFD) 482 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 483 #endif 484 #ifdef MPLS 485 if (rt->rt_flags & RTF_MPLS) { 486 bzero(&sa_mpls, sizeof(sa_mpls)); 487 sa_mpls.smpls_family = AF_MPLS; 488 sa_mpls.smpls_len = sizeof(sa_mpls); 489 sa_mpls.smpls_label = ((struct rt_mpls *) 490 rt->rt_llinfo)->mpls_label; 491 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 492 info.rti_mpls = ((struct rt_mpls *) 493 rt->rt_llinfo)->mpls_operation; 494 } 495 #endif 496 ifp = if_get(rt->rt_ifidx); 497 if (ifp != NULL) { 498 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 499 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 500 if (ifp->if_flags & IFF_POINTOPOINT) 501 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 502 } 503 if_put(ifp); 504 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 505 506 /* build new route message */ 507 len = rt_msg2(type, RTM_VERSION, &info, NULL, NULL); 508 /* XXX why can't we wait? Should be process context... */ 509 rtm = malloc(len, M_RTABLE, M_NOWAIT | M_ZERO); 510 if (rtm == NULL) 511 return NULL; 512 513 rt_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 514 rtm->rtm_type = type; 515 rtm->rtm_index = rt->rt_ifidx; 516 rtm->rtm_tableid = tableid; 517 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 518 rtm->rtm_flags = rt->rt_flags; 519 rtm->rtm_pid = curproc->p_p->ps_pid; 520 rtm->rtm_seq = seq; 521 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 522 rtm->rtm_addrs = info.rti_addrs; 523 #ifdef MPLS 524 rtm->rtm_mpls = info.rti_mpls; 525 #endif 526 return rtm; 527 } 528 529 int 530 route_output(struct mbuf *m, ...) 
531 { 532 struct rt_msghdr *rtm = NULL; 533 struct rtentry *rt = NULL; 534 struct rt_addrinfo info; 535 int plen, len, seq, newgate = 0, error = 0; 536 struct ifnet *ifp = NULL; 537 struct ifaddr *ifa = NULL; 538 struct socket *so; 539 struct rawcb *rp = NULL; 540 #ifdef MPLS 541 struct sockaddr_mpls *psa_mpls; 542 #endif 543 va_list ap; 544 u_int tableid; 545 u_int8_t prio; 546 u_char vers, type; 547 548 va_start(ap, m); 549 so = va_arg(ap, struct socket *); 550 va_end(ap); 551 552 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 553 (m = m_pullup(m, sizeof(int32_t))) == 0)) 554 return (ENOBUFS); 555 if ((m->m_flags & M_PKTHDR) == 0) 556 panic("route_output"); 557 len = m->m_pkthdr.len; 558 if (len < offsetof(struct rt_msghdr, rtm_type) + 1 || 559 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 560 error = EINVAL; 561 goto fail; 562 } 563 vers = mtod(m, struct rt_msghdr *)->rtm_version; 564 switch (vers) { 565 case RTM_VERSION: 566 if (len < sizeof(struct rt_msghdr)) { 567 error = EINVAL; 568 goto fail; 569 } 570 if (len > RTM_MAXSIZE) { 571 error = EMSGSIZE; 572 goto fail; 573 } 574 rtm = malloc(len, M_RTABLE, M_NOWAIT); 575 if (rtm == NULL) { 576 error = ENOBUFS; 577 goto fail; 578 } 579 m_copydata(m, 0, len, (caddr_t)rtm); 580 break; 581 default: 582 error = EPROTONOSUPPORT; 583 goto fail; 584 } 585 rtm->rtm_pid = curproc->p_p->ps_pid; 586 if (rtm->rtm_hdrlen == 0) /* old client */ 587 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 588 if (len < rtm->rtm_hdrlen) { 589 error = EINVAL; 590 goto fail; 591 } 592 593 /* Verify that the caller is sending an appropriate message early */ 594 switch (rtm->rtm_type) { 595 case RTM_ADD: 596 case RTM_DELETE: 597 case RTM_GET: 598 case RTM_CHANGE: 599 case RTM_LOCK: 600 break; 601 default: 602 error = EOPNOTSUPP; 603 goto fail; 604 } 605 606 /* 607 * Verify that the caller has the appropriate privilege; RTM_GET 608 * is the only operation the non-superuser is allowed. 609 */ 610 if (rtm->rtm_type != RTM_GET && suser(curproc, 0) != 0) { 611 error = EACCES; 612 goto fail; 613 } 614 tableid = rtm->rtm_tableid; 615 if (!rtable_exists(tableid)) { 616 if (rtm->rtm_type == RTM_ADD) { 617 if ((error = rtable_add(tableid)) != 0) 618 goto fail; 619 } else { 620 error = EINVAL; 621 goto fail; 622 } 623 } 624 625 626 /* Do not let userland play with kernel-only flags. 
*/ 627 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 628 error = EINVAL; 629 goto fail; 630 } 631 632 /* make sure that kernel-only bits are not set */ 633 rtm->rtm_priority &= RTP_MASK; 634 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 635 rtm->rtm_fmask &= RTF_FMASK; 636 637 if (rtm->rtm_priority != 0) { 638 if (rtm->rtm_priority > RTP_MAX || 639 rtm->rtm_priority == RTP_LOCAL) { 640 error = EINVAL; 641 goto fail; 642 } 643 prio = rtm->rtm_priority; 644 } else if (rtm->rtm_type != RTM_ADD) 645 prio = RTP_ANY; 646 else if (rtm->rtm_flags & RTF_STATIC) 647 prio = 0; 648 else 649 prio = RTP_DEFAULT; 650 651 bzero(&info, sizeof(info)); 652 info.rti_addrs = rtm->rtm_addrs; 653 rt_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info); 654 info.rti_flags = rtm->rtm_flags; 655 if (info.rti_info[RTAX_DST] == NULL || 656 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 657 (info.rti_info[RTAX_GATEWAY] != NULL && 658 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 659 info.rti_info[RTAX_GENMASK] != NULL) { 660 error = EINVAL; 661 goto fail; 662 } 663 #ifdef MPLS 664 info.rti_mpls = rtm->rtm_mpls; 665 #endif 666 667 if (info.rti_info[RTAX_GATEWAY] != NULL && 668 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 669 (info.rti_flags & RTF_CLONING) == 0) { 670 info.rti_flags |= RTF_LLINFO; 671 } 672 673 /* 674 * Do not use goto flush before this point since the message itself 675 * may not be consistent and could cause unexpected behaviour in other 676 * userland clients. Use goto fail instead. 677 */ 678 switch (rtm->rtm_type) { 679 case RTM_ADD: 680 if (info.rti_info[RTAX_GATEWAY] == NULL) { 681 error = EINVAL; 682 goto flush; 683 } 684 685 rt = rtable_match(tableid, info.rti_info[RTAX_DST], NULL); 686 if ((error = route_arp_conflict(rt, &info))) { 687 rtfree(rt); 688 rt = NULL; 689 goto flush; 690 } 691 692 /* 693 * We cannot go through a delete/create/insert cycle for 694 * a cached route because this can lead to races in the 695 * receive path. Instead we update the L2 cache. 696 */ 697 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 698 goto change; 699 700 rtfree(rt); 701 rt = NULL; 702 703 error = rtrequest(RTM_ADD, &info, prio, &rt, tableid); 704 if (error == 0) 705 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 706 &rt->rt_rmx); 707 else 708 goto flush; 709 break; 710 case RTM_DELETE: 711 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 712 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 713 prio); 714 715 /* 716 * Invalidate the cache of automagically created and 717 * referenced L2 entries to make sure that the ``rt_gwroute'' 718 * pointer stays valid for other CPUs. 719 */ 720 if ((rt != NULL) && (ISSET(rt->rt_flags, RTF_CACHED))) { 721 ifp = if_get(rt->rt_ifidx); 722 KASSERT(ifp != NULL); 723 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 724 if_put(ifp); 725 /* Reset the MTU of the gateway route. */ 726 rtable_walk(tableid, rt_key(rt)->sa_family, 727 route_cleargateway, rt); 728 break; 729 } 730 731 /* 732 * Make sure that local routes are only modified by the 733 * kernel.
734 */ 735 if ((rt != NULL) && 736 ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 737 error = EINVAL; 738 break; 739 } 740 741 rtfree(rt); 742 rt = NULL; 743 744 error = rtrequest(RTM_DELETE, &info, prio, &rt, tableid); 745 if (error != 0) 746 goto flush; 747 break; 748 case RTM_CHANGE: 749 case RTM_LOCK: 750 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 751 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 752 prio); 753 #ifndef SMALL_KERNEL 754 /* 755 * If we got multipath routes, we require users to specify 756 * a matching gateway. 757 */ 758 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 759 (info.rti_info[RTAX_GATEWAY] == NULL)) { 760 rtfree(rt); 761 rt = NULL; 762 } 763 #endif 764 /* 765 * If RTAX_GATEWAY is the argument we're trying to 766 * change, try to find a compatible route. 767 */ 768 if ((rt == NULL) && (info.rti_info[RTAX_GATEWAY] != NULL) && 769 (rtm->rtm_type == RTM_CHANGE)) { 770 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 771 info.rti_info[RTAX_NETMASK], NULL, prio); 772 #ifndef SMALL_KERNEL 773 /* Ensure we don't pick a multipath one. */ 774 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 775 rtfree(rt); 776 rt = NULL; 777 } 778 #endif 779 } 780 781 if (rt == NULL) { 782 error = ESRCH; 783 goto flush; 784 } 785 786 /* 787 * RTM_CHANGE/LOCK need a perfect match. 788 */ 789 plen = rtable_satoplen(info.rti_info[RTAX_DST]->sa_family, 790 info.rti_info[RTAX_NETMASK]); 791 if (rt_plen(rt) != plen ) { 792 error = ESRCH; 793 goto flush; 794 } 795 796 switch (rtm->rtm_type) { 797 case RTM_CHANGE: 798 if (info.rti_info[RTAX_GATEWAY] != NULL) 799 if (rt->rt_gateway == NULL || 800 bcmp(rt->rt_gateway, 801 info.rti_info[RTAX_GATEWAY], 802 info.rti_info[RTAX_GATEWAY]->sa_len)) { 803 newgate = 1; 804 } 805 /* 806 * Check reachable gateway before changing the route. 807 * New gateway could require new ifaddr, ifp; 808 * flags may also be different; ifp may be specified 809 * by ll sockaddr when protocol address is ambiguous. 
810 */ 811 if (newgate || info.rti_info[RTAX_IFP] != NULL || 812 info.rti_info[RTAX_IFA] != NULL) { 813 if ((error = rt_getifa(&info, tableid)) != 0) 814 goto flush; 815 ifa = info.rti_ifa; 816 if (rt->rt_ifa != ifa) { 817 ifp = if_get(rt->rt_ifidx); 818 KASSERT(ifp != NULL); 819 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 820 ifafree(rt->rt_ifa); 821 if_put(ifp); 822 823 ifa->ifa_refcnt++; 824 rt->rt_ifa = ifa; 825 rt->rt_ifidx = ifa->ifa_ifp->if_index; 826 #ifndef SMALL_KERNEL 827 /* recheck link state after ifp change*/ 828 rt_if_linkstate_change(rt, ifa->ifa_ifp, 829 tableid); 830 #endif 831 } 832 } 833 change: 834 if (info.rti_info[RTAX_GATEWAY] != NULL && (error = 835 rt_setgate(rt, info.rti_info[RTAX_GATEWAY], 836 tableid))) 837 goto flush; 838 #ifdef MPLS 839 if ((rtm->rtm_flags & RTF_MPLS) && 840 info.rti_info[RTAX_SRC] != NULL) { 841 struct rt_mpls *rt_mpls; 842 843 psa_mpls = (struct sockaddr_mpls *) 844 info.rti_info[RTAX_SRC]; 845 846 if (rt->rt_llinfo == NULL) { 847 rt->rt_llinfo = 848 malloc(sizeof(struct rt_mpls), 849 M_TEMP, M_NOWAIT|M_ZERO); 850 } 851 if (rt->rt_llinfo == NULL) { 852 error = ENOMEM; 853 goto flush; 854 } 855 856 rt_mpls = (struct rt_mpls *)rt->rt_llinfo; 857 858 if (psa_mpls != NULL) { 859 rt_mpls->mpls_label = 860 psa_mpls->smpls_label; 861 } 862 863 rt_mpls->mpls_operation = info.rti_mpls; 864 865 /* XXX: set experimental bits */ 866 867 rt->rt_flags |= RTF_MPLS; 868 } else if (newgate || ((rtm->rtm_fmask & RTF_MPLS) && 869 !(rtm->rtm_flags & RTF_MPLS))) { 870 /* if gateway changed remove MPLS information */ 871 if (rt->rt_llinfo != NULL && 872 rt->rt_flags & RTF_MPLS) { 873 free(rt->rt_llinfo, M_TEMP, 0); 874 rt->rt_llinfo = NULL; 875 rt->rt_flags &= ~RTF_MPLS; 876 } 877 } 878 #endif 879 880 #ifdef BFD 881 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 882 if ((error = bfdset(rt))) 883 goto flush; 884 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 885 ISSET(rtm->rtm_fmask, RTF_BFD)) { 886 bfdclear(rt); 887 } 888 #endif 889 890 /* Hack to allow some flags to be toggled */ 891 if (rtm->rtm_fmask) 892 rt->rt_flags = 893 (rt->rt_flags & ~rtm->rtm_fmask) | 894 (rtm->rtm_flags & rtm->rtm_fmask); 895 896 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 897 &rt->rt_rmx); 898 899 ifp = if_get(rt->rt_ifidx); 900 KASSERT(ifp != NULL); 901 ifp->if_rtrequest(ifp, RTM_ADD, rt); 902 if_put(ifp); 903 904 if (info.rti_info[RTAX_LABEL] != NULL) { 905 char *rtlabel = ((struct sockaddr_rtlabel *) 906 info.rti_info[RTAX_LABEL])->sr_label; 907 rtlabel_unref(rt->rt_labelid); 908 rt->rt_labelid = rtlabel_name2id(rtlabel); 909 } 910 if_group_routechange(info.rti_info[RTAX_DST], 911 info.rti_info[RTAX_NETMASK]); 912 /* FALLTHROUGH */ 913 case RTM_LOCK: 914 rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); 915 rt->rt_rmx.rmx_locks |= 916 (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 917 break; 918 } 919 break; 920 case RTM_GET: 921 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 922 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 923 prio); 924 if (rt == NULL) { 925 error = ESRCH; 926 goto flush; 927 } 928 break; 929 } 930 931 /* 932 * From here on these vars need to be valid 933 * rt, rtm, error, so, m, tableid, sa_family 934 * 935 * Other notes: 936 * - to end up here previous calls passed OK, error is most probably 0 937 * - error cases take the flush route or in bad cases fail 938 * - fail does not report the message back but just fails the call 939 * if the message is not valid then fail should be used 940 */ 941 942 type = rtm->rtm_type; 943 seq = rtm->rtm_seq; 944 free(rtm, M_RTABLE, 
0); 945 rtm = rt_report(rt, type, seq, tableid); 946 if (rtm == NULL) { 947 error = ENOBUFS; 948 goto fail; 949 } 950 951 flush: 952 if (rt) 953 rtfree(rt); 954 if (rtm) { 955 if (error) 956 rtm->rtm_errno = error; 957 else { 958 rtm->rtm_flags |= RTF_DONE; 959 } 960 } 961 962 /* 963 * Check to see if we don't want our own messages. 964 */ 965 if (!(so->so_options & SO_USELOOPBACK)) { 966 if (route_cb.any_count <= 1) { 967 /* no other listener and no loopback of messages */ 968 fail: 969 free(rtm, M_RTABLE, 0); 970 m_freem(m); 971 return (error); 972 } 973 /* There is another listener, so construct message */ 974 rp = sotorawcb(so); 975 rp->rcb_proto.sp_family = 0; /* Avoid us */ 976 } 977 if (rtm) { 978 if (m_copyback(m, 0, rtm->rtm_msglen, rtm, M_NOWAIT)) { 979 m_freem(m); 980 m = NULL; 981 } else if (m->m_pkthdr.len > rtm->rtm_msglen) 982 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); 983 free(rtm, M_RTABLE, 0); 984 } 985 if (m) 986 route_input(m, info.rti_info[RTAX_DST] ? 987 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 988 if (rp) 989 rp->rcb_proto.sp_family = PF_ROUTE; /* Readd us */ 990 991 return (error); 992 } 993 994 int 995 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 996 { 997 struct rtentry *nhrt = arg; 998 999 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1000 !ISSET(rt->rt_locks, RTV_MTU)) 1001 rt->rt_mtu = 0; 1002 1003 return (0); 1004 } 1005 1006 /* 1007 * Check that the user request to insert an ARP entry does not conflict 1008 * with existing ones. 1009 * 1010 * Only two entries are allowed for a given IP address: a private one 1011 * (priv) and a public one (pub). 1012 */ 1013 int 1014 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1015 { 1016 #ifdef ART 1017 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1018 1019 if ((info->rti_flags & RTF_LLINFO) == 0 || 1020 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1021 return (0); 1022 1023 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1024 return (0); 1025 1026 /* If the entry is cached, it can be updated. */ 1027 if (ISSET(rt->rt_flags, RTF_CACHED)) 1028 return (0); 1029 1030 /* 1031 * Same destination, not cached and both "priv" or "pub" conflict. 1032 * If a second entry exists, it always conflicts. 1033 */ 1034 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1035 ISSET(rt->rt_flags, RTF_MPATH)) 1036 return (EEXIST); 1037 1038 /* No conflict but an entry exists so we need to force mpath. */ 1039 info->rti_flags |= RTF_MPATH; 1040 #endif /* ART */ 1041 return (0); 1042 } 1043 1044 void 1045 rt_setmetrics(u_long which, const struct rt_metrics *in, 1046 struct rt_kmetrics *out) 1047 { 1048 int64_t expire; 1049 1050 if (which & RTV_MTU) 1051 out->rmx_mtu = in->rmx_mtu; 1052 if (which & RTV_EXPIRE) { 1053 expire = in->rmx_expire; 1054 if (expire != 0) { 1055 expire -= time_second; 1056 expire += time_uptime; 1057 } 1058 1059 out->rmx_expire = expire; 1060 } 1061 } 1062 1063 void 1064 rt_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1065 { 1066 int64_t expire; 1067 1068 expire = in->rmx_expire; 1069 if (expire != 0) { 1070 expire -= time_uptime; 1071 expire += time_second; 1072 } 1073 1074 bzero(out, sizeof(*out)); 1075 out->rmx_locks = in->rmx_locks; 1076 out->rmx_mtu = in->rmx_mtu; 1077 out->rmx_expire = expire; 1078 out->rmx_pksent = in->rmx_pksent; 1079 } 1080 1081 #define ROUNDUP(a) \ 1082 ((a) > 0 ?
(1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1083 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1084 1085 void 1086 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1087 { 1088 struct sockaddr *sa; 1089 int i; 1090 1091 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1092 for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) { 1093 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1094 continue; 1095 rtinfo->rti_info[i] = sa = (struct sockaddr *)cp; 1096 ADVANCE(cp, sa); 1097 } 1098 } 1099 1100 struct mbuf * 1101 rt_msg1(int type, struct rt_addrinfo *rtinfo) 1102 { 1103 struct rt_msghdr *rtm; 1104 struct mbuf *m; 1105 int i; 1106 struct sockaddr *sa; 1107 int len, dlen, hlen; 1108 1109 switch (type) { 1110 case RTM_DELADDR: 1111 case RTM_NEWADDR: 1112 len = sizeof(struct ifa_msghdr); 1113 break; 1114 case RTM_IFINFO: 1115 len = sizeof(struct if_msghdr); 1116 break; 1117 case RTM_IFANNOUNCE: 1118 len = sizeof(struct if_announcemsghdr); 1119 break; 1120 #ifdef BFD 1121 case RTM_BFD: 1122 len = sizeof(struct bfd_msghdr); 1123 break; 1124 #endif 1125 default: 1126 len = sizeof(struct rt_msghdr); 1127 break; 1128 } 1129 if (len > MCLBYTES) 1130 panic("rt_msg1"); 1131 m = m_gethdr(M_DONTWAIT, MT_DATA); 1132 if (m && len > MHLEN) { 1133 MCLGET(m, M_DONTWAIT); 1134 if ((m->m_flags & M_EXT) == 0) { 1135 m_free(m); 1136 m = NULL; 1137 } 1138 } 1139 if (m == NULL) 1140 return (m); 1141 m->m_pkthdr.len = m->m_len = hlen = len; 1142 m->m_pkthdr.ph_ifidx = 0; 1143 rtm = mtod(m, struct rt_msghdr *); 1144 bzero(rtm, len); 1145 for (i = 0; i < RTAX_MAX; i++) { 1146 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1147 continue; 1148 rtinfo->rti_addrs |= (1 << i); 1149 dlen = ROUNDUP(sa->sa_len); 1150 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1151 m_freem(m); 1152 return (NULL); 1153 } 1154 len += dlen; 1155 } 1156 rtm->rtm_msglen = len; 1157 rtm->rtm_hdrlen = hlen; 1158 rtm->rtm_version = RTM_VERSION; 1159 rtm->rtm_type = type; 1160 return (m); 1161 } 1162 1163 int 1164 rt_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1165 struct walkarg *w) 1166 { 1167 int i; 1168 int len, dlen, hlen, second_time = 0; 1169 caddr_t cp0; 1170 1171 rtinfo->rti_addrs = 0; 1172 again: 1173 switch (type) { 1174 case RTM_DELADDR: 1175 case RTM_NEWADDR: 1176 len = sizeof(struct ifa_msghdr); 1177 break; 1178 case RTM_IFINFO: 1179 len = sizeof(struct if_msghdr); 1180 break; 1181 default: 1182 len = sizeof(struct rt_msghdr); 1183 break; 1184 } 1185 hlen = len; 1186 if ((cp0 = cp) != NULL) 1187 cp += len; 1188 for (i = 0; i < RTAX_MAX; i++) { 1189 struct sockaddr *sa; 1190 1191 if ((sa = rtinfo->rti_info[i]) == NULL) 1192 continue; 1193 rtinfo->rti_addrs |= (1 << i); 1194 dlen = ROUNDUP(sa->sa_len); 1195 if (cp) { 1196 bcopy(sa, cp, (size_t)dlen); 1197 cp += dlen; 1198 } 1199 len += dlen; 1200 } 1201 /* align message length to the next natural boundary */ 1202 len = ALIGN(len); 1203 if (cp == 0 && w != NULL && !second_time) { 1204 struct walkarg *rw = w; 1205 1206 rw->w_needed += len; 1207 if (rw->w_needed <= 0 && rw->w_where) { 1208 if (rw->w_tmemsize < len) { 1209 free(rw->w_tmem, M_RTABLE, 0); 1210 rw->w_tmem = malloc(len, M_RTABLE, M_NOWAIT); 1211 if (rw->w_tmem) 1212 rw->w_tmemsize = len; 1213 } 1214 if (rw->w_tmem) { 1215 cp = rw->w_tmem; 1216 second_time = 1; 1217 goto again; 1218 } else 1219 rw->w_where = 0; 1220 } 1221 } 1222 if (cp && w) /* clear the message header */ 1223 bzero(cp0, hlen); 1224 1225 if (cp) { 1226 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1227 1228 
rtm->rtm_version = RTM_VERSION; 1229 rtm->rtm_type = type; 1230 rtm->rtm_msglen = len; 1231 rtm->rtm_hdrlen = hlen; 1232 } 1233 return (len); 1234 } 1235 1236 /* 1237 * This routine is called to generate a message from the routing 1238 * socket indicating that a redirect has occurred, a routing lookup 1239 * has failed, or that a protocol has detected timeouts to a particular 1240 * destination. 1241 */ 1242 void 1243 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1244 u_int ifidx, int error, u_int tableid) 1245 { 1246 struct rt_msghdr *rtm; 1247 struct mbuf *m; 1248 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1249 1250 if (route_cb.any_count == 0) 1251 return; 1252 m = rt_msg1(type, rtinfo); 1253 if (m == NULL) 1254 return; 1255 rtm = mtod(m, struct rt_msghdr *); 1256 rtm->rtm_flags = RTF_DONE | flags; 1257 rtm->rtm_priority = prio; 1258 rtm->rtm_errno = error; 1259 rtm->rtm_tableid = tableid; 1260 rtm->rtm_addrs = rtinfo->rti_addrs; 1261 rtm->rtm_index = ifidx; 1262 route_input(m, sa ? sa->sa_family : AF_UNSPEC); 1263 } 1264 1265 /* 1266 * This routine is called to generate a message from the routing 1267 * socket indicating that the status of a network interface has changed. 1268 */ 1269 void 1270 rt_ifmsg(struct ifnet *ifp) 1271 { 1272 struct if_msghdr *ifm; 1273 struct mbuf *m; 1274 1275 if (route_cb.any_count == 0) 1276 return; 1277 m = rt_msg1(RTM_IFINFO, NULL); 1278 if (m == NULL) 1279 return; 1280 ifm = mtod(m, struct if_msghdr *); 1281 ifm->ifm_index = ifp->if_index; 1282 ifm->ifm_tableid = ifp->if_rdomain; 1283 ifm->ifm_flags = ifp->if_flags; 1284 ifm->ifm_xflags = ifp->if_xflags; 1285 if_getdata(ifp, &ifm->ifm_data); 1286 ifm->ifm_addrs = 0; 1287 route_input(m, AF_UNSPEC); 1288 } 1289 1290 /* 1291 * This is called to generate messages from the routing socket 1292 * indicating a network interface has had addresses associated with it. 1293 * if we ever reverse the logic and replace messages TO the routing 1294 * socket indicate a request to configure interfaces, then it will 1295 * be unnecessary as the routing socket will automatically generate 1296 * copies of it. 1297 */ 1298 void 1299 rt_sendaddrmsg(struct rtentry *rt, int cmd, struct ifaddr *ifa) 1300 { 1301 struct ifnet *ifp = ifa->ifa_ifp; 1302 struct mbuf *m = NULL; 1303 struct rt_addrinfo info; 1304 struct ifa_msghdr *ifam; 1305 1306 if (route_cb.any_count == 0) 1307 return; 1308 1309 memset(&info, 0, sizeof(info)); 1310 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1311 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1312 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1313 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1314 if ((m = rt_msg1(cmd, &info)) == NULL) 1315 return; 1316 ifam = mtod(m, struct ifa_msghdr *); 1317 ifam->ifam_index = ifp->if_index; 1318 ifam->ifam_metric = ifa->ifa_metric; 1319 ifam->ifam_flags = ifa->ifa_flags; 1320 ifam->ifam_addrs = info.rti_addrs; 1321 ifam->ifam_tableid = ifp->if_rdomain; 1322 1323 route_input(m, ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1324 } 1325 1326 /* 1327 * This is called to generate routing socket messages indicating 1328 * network interface arrival and departure. 
1329 */ 1330 void 1331 rt_ifannouncemsg(struct ifnet *ifp, int what) 1332 { 1333 struct if_announcemsghdr *ifan; 1334 struct mbuf *m; 1335 1336 if (route_cb.any_count == 0) 1337 return; 1338 m = rt_msg1(RTM_IFANNOUNCE, NULL); 1339 if (m == NULL) 1340 return; 1341 ifan = mtod(m, struct if_announcemsghdr *); 1342 ifan->ifan_index = ifp->if_index; 1343 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1344 ifan->ifan_what = what; 1345 route_input(m, AF_UNSPEC); 1346 } 1347 1348 #ifdef BFD 1349 /* 1350 * This is used to generate routing socket messages indicating 1351 * the state of a BFD session. 1352 */ 1353 void 1354 rt_bfdmsg(struct bfd_config *bfd) 1355 { 1356 struct bfd_msghdr *bfdm; 1357 struct sockaddr_bfd sa_bfd; 1358 struct mbuf *m; 1359 struct rt_addrinfo info; 1360 1361 if (route_cb.any_count == 0) 1362 return; 1363 memset(&info, 0, sizeof(info)); 1364 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1365 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1366 1367 m = rt_msg1(RTM_BFD, &info); 1368 if (m == NULL) 1369 return; 1370 bfdm = mtod(m, struct bfd_msghdr *); 1371 bfdm->bm_addrs = info.rti_addrs; 1372 1373 bfd2sa(bfd->bc_rt, &sa_bfd); 1374 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1375 1376 route_input(m, info.rti_info[RTAX_DST]->sa_family); 1377 } 1378 #endif /* BFD */ 1379 1380 /* 1381 * This is used in dumping the kernel table via sysctl(). 1382 */ 1383 int 1384 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1385 { 1386 struct walkarg *w = v; 1387 int error = 0, size; 1388 struct rt_addrinfo info; 1389 struct ifnet *ifp; 1390 #ifdef BFD 1391 struct sockaddr_bfd sa_bfd; 1392 #endif 1393 #ifdef MPLS 1394 struct sockaddr_mpls sa_mpls; 1395 #endif 1396 struct sockaddr_rtlabel sa_rl; 1397 struct sockaddr_in6 sa_mask; 1398 1399 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1400 return 0; 1401 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1402 u_int8_t prio = w->w_arg & RTP_MASK; 1403 if (w->w_arg < 0) { 1404 prio = (-w->w_arg) & RTP_MASK; 1405 /* Show all routes that are not this priority */ 1406 if (prio == (rt->rt_priority & RTP_MASK)) 1407 return 0; 1408 } else { 1409 if (prio != (rt->rt_priority & RTP_MASK) && 1410 prio != RTP_ANY) 1411 return 0; 1412 } 1413 } 1414 bzero(&info, sizeof(info)); 1415 info.rti_info[RTAX_DST] = rt_key(rt); 1416 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1417 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1418 ifp = if_get(rt->rt_ifidx); 1419 if (ifp != NULL) { 1420 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1421 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1422 if (ifp->if_flags & IFF_POINTOPOINT) 1423 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1424 } 1425 if_put(ifp); 1426 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1427 #ifdef BFD 1428 if (rt->rt_flags & RTF_BFD) 1429 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1430 #endif 1431 #ifdef MPLS 1432 if (rt->rt_flags & RTF_MPLS) { 1433 bzero(&sa_mpls, sizeof(sa_mpls)); 1434 sa_mpls.smpls_family = AF_MPLS; 1435 sa_mpls.smpls_len = sizeof(sa_mpls); 1436 sa_mpls.smpls_label = ((struct rt_mpls *) 1437 rt->rt_llinfo)->mpls_label; 1438 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1439 info.rti_mpls = ((struct rt_mpls *) 1440 rt->rt_llinfo)->mpls_operation; 1441 } 1442 #endif 1443 1444 size = rt_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1445 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1446 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1447 1448 rtm->rtm_pid = 
curproc->p_p->ps_pid; 1449 rtm->rtm_flags = rt->rt_flags; 1450 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1451 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1452 /* Do not account the routing table's reference. */ 1453 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1454 rtm->rtm_index = rt->rt_ifidx; 1455 rtm->rtm_addrs = info.rti_addrs; 1456 rtm->rtm_tableid = id; 1457 #ifdef MPLS 1458 rtm->rtm_mpls = info.rti_mpls; 1459 #endif 1460 if ((error = copyout(rtm, w->w_where, size)) != 0) 1461 w->w_where = NULL; 1462 else 1463 w->w_where += size; 1464 } 1465 return (error); 1466 } 1467 1468 int 1469 sysctl_iflist(int af, struct walkarg *w) 1470 { 1471 struct ifnet *ifp; 1472 struct ifaddr *ifa; 1473 struct rt_addrinfo info; 1474 int len, error = 0; 1475 1476 bzero(&info, sizeof(info)); 1477 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1478 if (w->w_arg && w->w_arg != ifp->if_index) 1479 continue; 1480 /* Copy the link-layer address first */ 1481 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1482 len = rt_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 1483 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1484 struct if_msghdr *ifm; 1485 1486 ifm = (struct if_msghdr *)w->w_tmem; 1487 ifm->ifm_index = ifp->if_index; 1488 ifm->ifm_tableid = ifp->if_rdomain; 1489 ifm->ifm_flags = ifp->if_flags; 1490 if_getdata(ifp, &ifm->ifm_data); 1491 ifm->ifm_addrs = info.rti_addrs; 1492 error = copyout(ifm, w->w_where, len); 1493 if (error) 1494 return (error); 1495 w->w_where += len; 1496 } 1497 info.rti_info[RTAX_IFP] = NULL; 1498 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1499 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 1500 if (af && af != ifa->ifa_addr->sa_family) 1501 continue; 1502 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1503 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1504 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1505 len = rt_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 1506 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1507 struct ifa_msghdr *ifam; 1508 1509 ifam = (struct ifa_msghdr *)w->w_tmem; 1510 ifam->ifam_index = ifa->ifa_ifp->if_index; 1511 ifam->ifam_flags = ifa->ifa_flags; 1512 ifam->ifam_metric = ifa->ifa_metric; 1513 ifam->ifam_addrs = info.rti_addrs; 1514 error = copyout(w->w_tmem, w->w_where, len); 1515 if (error) 1516 return (error); 1517 w->w_where += len; 1518 } 1519 } 1520 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 1521 info.rti_info[RTAX_BRD] = NULL; 1522 } 1523 return (0); 1524 } 1525 1526 int 1527 sysctl_ifnames(struct walkarg *w) 1528 { 1529 struct if_nameindex_msg ifn; 1530 struct ifnet *ifp; 1531 int error = 0; 1532 1533 /* XXX ignore tableid for now */ 1534 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1535 if (w->w_arg && w->w_arg != ifp->if_index) 1536 continue; 1537 w->w_needed += sizeof(ifn); 1538 if (w->w_where && w->w_needed <= 0) { 1539 1540 memset(&ifn, 0, sizeof(ifn)); 1541 ifn.if_index = ifp->if_index; 1542 strlcpy(ifn.if_name, ifp->if_xname, 1543 sizeof(ifn.if_name)); 1544 error = copyout(&ifn, w->w_where, sizeof(ifn)); 1545 if (error) 1546 return (error); 1547 w->w_where += sizeof(ifn); 1548 } 1549 } 1550 1551 return (0); 1552 } 1553 1554 int 1555 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 1556 size_t newlen) 1557 { 1558 int i, error = EINVAL; 1559 u_char af; 1560 struct walkarg w; 1561 struct rt_tableinfo tableinfo; 1562 u_int tableid = 0; 1563 1564 NET_ASSERT_LOCKED(); 1565 1566 if (new) 1567 return (EPERM); 1568 if (namelen < 3 || namelen > 4) 1569 return (EINVAL); 1570 af = name[0]; 1571 bzero(&w, 
sizeof(w)); 1572 w.w_where = where; 1573 w.w_given = *given; 1574 w.w_needed = 0 - w.w_given; 1575 w.w_op = name[1]; 1576 w.w_arg = name[2]; 1577 1578 if (namelen == 4) { 1579 tableid = name[3]; 1580 if (!rtable_exists(tableid)) 1581 return (ENOENT); 1582 } else 1583 tableid = curproc->p_p->ps_rtableid; 1584 1585 switch (w.w_op) { 1586 case NET_RT_DUMP: 1587 case NET_RT_FLAGS: 1588 for (i = 1; i <= AF_MAX; i++) { 1589 if (af != 0 && af != i) 1590 continue; 1591 1592 error = rtable_walk(tableid, i, sysctl_dumpentry, &w); 1593 if (error == EAFNOSUPPORT) 1594 error = 0; 1595 if (error) 1596 break; 1597 } 1598 break; 1599 1600 case NET_RT_IFLIST: 1601 error = sysctl_iflist(af, &w); 1602 break; 1603 1604 case NET_RT_STATS: 1605 return (sysctl_rtable_rtstat(where, given, new)); 1606 case NET_RT_TABLE: 1607 tableid = w.w_arg; 1608 if (!rtable_exists(tableid)) 1609 return (ENOENT); 1610 tableinfo.rti_tableid = tableid; 1611 tableinfo.rti_domainid = rtable_l2(tableid); 1612 error = sysctl_rdstruct(where, given, new, 1613 &tableinfo, sizeof(tableinfo)); 1614 return (error); 1615 case NET_RT_IFNAMES: 1616 error = sysctl_ifnames(&w); 1617 break; 1618 } 1619 free(w.w_tmem, M_RTABLE, 0); 1620 w.w_needed += w.w_given; 1621 if (where) { 1622 *given = w.w_where - (caddr_t)where; 1623 if (*given < w.w_needed) 1624 return (ENOMEM); 1625 } else 1626 *given = (11 * w.w_needed) / 10; 1627 1628 return (error); 1629 } 1630 1631 int 1632 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 1633 { 1634 extern struct cpumem *rtcounters; 1635 uint64_t counters[rts_ncounters]; 1636 struct rtstat rtstat; 1637 uint32_t *words = (uint32_t *)&rtstat; 1638 int i; 1639 1640 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 1641 1642 counters_read(rtcounters, counters, nitems(counters)); 1643 1644 for (i = 0; i < nitems(counters); i++) 1645 words[i] = (uint32_t)counters[i]; 1646 1647 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 1648 } 1649 1650 /* 1651 * Definitions of protocols supported in the ROUTE domain. 1652 */ 1653 1654 extern struct domain routedomain; /* or at least forward */ 1655 1656 struct protosw routesw[] = { 1657 { SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 1658 0, route_output, 0, route_ctloutput, 1659 route_usrreq, 1660 raw_init, 0, 0, 0, 1661 sysctl_rtable, 1662 } 1663 }; 1664 1665 struct domain routedomain = 1666 { PF_ROUTE, "route", route_init, 0, 0, 1667 routesw, &routesw[nitems(routesw)] }; 1668
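/*
 * Usage sketch (userland side, illustrative only; not compiled as part of
 * this file): the socket options handled by route_ctloutput() live at
 * level AF_ROUTE, and the filter bits mirror the (1 << rtm_type) test in
 * route_input().  Names such as "filter", "msgbuf" and "handle" are
 * placeholders.
 *
 *	int s = socket(AF_ROUTE, SOCK_RAW, AF_INET);
 *	unsigned int filter = (1 << RTM_IFINFO) | (1 << RTM_NEWADDR) |
 *	    (1 << RTM_DELADDR);
 *	unsigned int rtable = 0;
 *	char msgbuf[2048];
 *	ssize_t n;
 *	struct rt_msghdr *rtm;
 *
 *	setsockopt(s, AF_ROUTE, ROUTE_MSGFILTER, &filter, sizeof(filter));
 *	setsockopt(s, AF_ROUTE, ROUTE_TABLEFILTER, &rtable, sizeof(rtable));
 *
 *	n = read(s, msgbuf, sizeof(msgbuf));	// one routing message
 *	rtm = (struct rt_msghdr *)msgbuf;
 *	if (n >= (ssize_t)sizeof(*rtm) && rtm->rtm_version == RTM_VERSION)
 *		handle(rtm);			// rtm_type, rtm_tableid, ...
 *
 * route_ctloutput() above rejects a ROUTE_TABLEFILTER id other than
 * RTABLE_ANY that does not name an existing table, and RTM_DESYNC is
 * never filtered, so a reader must still be prepared to see it.
 */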
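/*
 * A matching sketch of the write side that route_output() parses: an
 * RTM_GET request is a struct rt_msghdr followed by the sockaddrs named
 * in rtm_addrs, here only the destination.  "msg" is a placeholder and
 * this is a minimal example, not a complete program.
 *
 *	struct {
 *		struct rt_msghdr hdr;
 *		struct sockaddr_in dst;
 *	} msg;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.hdr.rtm_version = RTM_VERSION;
 *	msg.hdr.rtm_type = RTM_GET;		// allowed without root
 *	msg.hdr.rtm_hdrlen = sizeof(msg.hdr);
 *	msg.hdr.rtm_msglen = sizeof(msg);
 *	msg.hdr.rtm_seq = 1;
 *	msg.hdr.rtm_tableid = 0;
 *	msg.hdr.rtm_addrs = RTA_DST;
 *	msg.dst.sin_len = sizeof(msg.dst);
 *	msg.dst.sin_family = AF_INET;
 *	msg.dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 *	write(s, &msg, sizeof(msg));
 *
 * route_output() insists that the length written equals rtm_msglen and
 * that rtm_version is RTM_VERSION; the reply comes back on the same
 * socket with the caller's rtm_seq preserved and rtm_pid filled in.
 */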