/*	$OpenBSD: rtsock.c,v 1.228 2017/03/03 15:48:02 bluhm Exp $	*/
/*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/protosw.h> 73 74 #include <net/if.h> 75 #include <net/if_dl.h> 76 #include <net/if_var.h> 77 #include <net/route.h> 78 #include <net/raw_cb.h> 79 80 #include <netinet/in.h> 81 82 #ifdef MPLS 83 #include <netmpls/mpls.h> 84 #endif 85 #ifdef BFD 86 #include <net/bfd.h> 87 #endif 88 89 #include <sys/stdarg.h> 90 #include <sys/kernel.h> 91 #include <sys/timeout.h> 92 93 struct sockaddr route_dst = { 2, PF_ROUTE, }; 94 struct sockaddr route_src = { 2, PF_ROUTE, }; 95 96 struct walkarg { 97 int w_op, w_arg, w_given, w_needed, w_tmemsize; 98 caddr_t w_where, w_tmem; 99 }; 100 101 int route_ctloutput(int, struct socket *, int, int, struct mbuf *); 102 void route_input(struct mbuf *m0, sa_family_t); 103 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 104 int route_cleargateway(struct rtentry *, void *, unsigned int); 105 106 struct mbuf *rt_msg1(int, struct rt_addrinfo *); 107 int rt_msg2(int, int, struct rt_addrinfo *, caddr_t, 108 struct walkarg *); 109 void rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 110 111 void rt_proposalmsg(struct rt_msghdr *, struct rt_addrinfo *); 112 113 int sysctl_iflist(int, struct walkarg *); 114 int sysctl_ifnames(struct walkarg *); 115 int sysctl_rtable_rtstat(void *, size_t *, void *); 116 117 int validate_proposal(struct rt_addrinfo *); 118 119 struct routecb { 120 struct rawcb rcb; 121 struct timeout timeout; 122 unsigned int msgfilter; 123 unsigned int flags; 124 u_int rtableid; 125 }; 126 #define sotoroutecb(so) ((struct routecb *)(so)->so_pcb) 127 128 struct route_cb { 129 int ip_count; 130 int ip6_count; 131 int mpls_count; 132 int any_count; 133 }; 134 135 struct route_cb route_cb; 136 137 /* 138 * These flags and timeout are used 
for indicating to userland (via a 139 * RTM_DESYNC msg) when the route socket has overflowed and messages 140 * have been lost. 141 */ 142 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 143 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 144 queueing more packets */ 145 146 #define ROUTE_DESYNC_RESEND_TIMEOUT (hz / 5) /* In hz */ 147 148 void rt_senddesync(void *); 149 150 int 151 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 152 struct mbuf *control, struct proc *p) 153 { 154 struct rawcb *rp; 155 struct routecb *rop; 156 int af; 157 int error = 0; 158 159 NET_ASSERT_LOCKED(); 160 161 rp = sotorawcb(so); 162 163 switch (req) { 164 case PRU_ATTACH: 165 /* 166 * use the rawcb but allocate a routecb, this 167 * code does not care about the additional fields 168 * and works directly on the raw socket. 169 */ 170 rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO); 171 rp = &rop->rcb; 172 so->so_pcb = rp; 173 /* Init the timeout structure */ 174 timeout_set(&((struct routecb *)rp)->timeout, rt_senddesync, rp); 175 /* 176 * Don't call raw_usrreq() in the attach case, because 177 * we want to allow non-privileged processes to listen 178 * on and send "safe" commands to the routing socket. 
179 */ 180 if (curproc == 0) 181 error = EACCES; 182 else 183 error = raw_attach(so, (int)(long)nam); 184 if (error) { 185 free(rop, M_PCB, sizeof(struct routecb)); 186 return (error); 187 } 188 rop->rtableid = curproc->p_p->ps_rtableid; 189 af = rp->rcb_proto.sp_protocol; 190 if (af == AF_INET) 191 route_cb.ip_count++; 192 else if (af == AF_INET6) 193 route_cb.ip6_count++; 194 #ifdef MPLS 195 else if (af == AF_MPLS) 196 route_cb.mpls_count++; 197 #endif 198 rp->rcb_faddr = &route_src; 199 route_cb.any_count++; 200 soisconnected(so); 201 so->so_options |= SO_USELOOPBACK; 202 break; 203 204 case PRU_RCVD: 205 rop = (struct routecb *)rp; 206 207 /* 208 * If we are in a FLUSH state, check if the buffer is 209 * empty so that we can clear the flag. 210 */ 211 if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) && 212 ((sbspace(&rp->rcb_socket->so_rcv) == 213 rp->rcb_socket->so_rcv.sb_hiwat))) 214 rop->flags &= ~ROUTECB_FLAG_FLUSH; 215 break; 216 217 case PRU_DETACH: 218 if (rp) { 219 timeout_del(&((struct routecb *)rp)->timeout); 220 af = rp->rcb_proto.sp_protocol; 221 if (af == AF_INET) 222 route_cb.ip_count--; 223 else if (af == AF_INET6) 224 route_cb.ip6_count--; 225 #ifdef MPLS 226 else if (af == AF_MPLS) 227 route_cb.mpls_count--; 228 #endif 229 route_cb.any_count--; 230 } 231 /* FALLTHROUGH */ 232 default: 233 error = raw_usrreq(so, req, m, nam, control, p); 234 } 235 236 return (error); 237 } 238 239 int 240 route_ctloutput(int op, struct socket *so, int level, int optname, 241 struct mbuf *m) 242 { 243 struct routecb *rop = sotoroutecb(so); 244 int error = 0; 245 unsigned int tid; 246 247 if (level != AF_ROUTE) { 248 error = EINVAL; 249 if (op == PRCO_SETOPT && m) 250 m_free(m); 251 return (error); 252 } 253 254 switch (op) { 255 case PRCO_SETOPT: 256 switch (optname) { 257 case ROUTE_MSGFILTER: 258 if (m == NULL || m->m_len != sizeof(unsigned int)) 259 error = EINVAL; 260 else 261 rop->msgfilter = *mtod(m, unsigned int *); 262 break; 263 case ROUTE_TABLEFILTER: 264 
if (m == NULL || m->m_len != sizeof(unsigned int)) { 265 error = EINVAL; 266 break; 267 } 268 tid = *mtod(m, unsigned int *); 269 if (tid != RTABLE_ANY && !rtable_exists(tid)) 270 error = ENOENT; 271 else 272 rop->rtableid = tid; 273 break; 274 default: 275 error = ENOPROTOOPT; 276 break; 277 } 278 m_free(m); 279 break; 280 case PRCO_GETOPT: 281 switch (optname) { 282 case ROUTE_MSGFILTER: 283 m->m_len = sizeof(unsigned int); 284 *mtod(m, unsigned int *) = rop->msgfilter; 285 break; 286 case ROUTE_TABLEFILTER: 287 m->m_len = sizeof(unsigned int); 288 *mtod(m, unsigned int *) = rop->rtableid; 289 break; 290 default: 291 error = ENOPROTOOPT; 292 break; 293 } 294 } 295 return (error); 296 } 297 298 void 299 rt_senddesync(void *data) 300 { 301 struct rawcb *rp; 302 struct routecb *rop; 303 struct mbuf *desync_mbuf; 304 int s; 305 306 rp = (struct rawcb *)data; 307 rop = (struct routecb *)rp; 308 309 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 310 if ((rop->flags & ROUTECB_FLAG_DESYNC) == 0) 311 return; 312 313 /* 314 * If we fail to alloc memory or if sbappendaddr() 315 * fails, re-add timeout and try again. 
316 */ 317 desync_mbuf = rt_msg1(RTM_DESYNC, NULL); 318 if (desync_mbuf != NULL) { 319 s = splsoftnet(); 320 if (sbappendaddr(&rp->rcb_socket->so_rcv, &route_src, 321 desync_mbuf, NULL) != 0) { 322 rop->flags &= ~ROUTECB_FLAG_DESYNC; 323 sorwakeup(rp->rcb_socket); 324 splx(s); 325 return; 326 } 327 splx(s); 328 m_freem(desync_mbuf); 329 } 330 /* Re-add timeout to try sending msg again */ 331 timeout_add(&rop->timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 332 } 333 334 void 335 route_input(struct mbuf *m0, sa_family_t sa_family) 336 { 337 struct rawcb *rp; 338 struct routecb *rop; 339 struct rt_msghdr *rtm; 340 struct mbuf *m = m0; 341 int s, sockets = 0; 342 struct socket *last = NULL; 343 struct sockaddr *sosrc, *sodst; 344 345 sosrc = &route_src; 346 sodst = &route_dst; 347 348 /* ensure that we can access the rtm_type via mtod() */ 349 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 350 m_freem(m); 351 return; 352 } 353 354 LIST_FOREACH(rp, &rawcb, rcb_list) { 355 if (rp->rcb_socket->so_state & SS_CANTRCVMORE) 356 continue; 357 if (rp->rcb_proto.sp_family != PF_ROUTE) 358 continue; 359 /* 360 * If route socket is bound to an address family only send 361 * messages that match the address family. Address family 362 * agnostic messages are always send. 363 */ 364 if (rp->rcb_proto.sp_protocol != AF_UNSPEC && 365 sa_family != AF_UNSPEC && 366 rp->rcb_proto.sp_protocol != sa_family) 367 continue; 368 /* 369 * We assume the lower level routines have 370 * placed the address in a canonical format 371 * suitable for a structure comparison. 372 * 373 * Note that if the lengths are not the same 374 * the comparison will fail at the first byte. 
375 */ 376 #define equal(a1, a2) \ 377 (bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0) 378 if (rp->rcb_laddr && !equal(rp->rcb_laddr, sodst)) 379 continue; 380 if (rp->rcb_faddr && !equal(rp->rcb_faddr, sosrc)) 381 continue; 382 383 /* filter messages that the process does not want */ 384 rop = (struct routecb *)rp; 385 rtm = mtod(m, struct rt_msghdr *); 386 /* but RTM_DESYNC can't be filtered */ 387 if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 && 388 !(rop->msgfilter & (1 << rtm->rtm_type))) 389 continue; 390 switch (rtm->rtm_type) { 391 case RTM_IFANNOUNCE: 392 case RTM_DESYNC: 393 /* no tableid */ 394 break; 395 case RTM_RESOLVE: 396 case RTM_NEWADDR: 397 case RTM_DELADDR: 398 case RTM_IFINFO: 399 /* check against rdomain id */ 400 if (rop->rtableid != RTABLE_ANY && 401 rtable_l2(rop->rtableid) != rtm->rtm_tableid) 402 continue; 403 break; 404 default: 405 /* check against rtable id */ 406 if (rop->rtableid != RTABLE_ANY && 407 rop->rtableid != rtm->rtm_tableid) 408 continue; 409 break; 410 } 411 412 /* 413 * Check to see if the flush flag is set. If so, don't queue 414 * any more messages until the flag is cleared. 
415 */ 416 if ((rop->flags & ROUTECB_FLAG_FLUSH) != 0) 417 continue; 418 419 if (last) { 420 struct mbuf *n; 421 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { 422 s = splsoftnet(); 423 if (sbspace(&last->so_rcv) < (2 * MSIZE) || 424 sbappendaddr(&last->so_rcv, sosrc, 425 n, (struct mbuf *)NULL) == 0) { 426 /* 427 * Flag socket as desync'ed and 428 * flush required 429 */ 430 sotoroutecb(last)->flags |= 431 ROUTECB_FLAG_DESYNC | 432 ROUTECB_FLAG_FLUSH; 433 rt_senddesync((void *) sotorawcb(last)); 434 m_freem(n); 435 } else { 436 sorwakeup(last); 437 sockets++; 438 } 439 splx(s); 440 } 441 } 442 last = rp->rcb_socket; 443 } 444 if (last) { 445 s = splsoftnet(); 446 if (sbspace(&last->so_rcv) < (2 * MSIZE) || 447 sbappendaddr(&last->so_rcv, sosrc, 448 m, (struct mbuf *)NULL) == 0) { 449 /* Flag socket as desync'ed and flush required */ 450 sotoroutecb(last)->flags |= 451 ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 452 rt_senddesync((void *) sotorawcb(last)); 453 m_freem(m); 454 } else { 455 sorwakeup(last); 456 sockets++; 457 } 458 splx(s); 459 } else 460 m_freem(m); 461 } 462 463 struct rt_msghdr * 464 rt_report(struct rtentry *rt, u_char type, int seq, int tableid) 465 { 466 struct rt_msghdr *rtm; 467 struct rt_addrinfo info; 468 struct sockaddr_rtlabel sa_rl; 469 struct sockaddr_in6 sa_mask; 470 #ifdef BFD 471 struct sockaddr_bfd sa_bfd; 472 #endif 473 #ifdef MPLS 474 struct sockaddr_mpls sa_mpls; 475 #endif 476 struct ifnet *ifp = NULL; 477 int len; 478 479 bzero(&info, sizeof(info)); 480 info.rti_info[RTAX_DST] = rt_key(rt); 481 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 482 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 483 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 484 #ifdef BFD 485 if (rt->rt_flags & RTF_BFD) 486 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 487 #endif 488 #ifdef MPLS 489 if (rt->rt_flags & RTF_MPLS) { 490 bzero(&sa_mpls, sizeof(sa_mpls)); 491 sa_mpls.smpls_family = AF_MPLS; 492 sa_mpls.smpls_len 
= sizeof(sa_mpls); 493 sa_mpls.smpls_label = ((struct rt_mpls *) 494 rt->rt_llinfo)->mpls_label; 495 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 496 info.rti_mpls = ((struct rt_mpls *) 497 rt->rt_llinfo)->mpls_operation; 498 } 499 #endif 500 ifp = if_get(rt->rt_ifidx); 501 if (ifp != NULL) { 502 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 503 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 504 if (ifp->if_flags & IFF_POINTOPOINT) 505 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 506 } 507 if_put(ifp); 508 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 509 510 /* build new route message */ 511 len = rt_msg2(type, RTM_VERSION, &info, NULL, NULL); 512 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 513 514 rt_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 515 rtm->rtm_type = type; 516 rtm->rtm_index = rt->rt_ifidx; 517 rtm->rtm_tableid = tableid; 518 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 519 rtm->rtm_flags = rt->rt_flags; 520 rtm->rtm_pid = curproc->p_p->ps_pid; 521 rtm->rtm_seq = seq; 522 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 523 rtm->rtm_addrs = info.rti_addrs; 524 #ifdef MPLS 525 rtm->rtm_mpls = info.rti_mpls; 526 #endif 527 return rtm; 528 } 529 530 int 531 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr, 532 struct mbuf *control) 533 { 534 struct rt_msghdr *rtm = NULL; 535 struct rtentry *rt = NULL; 536 struct rt_addrinfo info; 537 int plen, len, seq, newgate = 0, error = 0; 538 struct ifnet *ifp = NULL; 539 struct ifaddr *ifa = NULL; 540 struct rawcb *rp = NULL; 541 #ifdef MPLS 542 struct sockaddr_mpls *psa_mpls; 543 #endif 544 u_int tableid; 545 u_int8_t prio; 546 u_char vers, type; 547 548 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 549 (m = m_pullup(m, sizeof(int32_t))) == 0)) 550 return (ENOBUFS); 551 if ((m->m_flags & M_PKTHDR) == 0) 552 panic("route_output"); 553 len = m->m_pkthdr.len; 554 if (len < offsetof(struct rt_msghdr, rtm_type) + 1 || 555 len != mtod(m, struct rt_msghdr 
*)->rtm_msglen) { 556 error = EINVAL; 557 goto fail; 558 } 559 vers = mtod(m, struct rt_msghdr *)->rtm_version; 560 switch (vers) { 561 case RTM_VERSION: 562 if (len < sizeof(struct rt_msghdr)) { 563 error = EINVAL; 564 goto fail; 565 } 566 if (len > RTM_MAXSIZE) { 567 error = EMSGSIZE; 568 goto fail; 569 } 570 rtm = malloc(len, M_RTABLE, M_WAITOK); 571 m_copydata(m, 0, len, (caddr_t)rtm); 572 break; 573 default: 574 error = EPROTONOSUPPORT; 575 goto fail; 576 } 577 rtm->rtm_pid = curproc->p_p->ps_pid; 578 if (rtm->rtm_hdrlen == 0) /* old client */ 579 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 580 if (len < rtm->rtm_hdrlen) { 581 error = EINVAL; 582 goto fail; 583 } 584 585 /* Verify that the caller is sending an appropriate message early */ 586 switch (rtm->rtm_type) { 587 case RTM_ADD: 588 case RTM_DELETE: 589 case RTM_GET: 590 case RTM_CHANGE: 591 case RTM_LOCK: 592 case RTM_PROPOSAL: 593 break; 594 default: 595 error = EOPNOTSUPP; 596 goto fail; 597 } 598 599 /* 600 * Verify that the caller has the appropriate privilege; RTM_GET 601 * is the only operation the non-superuser is allowed. 602 */ 603 if (rtm->rtm_type != RTM_GET && suser(curproc, 0) != 0) { 604 error = EACCES; 605 goto fail; 606 } 607 tableid = rtm->rtm_tableid; 608 if (!rtable_exists(tableid)) { 609 if (rtm->rtm_type == RTM_ADD) { 610 if ((error = rtable_add(tableid)) != 0) 611 goto fail; 612 } else { 613 error = EINVAL; 614 goto fail; 615 } 616 } 617 618 619 /* Do not let userland play with kernel-only flags. 
*/ 620 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 621 error = EINVAL; 622 goto fail; 623 } 624 625 /* make sure that kernel-only bits are not set */ 626 rtm->rtm_priority &= RTP_MASK; 627 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 628 rtm->rtm_fmask &= RTF_FMASK; 629 630 if (rtm->rtm_priority != 0) { 631 if (rtm->rtm_priority > RTP_MAX || 632 rtm->rtm_priority == RTP_LOCAL) { 633 error = EINVAL; 634 goto fail; 635 } 636 prio = rtm->rtm_priority; 637 } else if (rtm->rtm_type != RTM_ADD) 638 prio = RTP_ANY; 639 else if (rtm->rtm_flags & RTF_STATIC) 640 prio = 0; 641 else 642 prio = RTP_DEFAULT; 643 644 bzero(&info, sizeof(info)); 645 info.rti_addrs = rtm->rtm_addrs; 646 rt_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info); 647 info.rti_flags = rtm->rtm_flags; 648 if (rtm->rtm_type != RTM_PROPOSAL && 649 (info.rti_info[RTAX_DST] == NULL || 650 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 651 (info.rti_info[RTAX_GATEWAY] != NULL && 652 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 653 info.rti_info[RTAX_GENMASK] != NULL)) { 654 error = EINVAL; 655 goto fail; 656 } 657 #ifdef MPLS 658 info.rti_mpls = rtm->rtm_mpls; 659 #endif 660 661 if (info.rti_info[RTAX_GATEWAY] != NULL && 662 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 663 (info.rti_flags & RTF_CLONING) == 0) { 664 info.rti_flags |= RTF_LLINFO; 665 } 666 667 /* 668 * Do not use goto flush before this point since the message itself 669 * may be not consistent and could cause unexpected behaviour in other 670 * userland clients. Use goto fail instead. 671 */ 672 673 /* 674 * Validate RTM_PROPOSAL and pass it along or error out. 
675 */ 676 if (rtm->rtm_type == RTM_PROPOSAL) { 677 if (validate_proposal(&info) == -1) { 678 error = EINVAL; 679 goto fail; 680 } 681 goto flush; 682 } 683 684 switch (rtm->rtm_type) { 685 case RTM_ADD: 686 if (info.rti_info[RTAX_GATEWAY] == NULL) { 687 error = EINVAL; 688 goto flush; 689 } 690 691 rt = rtable_match(tableid, info.rti_info[RTAX_DST], NULL); 692 if ((error = route_arp_conflict(rt, &info))) { 693 rtfree(rt); 694 rt = NULL; 695 goto flush; 696 } 697 698 /* 699 * We cannot go through a delete/create/insert cycle for 700 * cached route because this can lead to races in the 701 * receive path. Instead we upade the L2 cache. 702 */ 703 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 704 goto change; 705 706 rtfree(rt); 707 rt = NULL; 708 709 error = rtrequest(RTM_ADD, &info, prio, &rt, tableid); 710 if (error == 0) 711 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 712 &rt->rt_rmx); 713 else 714 goto flush; 715 break; 716 case RTM_DELETE: 717 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 718 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 719 prio); 720 721 /* 722 * Invalidate the cache of automagically created and 723 * referenced L2 entries to make sure that ``rt_gwroute'' 724 * pointer stays valid for other CPUs. 725 */ 726 if ((rt != NULL) && (ISSET(rt->rt_flags, RTF_CACHED))) { 727 ifp = if_get(rt->rt_ifidx); 728 KASSERT(ifp != NULL); 729 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 730 if_put(ifp); 731 /* Reset the MTU of the gateway route. */ 732 rtable_walk(tableid, rt_key(rt)->sa_family, 733 route_cleargateway, rt); 734 break; 735 } 736 737 /* 738 * Make sure that local routes are only modified by the 739 * kernel. 
740 */ 741 if ((rt != NULL) && 742 ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 743 error = EINVAL; 744 break; 745 } 746 747 rtfree(rt); 748 rt = NULL; 749 750 error = rtrequest(RTM_DELETE, &info, prio, &rt, tableid); 751 if (error != 0) 752 goto flush; 753 break; 754 case RTM_CHANGE: 755 case RTM_LOCK: 756 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 757 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 758 prio); 759 #ifndef SMALL_KERNEL 760 /* 761 * If we got multipath routes, we require users to specify 762 * a matching gateway. 763 */ 764 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 765 (info.rti_info[RTAX_GATEWAY] == NULL)) { 766 rtfree(rt); 767 rt = NULL; 768 } 769 #endif 770 /* 771 * If RTAX_GATEWAY is the argument we're trying to 772 * change, try to find a compatible route. 773 */ 774 if ((rt == NULL) && (info.rti_info[RTAX_GATEWAY] != NULL) && 775 (rtm->rtm_type == RTM_CHANGE)) { 776 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 777 info.rti_info[RTAX_NETMASK], NULL, prio); 778 #ifndef SMALL_KERNEL 779 /* Ensure we don't pick a multipath one. */ 780 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 781 rtfree(rt); 782 rt = NULL; 783 } 784 #endif 785 } 786 787 if (rt == NULL) { 788 error = ESRCH; 789 goto flush; 790 } 791 792 /* 793 * RTM_CHANGE/LOCK need a perfect match. 794 */ 795 plen = rtable_satoplen(info.rti_info[RTAX_DST]->sa_family, 796 info.rti_info[RTAX_NETMASK]); 797 if (rt_plen(rt) != plen ) { 798 error = ESRCH; 799 goto flush; 800 } 801 802 switch (rtm->rtm_type) { 803 case RTM_CHANGE: 804 if (info.rti_info[RTAX_GATEWAY] != NULL) 805 if (rt->rt_gateway == NULL || 806 bcmp(rt->rt_gateway, 807 info.rti_info[RTAX_GATEWAY], 808 info.rti_info[RTAX_GATEWAY]->sa_len)) { 809 newgate = 1; 810 } 811 /* 812 * Check reachable gateway before changing the route. 
813 * New gateway could require new ifaddr, ifp; 814 * flags may also be different; ifp may be specified 815 * by ll sockaddr when protocol address is ambiguous. 816 */ 817 if (newgate || info.rti_info[RTAX_IFP] != NULL || 818 info.rti_info[RTAX_IFA] != NULL) { 819 if ((error = rt_getifa(&info, tableid)) != 0) 820 goto flush; 821 ifa = info.rti_ifa; 822 if (rt->rt_ifa != ifa) { 823 ifp = if_get(rt->rt_ifidx); 824 KASSERT(ifp != NULL); 825 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 826 ifafree(rt->rt_ifa); 827 if_put(ifp); 828 829 ifa->ifa_refcnt++; 830 rt->rt_ifa = ifa; 831 rt->rt_ifidx = ifa->ifa_ifp->if_index; 832 #ifndef SMALL_KERNEL 833 /* recheck link state after ifp change*/ 834 rt_if_linkstate_change(rt, ifa->ifa_ifp, 835 tableid); 836 #endif 837 } 838 } 839 change: 840 if (info.rti_info[RTAX_GATEWAY] != NULL && (error = 841 rt_setgate(rt, info.rti_info[RTAX_GATEWAY], 842 tableid))) 843 goto flush; 844 #ifdef MPLS 845 if ((rtm->rtm_flags & RTF_MPLS) && 846 info.rti_info[RTAX_SRC] != NULL) { 847 struct rt_mpls *rt_mpls; 848 849 psa_mpls = (struct sockaddr_mpls *) 850 info.rti_info[RTAX_SRC]; 851 852 if (rt->rt_llinfo == NULL) { 853 rt->rt_llinfo = 854 malloc(sizeof(struct rt_mpls), 855 M_TEMP, M_WAITOK | M_ZERO); 856 } 857 858 rt_mpls = (struct rt_mpls *)rt->rt_llinfo; 859 860 if (psa_mpls != NULL) { 861 rt_mpls->mpls_label = 862 psa_mpls->smpls_label; 863 } 864 865 rt_mpls->mpls_operation = info.rti_mpls; 866 867 /* XXX: set experimental bits */ 868 869 rt->rt_flags |= RTF_MPLS; 870 } else if (newgate || ((rtm->rtm_fmask & RTF_MPLS) && 871 !(rtm->rtm_flags & RTF_MPLS))) { 872 /* if gateway changed remove MPLS information */ 873 if (rt->rt_llinfo != NULL && 874 rt->rt_flags & RTF_MPLS) { 875 free(rt->rt_llinfo, M_TEMP, 0); 876 rt->rt_llinfo = NULL; 877 rt->rt_flags &= ~RTF_MPLS; 878 } 879 } 880 #endif 881 882 #ifdef BFD 883 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 884 if ((error = bfdset(rt))) 885 goto flush; 886 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 887 
ISSET(rtm->rtm_fmask, RTF_BFD)) { 888 bfdclear(rt); 889 } 890 #endif 891 892 /* Hack to allow some flags to be toggled */ 893 if (rtm->rtm_fmask) 894 rt->rt_flags = 895 (rt->rt_flags & ~rtm->rtm_fmask) | 896 (rtm->rtm_flags & rtm->rtm_fmask); 897 898 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 899 &rt->rt_rmx); 900 901 ifp = if_get(rt->rt_ifidx); 902 KASSERT(ifp != NULL); 903 ifp->if_rtrequest(ifp, RTM_ADD, rt); 904 if_put(ifp); 905 906 if (info.rti_info[RTAX_LABEL] != NULL) { 907 char *rtlabel = ((struct sockaddr_rtlabel *) 908 info.rti_info[RTAX_LABEL])->sr_label; 909 rtlabel_unref(rt->rt_labelid); 910 rt->rt_labelid = rtlabel_name2id(rtlabel); 911 } 912 if_group_routechange(info.rti_info[RTAX_DST], 913 info.rti_info[RTAX_NETMASK]); 914 /* FALLTHROUGH */ 915 case RTM_LOCK: 916 rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); 917 rt->rt_rmx.rmx_locks |= 918 (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 919 break; 920 } 921 break; 922 case RTM_GET: 923 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 924 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 925 prio); 926 if (rt == NULL) { 927 error = ESRCH; 928 goto flush; 929 } 930 break; 931 } 932 933 /* 934 * From here on these vars need to be valid 935 * rt, rtm, error, so, m, tableid, sa_family 936 * 937 * Other notes: 938 * - to end up here previous calls passed OK, error is most probably 0 939 * - error cases take the flush route or in bad cases fail 940 * - fail does not report the message back but just fails the call 941 * if the message is not valid then fail should be used 942 */ 943 944 type = rtm->rtm_type; 945 seq = rtm->rtm_seq; 946 free(rtm, M_RTABLE, 0); 947 rtm = rt_report(rt, type, seq, tableid); 948 flush: 949 rtfree(rt); 950 if (error) { 951 rtm->rtm_errno = error; 952 } else { 953 rtm->rtm_flags |= RTF_DONE; 954 } 955 956 /* 957 * Check to see if we don't want our own messages. 
958 */ 959 if (!(so->so_options & SO_USELOOPBACK)) { 960 if (route_cb.any_count <= 1) { 961 /* no other listener and no loopback of messages */ 962 fail: 963 free(rtm, M_RTABLE, 0); 964 m_freem(m); 965 return (error); 966 } 967 /* There is another listener, so construct message */ 968 rp = sotorawcb(so); 969 rp->rcb_proto.sp_family = 0; /* Avoid us */ 970 } 971 if (rtm) { 972 if (m_copyback(m, 0, rtm->rtm_msglen, rtm, M_NOWAIT)) { 973 m_freem(m); 974 m = NULL; 975 } else if (m->m_pkthdr.len > rtm->rtm_msglen) 976 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); 977 free(rtm, M_RTABLE, 0); 978 } 979 if (m) 980 route_input(m, info.rti_info[RTAX_DST] ? 981 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 982 if (rp) 983 rp->rcb_proto.sp_family = PF_ROUTE; /* Readd us */ 984 985 return (error); 986 } 987 988 int 989 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 990 { 991 struct rtentry *nhrt = arg; 992 993 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 994 !ISSET(rt->rt_locks, RTV_MTU)) 995 rt->rt_mtu = 0; 996 997 return (0); 998 } 999 1000 /* 1001 * Check if the user request to insert an ARP entry does not conflict 1002 * with existing ones. 1003 * 1004 * Only two entries are allowed for a given IP address: a private one 1005 * (priv) and a public one (pub). 1006 */ 1007 int 1008 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1009 { 1010 #ifdef ART 1011 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1012 1013 if ((info->rti_flags & RTF_LLINFO) == 0 || 1014 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1015 return (0); 1016 1017 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1018 return (0); 1019 1020 /* If the entry is cached, it can be updated. */ 1021 if (ISSET(rt->rt_flags, RTF_CACHED)) 1022 return (0); 1023 1024 /* 1025 * Same destination, not cached and both "priv" or "pub" conflict. 1026 * If a second entry exists, it always conflict. 
1027 */ 1028 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1029 ISSET(rt->rt_flags, RTF_MPATH)) 1030 return (EEXIST); 1031 1032 /* No conflict but an entry exist so we need to force mpath. */ 1033 info->rti_flags |= RTF_MPATH; 1034 #endif /* ART */ 1035 return (0); 1036 } 1037 1038 void 1039 rt_setmetrics(u_long which, const struct rt_metrics *in, 1040 struct rt_kmetrics *out) 1041 { 1042 int64_t expire; 1043 1044 if (which & RTV_MTU) 1045 out->rmx_mtu = in->rmx_mtu; 1046 if (which & RTV_EXPIRE) { 1047 expire = in->rmx_expire; 1048 if (expire != 0) { 1049 expire -= time_second; 1050 expire += time_uptime; 1051 } 1052 1053 out->rmx_expire = expire; 1054 } 1055 } 1056 1057 void 1058 rt_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1059 { 1060 int64_t expire; 1061 1062 expire = in->rmx_expire; 1063 if (expire != 0) { 1064 expire -= time_uptime; 1065 expire += time_second; 1066 } 1067 1068 bzero(out, sizeof(*out)); 1069 out->rmx_locks = in->rmx_locks; 1070 out->rmx_mtu = in->rmx_mtu; 1071 out->rmx_expire = expire; 1072 out->rmx_pksent = in->rmx_pksent; 1073 } 1074 1075 #define ROUNDUP(a) \ 1076 ((a) > 0 ? 
(1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1077 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1078 1079 void 1080 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1081 { 1082 struct sockaddr *sa; 1083 int i; 1084 1085 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1086 for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) { 1087 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1088 continue; 1089 rtinfo->rti_info[i] = sa = (struct sockaddr *)cp; 1090 ADVANCE(cp, sa); 1091 } 1092 } 1093 1094 struct mbuf * 1095 rt_msg1(int type, struct rt_addrinfo *rtinfo) 1096 { 1097 struct rt_msghdr *rtm; 1098 struct mbuf *m; 1099 int i; 1100 struct sockaddr *sa; 1101 int len, dlen, hlen; 1102 1103 switch (type) { 1104 case RTM_DELADDR: 1105 case RTM_NEWADDR: 1106 len = sizeof(struct ifa_msghdr); 1107 break; 1108 case RTM_IFINFO: 1109 len = sizeof(struct if_msghdr); 1110 break; 1111 case RTM_IFANNOUNCE: 1112 len = sizeof(struct if_announcemsghdr); 1113 break; 1114 #ifdef BFD 1115 case RTM_BFD: 1116 len = sizeof(struct bfd_msghdr); 1117 break; 1118 #endif 1119 default: 1120 len = sizeof(struct rt_msghdr); 1121 break; 1122 } 1123 if (len > MCLBYTES) 1124 panic("rt_msg1"); 1125 m = m_gethdr(M_DONTWAIT, MT_DATA); 1126 if (m && len > MHLEN) { 1127 MCLGET(m, M_DONTWAIT); 1128 if ((m->m_flags & M_EXT) == 0) { 1129 m_free(m); 1130 m = NULL; 1131 } 1132 } 1133 if (m == NULL) 1134 return (m); 1135 m->m_pkthdr.len = m->m_len = hlen = len; 1136 m->m_pkthdr.ph_ifidx = 0; 1137 rtm = mtod(m, struct rt_msghdr *); 1138 bzero(rtm, len); 1139 for (i = 0; i < RTAX_MAX; i++) { 1140 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1141 continue; 1142 rtinfo->rti_addrs |= (1 << i); 1143 dlen = ROUNDUP(sa->sa_len); 1144 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1145 m_freem(m); 1146 return (NULL); 1147 } 1148 len += dlen; 1149 } 1150 rtm->rtm_msglen = len; 1151 rtm->rtm_hdrlen = hlen; 1152 rtm->rtm_version = RTM_VERSION; 1153 rtm->rtm_type = type; 1154 return (m); 1155 
} 1156 1157 int 1158 rt_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1159 struct walkarg *w) 1160 { 1161 int i; 1162 int len, dlen, hlen, second_time = 0; 1163 caddr_t cp0; 1164 1165 rtinfo->rti_addrs = 0; 1166 again: 1167 switch (type) { 1168 case RTM_DELADDR: 1169 case RTM_NEWADDR: 1170 len = sizeof(struct ifa_msghdr); 1171 break; 1172 case RTM_IFINFO: 1173 len = sizeof(struct if_msghdr); 1174 break; 1175 default: 1176 len = sizeof(struct rt_msghdr); 1177 break; 1178 } 1179 hlen = len; 1180 if ((cp0 = cp) != NULL) 1181 cp += len; 1182 for (i = 0; i < RTAX_MAX; i++) { 1183 struct sockaddr *sa; 1184 1185 if ((sa = rtinfo->rti_info[i]) == NULL) 1186 continue; 1187 rtinfo->rti_addrs |= (1 << i); 1188 dlen = ROUNDUP(sa->sa_len); 1189 if (cp) { 1190 bcopy(sa, cp, (size_t)dlen); 1191 cp += dlen; 1192 } 1193 len += dlen; 1194 } 1195 /* align message length to the next natural boundary */ 1196 len = ALIGN(len); 1197 if (cp == 0 && w != NULL && !second_time) { 1198 struct walkarg *rw = w; 1199 1200 rw->w_needed += len; 1201 if (rw->w_needed <= 0 && rw->w_where) { 1202 if (rw->w_tmemsize < len) { 1203 free(rw->w_tmem, M_RTABLE, 0); 1204 rw->w_tmem = malloc(len, M_RTABLE, M_NOWAIT); 1205 if (rw->w_tmem) 1206 rw->w_tmemsize = len; 1207 } 1208 if (rw->w_tmem) { 1209 cp = rw->w_tmem; 1210 second_time = 1; 1211 goto again; 1212 } else 1213 rw->w_where = 0; 1214 } 1215 } 1216 if (cp && w) /* clear the message header */ 1217 bzero(cp0, hlen); 1218 1219 if (cp) { 1220 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1221 1222 rtm->rtm_version = RTM_VERSION; 1223 rtm->rtm_type = type; 1224 rtm->rtm_msglen = len; 1225 rtm->rtm_hdrlen = hlen; 1226 } 1227 return (len); 1228 } 1229 1230 /* 1231 * This routine is called to generate a message from the routing 1232 * socket indicating that a redirect has occurred, a routing lookup 1233 * has failed, or that a protocol has detected timeouts to a particular 1234 * destination. 
1235 */ 1236 void 1237 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1238 u_int ifidx, int error, u_int tableid) 1239 { 1240 struct rt_msghdr *rtm; 1241 struct mbuf *m; 1242 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1243 1244 if (route_cb.any_count == 0) 1245 return; 1246 m = rt_msg1(type, rtinfo); 1247 if (m == NULL) 1248 return; 1249 rtm = mtod(m, struct rt_msghdr *); 1250 rtm->rtm_flags = RTF_DONE | flags; 1251 rtm->rtm_priority = prio; 1252 rtm->rtm_errno = error; 1253 rtm->rtm_tableid = tableid; 1254 rtm->rtm_addrs = rtinfo->rti_addrs; 1255 rtm->rtm_index = ifidx; 1256 route_input(m, sa ? sa->sa_family : AF_UNSPEC); 1257 } 1258 1259 /* 1260 * This routine is called to generate a message from the routing 1261 * socket indicating that the status of a network interface has changed. 1262 */ 1263 void 1264 rt_ifmsg(struct ifnet *ifp) 1265 { 1266 struct if_msghdr *ifm; 1267 struct mbuf *m; 1268 1269 if (route_cb.any_count == 0) 1270 return; 1271 m = rt_msg1(RTM_IFINFO, NULL); 1272 if (m == NULL) 1273 return; 1274 ifm = mtod(m, struct if_msghdr *); 1275 ifm->ifm_index = ifp->if_index; 1276 ifm->ifm_tableid = ifp->if_rdomain; 1277 ifm->ifm_flags = ifp->if_flags; 1278 ifm->ifm_xflags = ifp->if_xflags; 1279 if_getdata(ifp, &ifm->ifm_data); 1280 ifm->ifm_addrs = 0; 1281 route_input(m, AF_UNSPEC); 1282 } 1283 1284 /* 1285 * This is called to generate messages from the routing socket 1286 * indicating a network interface has had addresses associated with it. 1287 * if we ever reverse the logic and replace messages TO the routing 1288 * socket indicate a request to configure interfaces, then it will 1289 * be unnecessary as the routing socket will automatically generate 1290 * copies of it. 
1291 */ 1292 void 1293 rt_sendaddrmsg(struct rtentry *rt, int cmd, struct ifaddr *ifa) 1294 { 1295 struct ifnet *ifp = ifa->ifa_ifp; 1296 struct mbuf *m = NULL; 1297 struct rt_addrinfo info; 1298 struct ifa_msghdr *ifam; 1299 1300 if (route_cb.any_count == 0) 1301 return; 1302 1303 memset(&info, 0, sizeof(info)); 1304 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1305 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1306 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1307 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1308 if ((m = rt_msg1(cmd, &info)) == NULL) 1309 return; 1310 ifam = mtod(m, struct ifa_msghdr *); 1311 ifam->ifam_index = ifp->if_index; 1312 ifam->ifam_metric = ifa->ifa_metric; 1313 ifam->ifam_flags = ifa->ifa_flags; 1314 ifam->ifam_addrs = info.rti_addrs; 1315 ifam->ifam_tableid = ifp->if_rdomain; 1316 1317 route_input(m, ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1318 } 1319 1320 /* 1321 * This is called to generate routing socket messages indicating 1322 * network interface arrival and departure. 1323 */ 1324 void 1325 rt_ifannouncemsg(struct ifnet *ifp, int what) 1326 { 1327 struct if_announcemsghdr *ifan; 1328 struct mbuf *m; 1329 1330 if (route_cb.any_count == 0) 1331 return; 1332 m = rt_msg1(RTM_IFANNOUNCE, NULL); 1333 if (m == NULL) 1334 return; 1335 ifan = mtod(m, struct if_announcemsghdr *); 1336 ifan->ifan_index = ifp->if_index; 1337 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1338 ifan->ifan_what = what; 1339 route_input(m, AF_UNSPEC); 1340 } 1341 1342 #ifdef BFD 1343 /* 1344 * This is used to generate routing socket messages indicating 1345 * the state of a BFD session. 
1346 */ 1347 void 1348 rt_bfdmsg(struct bfd_config *bfd) 1349 { 1350 struct bfd_msghdr *bfdm; 1351 struct sockaddr_bfd sa_bfd; 1352 struct mbuf *m; 1353 struct rt_addrinfo info; 1354 1355 if (route_cb.any_count == 0) 1356 return; 1357 memset(&info, 0, sizeof(info)); 1358 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1359 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1360 1361 m = rt_msg1(RTM_BFD, &info); 1362 if (m == NULL) 1363 return; 1364 bfdm = mtod(m, struct bfd_msghdr *); 1365 bfdm->bm_addrs = info.rti_addrs; 1366 1367 bfd2sa(bfd->bc_rt, &sa_bfd); 1368 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1369 1370 route_input(m, info.rti_info[RTAX_DST]->sa_family); 1371 } 1372 #endif /* BFD */ 1373 1374 /* 1375 * This is used in dumping the kernel table via sysctl(). 1376 */ 1377 int 1378 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1379 { 1380 struct walkarg *w = v; 1381 int error = 0, size; 1382 struct rt_addrinfo info; 1383 struct ifnet *ifp; 1384 #ifdef BFD 1385 struct sockaddr_bfd sa_bfd; 1386 #endif 1387 #ifdef MPLS 1388 struct sockaddr_mpls sa_mpls; 1389 #endif 1390 struct sockaddr_rtlabel sa_rl; 1391 struct sockaddr_in6 sa_mask; 1392 1393 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1394 return 0; 1395 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1396 u_int8_t prio = w->w_arg & RTP_MASK; 1397 if (w->w_arg < 0) { 1398 prio = (-w->w_arg) & RTP_MASK; 1399 /* Show all routes that are not this priority */ 1400 if (prio == (rt->rt_priority & RTP_MASK)) 1401 return 0; 1402 } else { 1403 if (prio != (rt->rt_priority & RTP_MASK) && 1404 prio != RTP_ANY) 1405 return 0; 1406 } 1407 } 1408 bzero(&info, sizeof(info)); 1409 info.rti_info[RTAX_DST] = rt_key(rt); 1410 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1411 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1412 ifp = if_get(rt->rt_ifidx); 1413 if (ifp != NULL) { 1414 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1415 info.rti_info[RTAX_IFA] = 
rt->rt_ifa->ifa_addr; 1416 if (ifp->if_flags & IFF_POINTOPOINT) 1417 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1418 } 1419 if_put(ifp); 1420 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1421 #ifdef BFD 1422 if (rt->rt_flags & RTF_BFD) 1423 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1424 #endif 1425 #ifdef MPLS 1426 if (rt->rt_flags & RTF_MPLS) { 1427 bzero(&sa_mpls, sizeof(sa_mpls)); 1428 sa_mpls.smpls_family = AF_MPLS; 1429 sa_mpls.smpls_len = sizeof(sa_mpls); 1430 sa_mpls.smpls_label = ((struct rt_mpls *) 1431 rt->rt_llinfo)->mpls_label; 1432 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1433 info.rti_mpls = ((struct rt_mpls *) 1434 rt->rt_llinfo)->mpls_operation; 1435 } 1436 #endif 1437 1438 size = rt_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1439 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1440 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1441 1442 rtm->rtm_pid = curproc->p_p->ps_pid; 1443 rtm->rtm_flags = rt->rt_flags; 1444 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1445 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1446 /* Do not account the routing table's reference. 
*/ 1447 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1448 rtm->rtm_index = rt->rt_ifidx; 1449 rtm->rtm_addrs = info.rti_addrs; 1450 rtm->rtm_tableid = id; 1451 #ifdef MPLS 1452 rtm->rtm_mpls = info.rti_mpls; 1453 #endif 1454 if ((error = copyout(rtm, w->w_where, size)) != 0) 1455 w->w_where = NULL; 1456 else 1457 w->w_where += size; 1458 } 1459 return (error); 1460 } 1461 1462 int 1463 sysctl_iflist(int af, struct walkarg *w) 1464 { 1465 struct ifnet *ifp; 1466 struct ifaddr *ifa; 1467 struct rt_addrinfo info; 1468 int len, error = 0; 1469 1470 bzero(&info, sizeof(info)); 1471 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1472 if (w->w_arg && w->w_arg != ifp->if_index) 1473 continue; 1474 /* Copy the link-layer address first */ 1475 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1476 len = rt_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 1477 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1478 struct if_msghdr *ifm; 1479 1480 ifm = (struct if_msghdr *)w->w_tmem; 1481 ifm->ifm_index = ifp->if_index; 1482 ifm->ifm_tableid = ifp->if_rdomain; 1483 ifm->ifm_flags = ifp->if_flags; 1484 if_getdata(ifp, &ifm->ifm_data); 1485 ifm->ifm_addrs = info.rti_addrs; 1486 error = copyout(ifm, w->w_where, len); 1487 if (error) 1488 return (error); 1489 w->w_where += len; 1490 } 1491 info.rti_info[RTAX_IFP] = NULL; 1492 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1493 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 1494 if (af && af != ifa->ifa_addr->sa_family) 1495 continue; 1496 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1497 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1498 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1499 len = rt_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 1500 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1501 struct ifa_msghdr *ifam; 1502 1503 ifam = (struct ifa_msghdr *)w->w_tmem; 1504 ifam->ifam_index = ifa->ifa_ifp->if_index; 1505 ifam->ifam_flags = ifa->ifa_flags; 1506 ifam->ifam_metric = ifa->ifa_metric; 1507 ifam->ifam_addrs = info.rti_addrs; 1508 
error = copyout(w->w_tmem, w->w_where, len); 1509 if (error) 1510 return (error); 1511 w->w_where += len; 1512 } 1513 } 1514 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 1515 info.rti_info[RTAX_BRD] = NULL; 1516 } 1517 return (0); 1518 } 1519 1520 int 1521 sysctl_ifnames(struct walkarg *w) 1522 { 1523 struct if_nameindex_msg ifn; 1524 struct ifnet *ifp; 1525 int error = 0; 1526 1527 /* XXX ignore tableid for now */ 1528 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1529 if (w->w_arg && w->w_arg != ifp->if_index) 1530 continue; 1531 w->w_needed += sizeof(ifn); 1532 if (w->w_where && w->w_needed <= 0) { 1533 1534 memset(&ifn, 0, sizeof(ifn)); 1535 ifn.if_index = ifp->if_index; 1536 strlcpy(ifn.if_name, ifp->if_xname, 1537 sizeof(ifn.if_name)); 1538 error = copyout(&ifn, w->w_where, sizeof(ifn)); 1539 if (error) 1540 return (error); 1541 w->w_where += sizeof(ifn); 1542 } 1543 } 1544 1545 return (0); 1546 } 1547 1548 int 1549 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 1550 size_t newlen) 1551 { 1552 int i, error = EINVAL; 1553 u_char af; 1554 struct walkarg w; 1555 struct rt_tableinfo tableinfo; 1556 u_int tableid = 0; 1557 1558 NET_ASSERT_LOCKED(); 1559 1560 if (new) 1561 return (EPERM); 1562 if (namelen < 3 || namelen > 4) 1563 return (EINVAL); 1564 af = name[0]; 1565 bzero(&w, sizeof(w)); 1566 w.w_where = where; 1567 w.w_given = *given; 1568 w.w_needed = 0 - w.w_given; 1569 w.w_op = name[1]; 1570 w.w_arg = name[2]; 1571 1572 if (namelen == 4) { 1573 tableid = name[3]; 1574 if (!rtable_exists(tableid)) 1575 return (ENOENT); 1576 } else 1577 tableid = curproc->p_p->ps_rtableid; 1578 1579 switch (w.w_op) { 1580 case NET_RT_DUMP: 1581 case NET_RT_FLAGS: 1582 for (i = 1; i <= AF_MAX; i++) { 1583 if (af != 0 && af != i) 1584 continue; 1585 1586 error = rtable_walk(tableid, i, sysctl_dumpentry, &w); 1587 if (error == EAFNOSUPPORT) 1588 error = 0; 1589 if (error) 1590 break; 1591 } 1592 break; 1593 1594 case NET_RT_IFLIST: 1595 error = 
sysctl_iflist(af, &w); 1596 break; 1597 1598 case NET_RT_STATS: 1599 return (sysctl_rtable_rtstat(where, given, new)); 1600 case NET_RT_TABLE: 1601 tableid = w.w_arg; 1602 if (!rtable_exists(tableid)) 1603 return (ENOENT); 1604 tableinfo.rti_tableid = tableid; 1605 tableinfo.rti_domainid = rtable_l2(tableid); 1606 error = sysctl_rdstruct(where, given, new, 1607 &tableinfo, sizeof(tableinfo)); 1608 return (error); 1609 case NET_RT_IFNAMES: 1610 error = sysctl_ifnames(&w); 1611 break; 1612 } 1613 free(w.w_tmem, M_RTABLE, 0); 1614 w.w_needed += w.w_given; 1615 if (where) { 1616 *given = w.w_where - (caddr_t)where; 1617 if (*given < w.w_needed) 1618 return (ENOMEM); 1619 } else 1620 *given = (11 * w.w_needed) / 10; 1621 1622 return (error); 1623 } 1624 1625 int 1626 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 1627 { 1628 extern struct cpumem *rtcounters; 1629 uint64_t counters[rts_ncounters]; 1630 struct rtstat rtstat; 1631 uint32_t *words = (uint32_t *)&rtstat; 1632 int i; 1633 1634 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 1635 1636 counters_read(rtcounters, counters, nitems(counters)); 1637 1638 for (i = 0; i < nitems(counters); i++) 1639 words[i] = (uint32_t)counters[i]; 1640 1641 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 1642 } 1643 1644 int 1645 validate_proposal(struct rt_addrinfo *info) 1646 { 1647 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 1648 RTA_SEARCH)) { 1649 return -1; 1650 } 1651 1652 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 1653 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 1654 if (sa == NULL) 1655 return -1; 1656 switch (sa->sa_family) { 1657 case AF_INET: 1658 if (sa->sa_len != sizeof(struct sockaddr_in)) 1659 return -1; 1660 break; 1661 case AF_INET6: 1662 if (sa->sa_len != sizeof(struct sockaddr_in6)) 1663 return -1; 1664 break; 1665 default: 1666 return -1; 1667 } 1668 } 1669 1670 if (ISSET(info->rti_addrs, RTA_IFA)) { 1671 struct sockaddr 
*sa = info->rti_info[RTAX_IFA]; 1672 if (sa == NULL) 1673 return -1; 1674 switch (sa->sa_family) { 1675 case AF_INET: 1676 if (sa->sa_len != sizeof(struct sockaddr_in)) 1677 return -1; 1678 break; 1679 case AF_INET6: 1680 if (sa->sa_len != sizeof(struct sockaddr_in6)) 1681 return -1; 1682 break; 1683 default: 1684 return -1; 1685 } 1686 } 1687 1688 if (ISSET(info->rti_addrs, RTA_DNS)) { 1689 struct sockaddr_rtdns *rtdns = 1690 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 1691 if (rtdns == NULL) 1692 return -1; 1693 if (rtdns->sr_len > sizeof(*rtdns)) 1694 return -1; 1695 if (rtdns->sr_len <= 1696 offsetof(struct sockaddr_rtdns, sr_dns)) 1697 return -1; 1698 } 1699 1700 if (ISSET(info->rti_addrs, RTA_STATIC)) { 1701 struct sockaddr_rtstatic *rtstatic = 1702 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 1703 if (rtstatic == NULL) 1704 return -1; 1705 if (rtstatic->sr_len > sizeof(*rtstatic)) 1706 return -1; 1707 if (rtstatic->sr_len <= 1708 offsetof(struct sockaddr_rtstatic, sr_static)) 1709 return -1; 1710 } 1711 1712 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 1713 struct sockaddr_rtsearch *rtsearch = 1714 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 1715 if (rtsearch == NULL) 1716 return -1; 1717 if (rtsearch->sr_len > sizeof(*rtsearch)) 1718 return -1; 1719 if (rtsearch->sr_len <= 1720 offsetof(struct sockaddr_rtsearch, sr_search)) 1721 return -1; 1722 } 1723 1724 return 0; 1725 } 1726 1727 /* 1728 * Definitions of protocols supported in the ROUTE domain. 
1729 */ 1730 1731 extern struct domain routedomain; /* or at least forward */ 1732 1733 struct protosw routesw[] = { 1734 { 1735 .pr_type = SOCK_RAW, 1736 .pr_domain = &routedomain, 1737 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 1738 .pr_output = route_output, 1739 .pr_ctloutput = route_ctloutput, 1740 .pr_usrreq = route_usrreq, 1741 .pr_init = raw_init, 1742 .pr_sysctl = sysctl_rtable 1743 } 1744 }; 1745 1746 struct domain routedomain = { 1747 .dom_family = PF_ROUTE, 1748 .dom_name = "route", 1749 .dom_init = route_init, 1750 .dom_protosw = routesw, 1751 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 1752 }; 1753