1 /* $OpenBSD: rtsock.c,v 1.205 2016/09/17 07:35:05 phessler Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/protosw.h> 73 74 #include <net/if.h> 75 #include <net/if_dl.h> 76 #include <net/if_var.h> 77 #include <net/route.h> 78 #include <net/raw_cb.h> 79 80 #include <netinet/in.h> 81 82 #ifdef MPLS 83 #include <netmpls/mpls.h> 84 #endif 85 #ifdef BFD 86 #include <net/bfd.h> 87 #endif 88 89 #include <sys/stdarg.h> 90 #include <sys/kernel.h> 91 #include <sys/timeout.h> 92 93 struct sockaddr route_dst = { 2, PF_ROUTE, }; 94 struct sockaddr route_src = { 2, PF_ROUTE, }; 95 struct sockproto route_proto = { PF_ROUTE, }; 96 97 struct walkarg { 98 int w_op, w_arg, w_given, w_needed, w_tmemsize; 99 caddr_t w_where, w_tmem; 100 }; 101 102 int route_ctloutput(int, struct socket *, int, int, struct mbuf **); 103 void route_input(struct mbuf *m0, ...); 104 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 105 int route_cleargateway(struct rtentry *, void *, unsigned int); 106 107 struct mbuf *rt_msg1(int, struct rt_addrinfo *); 108 int rt_msg2(int, int, struct rt_addrinfo *, caddr_t, 109 struct walkarg *); 110 void rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 111 112 int sysctl_iflist(int, struct walkarg *); 113 int sysctl_ifnames(struct walkarg *); 114 115 struct routecb { 116 struct rawcb rcb; 117 struct timeout timeout; 118 unsigned int msgfilter; 119 unsigned int flags; 120 u_int rtableid; 121 }; 122 #define sotoroutecb(so) ((struct routecb *)(so)->so_pcb) 123 124 struct route_cb { 125 int ip_count; 126 int ip6_count; 127 int mpls_count; 128 int any_count; 129 }; 130 131 struct route_cb route_cb; 132 133 /* 134 * These flags and timeout are used for indicating to userland (via a 135 * RTM_DESYNC msg) when the route socket has overflowed and messages 136 * have been lost. 137 */ 138 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 139 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 140 queueing more packets */ 141 142 #define ROUTE_DESYNC_RESEND_TIMEOUT (hz / 5) /* In hz */ 143 144 void rt_senddesync(void *); 145 146 int 147 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 148 struct mbuf *control, struct proc *p) 149 { 150 struct rawcb *rp; 151 struct routecb *rop; 152 int s, af; 153 int error = 0; 154 155 s = splsoftnet(); 156 rp = sotorawcb(so); 157 158 switch (req) { 159 case PRU_ATTACH: 160 /* 161 * use the rawcb but allocate a routecb, this 162 * code does not care about the additional fields 163 * and works directly on the raw socket. 164 */ 165 rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO); 166 rp = &rop->rcb; 167 so->so_pcb = rp; 168 /* Init the timeout structure */ 169 timeout_set(&((struct routecb *)rp)->timeout, rt_senddesync, rp); 170 /* 171 * Don't call raw_usrreq() in the attach case, because 172 * we want to allow non-privileged processes to listen 173 * on and send "safe" commands to the routing socket. 174 */ 175 if (curproc == 0) 176 error = EACCES; 177 else 178 error = raw_attach(so, (int)(long)nam); 179 if (error) { 180 free(rop, M_PCB, sizeof(struct routecb)); 181 splx(s); 182 return (error); 183 } 184 rop->rtableid = curproc->p_p->ps_rtableid; 185 af = rp->rcb_proto.sp_protocol; 186 if (af == AF_INET) 187 route_cb.ip_count++; 188 else if (af == AF_INET6) 189 route_cb.ip6_count++; 190 #ifdef MPLS 191 else if (af == AF_MPLS) 192 route_cb.mpls_count++; 193 #endif 194 rp->rcb_faddr = &route_src; 195 route_cb.any_count++; 196 soisconnected(so); 197 so->so_options |= SO_USELOOPBACK; 198 break; 199 200 case PRU_RCVD: 201 rop = (struct routecb *)rp; 202 203 /* 204 * If we are in a FLUSH state, check if the buffer is 205 * empty so that we can clear the flag. 206 */ 207 if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) && 208 ((sbspace(&rp->rcb_socket->so_rcv) == 209 rp->rcb_socket->so_rcv.sb_hiwat))) 210 rop->flags &= ~ROUTECB_FLAG_FLUSH; 211 break; 212 213 case PRU_DETACH: 214 if (rp) { 215 timeout_del(&((struct routecb *)rp)->timeout); 216 af = rp->rcb_proto.sp_protocol; 217 if (af == AF_INET) 218 route_cb.ip_count--; 219 else if (af == AF_INET6) 220 route_cb.ip6_count--; 221 #ifdef MPLS 222 else if (af == AF_MPLS) 223 route_cb.mpls_count--; 224 #endif 225 route_cb.any_count--; 226 } 227 /* FALLTHROUGH */ 228 default: 229 error = raw_usrreq(so, req, m, nam, control, p); 230 } 231 232 splx(s); 233 return (error); 234 } 235 236 int 237 route_ctloutput(int op, struct socket *so, int level, int optname, 238 struct mbuf **mp) 239 { 240 struct routecb *rop = sotoroutecb(so); 241 struct mbuf *m = *mp; 242 int error = 0; 243 unsigned int tid; 244 245 if (level != AF_ROUTE) { 246 error = EINVAL; 247 if (op == PRCO_SETOPT && *mp) 248 m_free(*mp); 249 return (error); 250 } 251 252 switch (op) { 253 case PRCO_SETOPT: 254 switch (optname) { 255 case ROUTE_MSGFILTER: 256 if (m == NULL || m->m_len != sizeof(unsigned int)) 257 error = EINVAL; 258 else 259 rop->msgfilter = *mtod(m, unsigned int *); 260 break; 261 case ROUTE_TABLEFILTER: 262 if (m == NULL || m->m_len != sizeof(unsigned int)) { 263 error = EINVAL; 264 break; 265 } 266 tid = *mtod(m, unsigned int *); 267 if (tid != RTABLE_ANY && !rtable_exists(tid)) 268 error = ENOENT; 269 else 270 rop->rtableid = tid; 271 break; 272 default: 273 error = ENOPROTOOPT; 274 break; 275 } 276 if (m) 277 m_free(m); 278 break; 279 case PRCO_GETOPT: 280 switch (optname) { 281 case ROUTE_MSGFILTER: 282 *mp = m = m_get(M_WAIT, MT_SOOPTS); 283 m->m_len = sizeof(unsigned int); 284 *mtod(m, unsigned int *) = rop->msgfilter; 285 break; 286 case ROUTE_TABLEFILTER: 287 *mp = m = m_get(M_WAIT, MT_SOOPTS); 288 m->m_len = sizeof(unsigned int); 289 *mtod(m, unsigned int *) = rop->rtableid; 290 break; 291 default: 292 error = ENOPROTOOPT; 293 break; 294 } 295 } 296 return (error); 297 } 298 299 void 300 rt_senddesync(void *data) 301 { 302 struct rawcb *rp; 303 struct routecb *rop; 304 struct mbuf *desync_mbuf; 305 306 rp = (struct rawcb *)data; 307 rop = (struct routecb *)rp; 308 309 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 310 if ((rop->flags & ROUTECB_FLAG_DESYNC) != 0) { 311 /* 312 * If we fail to alloc memory or if sbappendaddr() 313 * fails, re-add timeout and try again. 314 */ 315 desync_mbuf = rt_msg1(RTM_DESYNC, NULL); 316 if ((desync_mbuf != NULL) && 317 (sbappendaddr(&rp->rcb_socket->so_rcv, &route_src, 318 desync_mbuf, (struct mbuf *)NULL) != 0)) { 319 rop->flags &= ~ROUTECB_FLAG_DESYNC; 320 sorwakeup(rp->rcb_socket); 321 } else { 322 m_freem(desync_mbuf); 323 /* Re-add timeout to try sending msg again */ 324 timeout_add(&rop->timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 325 } 326 } 327 } 328 329 void 330 route_input(struct mbuf *m0, ...) 331 { 332 struct rawcb *rp; 333 struct routecb *rop; 334 struct rt_msghdr *rtm; 335 struct mbuf *m = m0; 336 int sockets = 0; 337 struct socket *last = NULL; 338 va_list ap; 339 struct sockproto *proto; 340 struct sockaddr *sosrc, *sodst; 341 342 va_start(ap, m0); 343 proto = va_arg(ap, struct sockproto *); 344 sosrc = va_arg(ap, struct sockaddr *); 345 sodst = va_arg(ap, struct sockaddr *); 346 va_end(ap); 347 348 /* ensure that we can access the rtm_type via mtod() */ 349 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 350 m_freem(m); 351 return; 352 } 353 354 LIST_FOREACH(rp, &rawcb, rcb_list) { 355 if (rp->rcb_socket->so_state & SS_CANTRCVMORE) 356 continue; 357 if (rp->rcb_proto.sp_family != proto->sp_family) 358 continue; 359 if (rp->rcb_proto.sp_protocol && proto->sp_protocol && 360 rp->rcb_proto.sp_protocol != proto->sp_protocol) 361 continue; 362 /* 363 * We assume the lower level routines have 364 * placed the address in a canonical format 365 * suitable for a structure comparison. 366 * 367 * Note that if the lengths are not the same 368 * the comparison will fail at the first byte. 369 */ 370 #define equal(a1, a2) \ 371 (bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0) 372 if (rp->rcb_laddr && !equal(rp->rcb_laddr, sodst)) 373 continue; 374 if (rp->rcb_faddr && !equal(rp->rcb_faddr, sosrc)) 375 continue; 376 377 /* filter messages that the process does not want */ 378 rop = (struct routecb *)rp; 379 rtm = mtod(m, struct rt_msghdr *); 380 /* but RTM_DESYNC can't be filtered */ 381 if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 && 382 !(rop->msgfilter & (1 << rtm->rtm_type))) 383 continue; 384 switch (rtm->rtm_type) { 385 case RTM_IFANNOUNCE: 386 case RTM_DESYNC: 387 /* no tableid */ 388 break; 389 case RTM_RESOLVE: 390 case RTM_NEWADDR: 391 case RTM_DELADDR: 392 case RTM_IFINFO: 393 /* check against rdomain id */ 394 if (rop->rtableid != RTABLE_ANY && 395 rtable_l2(rop->rtableid) != rtm->rtm_tableid) 396 continue; 397 break; 398 default: 399 /* check against rtable id */ 400 if (rop->rtableid != RTABLE_ANY && 401 rop->rtableid != rtm->rtm_tableid) 402 continue; 403 break; 404 } 405 406 /* 407 * Check to see if the flush flag is set. If so, don't queue 408 * any more messages until the flag is cleared. 409 */ 410 if ((rop->flags & ROUTECB_FLAG_FLUSH) != 0) 411 continue; 412 413 if (last) { 414 struct mbuf *n; 415 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { 416 if (sbspace(&last->so_rcv) < (2 * MSIZE) || 417 sbappendaddr(&last->so_rcv, sosrc, 418 n, (struct mbuf *)NULL) == 0) { 419 /* 420 * Flag socket as desync'ed and 421 * flush required 422 */ 423 sotoroutecb(last)->flags |= 424 ROUTECB_FLAG_DESYNC | 425 ROUTECB_FLAG_FLUSH; 426 rt_senddesync((void *) sotorawcb(last)); 427 m_freem(n); 428 } else { 429 sorwakeup(last); 430 sockets++; 431 } 432 } 433 } 434 last = rp->rcb_socket; 435 } 436 if (last) { 437 if (sbspace(&last->so_rcv) < (2 * MSIZE) || 438 sbappendaddr(&last->so_rcv, sosrc, 439 m, (struct mbuf *)NULL) == 0) { 440 /* Flag socket as desync'ed and flush required */ 441 sotoroutecb(last)->flags |= 442 ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 443 rt_senddesync((void *) sotorawcb(last)); 444 m_freem(m); 445 } else { 446 sorwakeup(last); 447 sockets++; 448 } 449 } else 450 m_freem(m); 451 } 452 453 int 454 route_output(struct mbuf *m, ...) 455 { 456 struct rt_msghdr *rtm = NULL; 457 struct rtentry *rt = NULL; 458 struct rtentry *saved_nrt = NULL; 459 struct rt_addrinfo info; 460 int plen, len, newgate = 0, error = 0; 461 struct ifnet *ifp = NULL; 462 struct ifaddr *ifa = NULL; 463 struct socket *so; 464 struct rawcb *rp = NULL; 465 struct sockaddr_rtlabel sa_rl; 466 struct sockaddr_in6 sa_mask; 467 #ifdef MPLS 468 struct sockaddr_mpls sa_mpls, *psa_mpls; 469 #endif 470 va_list ap; 471 u_int tableid; 472 u_int8_t prio; 473 u_char vers; 474 475 va_start(ap, m); 476 so = va_arg(ap, struct socket *); 477 va_end(ap); 478 479 info.rti_info[RTAX_DST] = NULL; /* for error handling (goto flush) */ 480 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 481 (m = m_pullup(m, sizeof(int32_t))) == 0)) 482 return (ENOBUFS); 483 if ((m->m_flags & M_PKTHDR) == 0) 484 panic("route_output"); 485 len = m->m_pkthdr.len; 486 if (len < offsetof(struct rt_msghdr, rtm_type) + 1 || 487 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 488 error = EINVAL; 489 goto fail; 490 } 491 vers = mtod(m, struct rt_msghdr *)->rtm_version; 492 switch (vers) { 493 case RTM_VERSION: 494 if (len < sizeof(struct rt_msghdr)) { 495 error = EINVAL; 496 goto fail; 497 } 498 if (len > RTM_MAXSIZE) { 499 error = EMSGSIZE; 500 goto fail; 501 } 502 rtm = malloc(len, M_RTABLE, M_NOWAIT); 503 if (rtm == NULL) { 504 error = ENOBUFS; 505 goto fail; 506 } 507 m_copydata(m, 0, len, (caddr_t)rtm); 508 break; 509 default: 510 error = EPROTONOSUPPORT; 511 goto fail; 512 } 513 rtm->rtm_pid = curproc->p_p->ps_pid; 514 if (rtm->rtm_hdrlen == 0) /* old client */ 515 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 516 if (len < rtm->rtm_hdrlen) { 517 error = EINVAL; 518 goto fail; 519 } 520 521 /* Verify that the caller is sending an appropriate message early */ 522 switch (rtm->rtm_type) { 523 case RTM_ADD: 524 case RTM_DELETE: 525 case RTM_GET: 526 case RTM_CHANGE: 527 case RTM_LOCK: 528 break; 529 default: 530 error = EOPNOTSUPP; 531 goto fail; 532 } 533 534 /* 535 * Verify that the caller has the appropriate privilege; RTM_GET 536 * is the only operation the non-superuser is allowed. 537 */ 538 if (rtm->rtm_type != RTM_GET && suser(curproc, 0) != 0) { 539 error = EACCES; 540 goto fail; 541 } 542 tableid = rtm->rtm_tableid; 543 if (!rtable_exists(tableid)) { 544 if (rtm->rtm_type == RTM_ADD) { 545 if ((error = rtable_add(tableid)) != 0) 546 goto flush; 547 } else { 548 error = EINVAL; 549 goto flush; 550 } 551 } 552 553 554 /* Do not let userland play with kernel-only flags. */ 555 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 556 error = EINVAL; 557 goto fail; 558 } 559 560 /* make sure that kernel-only bits are not set */ 561 rtm->rtm_priority &= RTP_MASK; 562 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 563 rtm->rtm_fmask &= RTF_FMASK; 564 565 if (rtm->rtm_priority != 0) { 566 if (rtm->rtm_priority > RTP_MAX || 567 rtm->rtm_priority == RTP_LOCAL) { 568 error = EINVAL; 569 goto fail; 570 } 571 prio = rtm->rtm_priority; 572 } else if (rtm->rtm_type != RTM_ADD) 573 prio = RTP_ANY; 574 else if (rtm->rtm_flags & RTF_STATIC) 575 prio = 0; 576 else 577 prio = RTP_DEFAULT; 578 579 bzero(&info, sizeof(info)); 580 info.rti_addrs = rtm->rtm_addrs; 581 rt_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info); 582 info.rti_flags = rtm->rtm_flags; 583 if (info.rti_info[RTAX_DST] == NULL || 584 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 585 (info.rti_info[RTAX_GATEWAY] != NULL && 586 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 587 info.rti_info[RTAX_GENMASK] != NULL) { 588 error = EINVAL; 589 goto flush; 590 } 591 #ifdef MPLS 592 info.rti_mpls = rtm->rtm_mpls; 593 #endif 594 595 if (info.rti_info[RTAX_GATEWAY] != NULL && 596 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 597 (info.rti_flags & RTF_CLONING) == 0) { 598 info.rti_flags |= RTF_LLINFO; 599 } 600 601 switch (rtm->rtm_type) { 602 case RTM_ADD: 603 if (info.rti_info[RTAX_GATEWAY] == NULL) { 604 error = EINVAL; 605 goto flush; 606 } 607 608 rt = rtable_match(tableid, info.rti_info[RTAX_DST], NULL); 609 if ((error = route_arp_conflict(rt, &info))) { 610 rtfree(rt); 611 rt = NULL; 612 goto flush; 613 } 614 615 /* 616 * We cannot go through a delete/create/insert cycle for 617 * cached route because this can lead to races in the 618 * receive path. Instead we upade the L2 cache. 619 */ 620 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 621 goto change; 622 623 rtfree(rt); 624 rt = NULL; 625 626 error = rtrequest(RTM_ADD, &info, prio, &saved_nrt, tableid); 627 if (error == 0) { 628 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 629 &saved_nrt->rt_rmx); 630 /* write back the priority the kernel used */ 631 rtm->rtm_priority = saved_nrt->rt_priority & RTP_MASK; 632 rtm->rtm_index = saved_nrt->rt_ifidx; 633 rtm->rtm_flags = saved_nrt->rt_flags; 634 rtfree(saved_nrt); 635 } 636 break; 637 case RTM_DELETE: 638 if (!rtable_exists(tableid)) { 639 error = EAFNOSUPPORT; 640 goto flush; 641 } 642 643 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 644 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 645 prio); 646 647 /* 648 * Invalidate the cache of automagically created and 649 * referenced L2 entries to make sure that ``rt_gwroute'' 650 * pointer stays valid for other CPUs. 651 */ 652 if ((rt != NULL) && (ISSET(rt->rt_flags, RTF_CACHED))) { 653 ifp = if_get(rt->rt_ifidx); 654 KASSERT(ifp != NULL); 655 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 656 if_put(ifp); 657 /* Reset the MTU of the gateway route. */ 658 rtable_walk(tableid, rt_key(rt)->sa_family, 659 route_cleargateway, rt); 660 goto report; 661 } 662 663 /* 664 * Make sure that local routes are only modified by the 665 * kernel. 666 */ 667 if ((rt != NULL) && 668 ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 669 error = EINVAL; 670 goto report; 671 } 672 673 rtfree(rt); 674 rt = NULL; 675 676 error = rtrequest(RTM_DELETE, &info, prio, &rt, tableid); 677 if (error == 0) 678 goto report; 679 break; 680 case RTM_GET: 681 if (!rtable_exists(tableid)) { 682 error = EAFNOSUPPORT; 683 goto flush; 684 } 685 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 686 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 687 prio); 688 if (rt == NULL) { 689 error = ESRCH; 690 goto flush; 691 } 692 693 report: 694 info.rti_info[RTAX_DST] = rt_key(rt); 695 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 696 info.rti_info[RTAX_NETMASK] = 697 rt_plen2mask(rt, &sa_mask); 698 info.rti_info[RTAX_LABEL] = 699 rtlabel_id2sa(rt->rt_labelid, &sa_rl); 700 #ifdef MPLS 701 if (rt->rt_flags & RTF_MPLS) { 702 bzero(&sa_mpls, sizeof(sa_mpls)); 703 sa_mpls.smpls_family = AF_MPLS; 704 sa_mpls.smpls_len = sizeof(sa_mpls); 705 sa_mpls.smpls_label = ((struct rt_mpls *) 706 rt->rt_llinfo)->mpls_label; 707 info.rti_info[RTAX_SRC] = 708 (struct sockaddr *)&sa_mpls; 709 info.rti_mpls = ((struct rt_mpls *) 710 rt->rt_llinfo)->mpls_operation; 711 rtm->rtm_mpls = info.rti_mpls; 712 } 713 #endif 714 info.rti_info[RTAX_IFP] = NULL; 715 info.rti_info[RTAX_IFA] = NULL; 716 ifp = if_get(rt->rt_ifidx); 717 if (ifp != NULL && rtm->rtm_addrs & (RTA_IFP|RTA_IFA)) { 718 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 719 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 720 if (ifp->if_flags & IFF_POINTOPOINT) 721 info.rti_info[RTAX_BRD] = 722 rt->rt_ifa->ifa_dstaddr; 723 else 724 info.rti_info[RTAX_BRD] = NULL; 725 } 726 if_put(ifp); 727 len = rt_msg2(rtm->rtm_type, RTM_VERSION, &info, NULL, 728 NULL); 729 if (len > rtm->rtm_msglen) { 730 struct rt_msghdr *new_rtm; 731 new_rtm = malloc(len, M_RTABLE, M_NOWAIT); 732 if (new_rtm == NULL) { 733 error = ENOBUFS; 734 goto flush; 735 } 736 memcpy(new_rtm, rtm, rtm->rtm_msglen); 737 free(rtm, M_RTABLE, 0); 738 rtm = new_rtm; 739 } 740 rt_msg2(rtm->rtm_type, RTM_VERSION, &info, (caddr_t)rtm, 741 NULL); 742 rtm->rtm_flags = rt->rt_flags; 743 rtm->rtm_use = 0; 744 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 745 rtm->rtm_index = rt->rt_ifidx; 746 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 747 rtm->rtm_addrs = info.rti_addrs; 748 break; 749 case RTM_CHANGE: 750 case RTM_LOCK: 751 if (!rtable_exists(tableid)) { 752 error = EAFNOSUPPORT; 753 goto flush; 754 } 755 756 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 757 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 758 prio); 759 #ifndef SMALL_KERNEL 760 /* 761 * If we got multipath routes, we require users to specify 762 * a matching gateway. 763 */ 764 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 765 (info.rti_info[RTAX_GATEWAY] == NULL)) { 766 rtfree(rt); 767 rt = NULL; 768 } 769 #endif 770 /* 771 * If RTAX_GATEWAY is the argument we're trying to 772 * change, try to find a compatible route. 773 */ 774 if ((rt == NULL) && (info.rti_info[RTAX_GATEWAY] != NULL) && 775 (rtm->rtm_type == RTM_CHANGE)) { 776 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 777 info.rti_info[RTAX_NETMASK], NULL, prio); 778 #ifndef SMALL_KERNEL 779 /* Ensure we don't pick a multipath one. */ 780 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 781 rtfree(rt); 782 rt = NULL; 783 } 784 #endif 785 } 786 787 if (rt == NULL) { 788 error = ESRCH; 789 goto flush; 790 } 791 792 /* 793 * RTM_CHANGE/LOCK need a perfect match. 794 */ 795 plen = rtable_satoplen(info.rti_info[RTAX_DST]->sa_family, 796 info.rti_info[RTAX_NETMASK]); 797 if (rt_plen(rt) != plen ) { 798 error = ESRCH; 799 goto flush; 800 } 801 802 switch (rtm->rtm_type) { 803 case RTM_CHANGE: 804 if (info.rti_info[RTAX_GATEWAY] != NULL) 805 if (rt->rt_gateway == NULL || 806 bcmp(rt->rt_gateway, 807 info.rti_info[RTAX_GATEWAY], 808 info.rti_info[RTAX_GATEWAY]->sa_len)) { 809 newgate = 1; 810 } 811 /* 812 * Check reachable gateway before changing the route. 813 * New gateway could require new ifaddr, ifp; 814 * flags may also be different; ifp may be specified 815 * by ll sockaddr when protocol address is ambiguous. 816 */ 817 if (newgate || info.rti_info[RTAX_IFP] != NULL || 818 info.rti_info[RTAX_IFA] != NULL) { 819 if ((error = rt_getifa(&info, tableid)) != 0) 820 goto flush; 821 ifa = info.rti_ifa; 822 if (rt->rt_ifa != ifa) { 823 ifp = if_get(rt->rt_ifidx); 824 KASSERT(ifp != NULL); 825 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 826 ifafree(rt->rt_ifa); 827 if_put(ifp); 828 829 ifa->ifa_refcnt++; 830 rt->rt_ifa = ifa; 831 rt->rt_ifidx = ifa->ifa_ifp->if_index; 832 #ifndef SMALL_KERNEL 833 /* recheck link state after ifp change*/ 834 rt_if_linkstate_change(rt, ifa->ifa_ifp, 835 tableid); 836 #endif 837 } 838 } 839 change: 840 if (info.rti_info[RTAX_GATEWAY] != NULL && (error = 841 rt_setgate(rt, info.rti_info[RTAX_GATEWAY], 842 tableid))) 843 goto flush; 844 #ifdef MPLS 845 if ((rtm->rtm_flags & RTF_MPLS) && 846 info.rti_info[RTAX_SRC] != NULL) { 847 struct rt_mpls *rt_mpls; 848 849 psa_mpls = (struct sockaddr_mpls *) 850 info.rti_info[RTAX_SRC]; 851 852 if (rt->rt_llinfo == NULL) { 853 rt->rt_llinfo = 854 malloc(sizeof(struct rt_mpls), 855 M_TEMP, M_NOWAIT|M_ZERO); 856 } 857 if (rt->rt_llinfo == NULL) { 858 error = ENOMEM; 859 goto flush; 860 } 861 862 rt_mpls = (struct rt_mpls *)rt->rt_llinfo; 863 864 if (psa_mpls != NULL) { 865 rt_mpls->mpls_label = 866 psa_mpls->smpls_label; 867 } 868 869 rt_mpls->mpls_operation = info.rti_mpls; 870 871 /* XXX: set experimental bits */ 872 873 rt->rt_flags |= RTF_MPLS; 874 } else if (newgate || ((rtm->rtm_fmask & RTF_MPLS) && 875 !(rtm->rtm_flags & RTF_MPLS))) { 876 /* if gateway changed remove MPLS information */ 877 if (rt->rt_llinfo != NULL && 878 rt->rt_flags & RTF_MPLS) { 879 free(rt->rt_llinfo, M_TEMP, 0); 880 rt->rt_llinfo = NULL; 881 rt->rt_flags &= ~RTF_MPLS; 882 } 883 } 884 #endif 885 886 #ifdef BFD 887 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 888 if ((error = bfdset(rt))) 889 goto flush; 890 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 891 ISSET(rtm->rtm_fmask, RTF_BFD)) { 892 bfdclear(rt); 893 } 894 #endif 895 896 /* Hack to allow some flags to be toggled */ 897 if (rtm->rtm_fmask) 898 rt->rt_flags = 899 (rt->rt_flags & ~rtm->rtm_fmask) | 900 (rtm->rtm_flags & rtm->rtm_fmask); 901 902 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 903 &rt->rt_rmx); 904 rtm->rtm_index = rt->rt_ifidx; 905 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 906 rtm->rtm_flags = rt->rt_flags; 907 908 ifp = if_get(rt->rt_ifidx); 909 KASSERT(ifp != NULL); 910 ifp->if_rtrequest(ifp, RTM_ADD, rt); 911 if_put(ifp); 912 913 if (info.rti_info[RTAX_LABEL] != NULL) { 914 char *rtlabel = ((struct sockaddr_rtlabel *) 915 info.rti_info[RTAX_LABEL])->sr_label; 916 rtlabel_unref(rt->rt_labelid); 917 rt->rt_labelid = rtlabel_name2id(rtlabel); 918 } 919 if_group_routechange(info.rti_info[RTAX_DST], 920 info.rti_info[RTAX_NETMASK]); 921 /* FALLTHROUGH */ 922 case RTM_LOCK: 923 rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); 924 rt->rt_rmx.rmx_locks |= 925 (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 926 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 927 break; 928 } 929 break; 930 } 931 932 flush: 933 if (rtm) { 934 if (error) 935 rtm->rtm_errno = error; 936 else { 937 rtm->rtm_flags |= RTF_DONE; 938 } 939 } 940 if (info.rti_info[RTAX_DST]) 941 route_proto.sp_protocol = info.rti_info[RTAX_DST]->sa_family; 942 if (rt) 943 rtfree(rt); 944 945 /* 946 * Check to see if we don't want our own messages. 947 */ 948 if (!(so->so_options & SO_USELOOPBACK)) { 949 if (route_cb.any_count <= 1) { 950 fail: 951 free(rtm, M_RTABLE, 0); 952 m_freem(m); 953 return (error); 954 } 955 /* There is another listener, so construct message */ 956 rp = sotorawcb(so); 957 } 958 if (rp) 959 rp->rcb_proto.sp_family = 0; /* Avoid us */ 960 if (rtm) { 961 if (m_copyback(m, 0, rtm->rtm_msglen, rtm, M_NOWAIT)) { 962 m_freem(m); 963 m = NULL; 964 } else if (m->m_pkthdr.len > rtm->rtm_msglen) 965 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); 966 free(rtm, M_RTABLE, 0); 967 } 968 if (m) 969 route_input(m, &route_proto, &route_src, &route_dst); 970 if (rp) 971 rp->rcb_proto.sp_family = PF_ROUTE; 972 973 return (error); 974 } 975 976 int 977 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 978 { 979 struct rtentry *nhrt = arg; 980 981 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 982 !ISSET(rt->rt_locks, RTV_MTU)) 983 rt->rt_mtu = 0; 984 985 return (0); 986 } 987 988 /* 989 * Check if the user request to insert an ARP entry does not conflict 990 * with existing ones. 991 * 992 * Only two entries are allowed for a given IP address: a private one 993 * (priv) and a public one (pub). 994 */ 995 int 996 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 997 { 998 #ifdef ART 999 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1000 1001 if ((info->rti_flags & RTF_LLINFO) == 0 || 1002 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1003 return (0); 1004 1005 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1006 return (0); 1007 1008 /* If the entry is cached, it can be updated. */ 1009 if (ISSET(rt->rt_flags, RTF_CACHED)) 1010 return (0); 1011 1012 /* 1013 * Same destination, not cached and both "priv" or "pub" conflict. 1014 * If a second entry exists, it always conflict. 1015 */ 1016 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1017 ISSET(rt->rt_flags, RTF_MPATH)) 1018 return (EEXIST); 1019 1020 /* No conflict but an entry exist so we need to force mpath. */ 1021 info->rti_flags |= RTF_MPATH; 1022 #endif /* ART */ 1023 return (0); 1024 } 1025 1026 void 1027 rt_setmetrics(u_long which, const struct rt_metrics *in, 1028 struct rt_kmetrics *out) 1029 { 1030 int64_t expire; 1031 1032 if (which & RTV_MTU) 1033 out->rmx_mtu = in->rmx_mtu; 1034 if (which & RTV_EXPIRE) { 1035 expire = in->rmx_expire; 1036 if (expire != 0) { 1037 expire -= time_second; 1038 expire += time_uptime; 1039 } 1040 1041 out->rmx_expire = expire; 1042 } 1043 /* RTV_PRIORITY handled before */ 1044 } 1045 1046 void 1047 rt_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1048 { 1049 int64_t expire; 1050 1051 expire = in->rmx_expire; 1052 if (expire != 0) { 1053 expire -= time_uptime; 1054 expire += time_second; 1055 } 1056 1057 bzero(out, sizeof(*out)); 1058 out->rmx_locks = in->rmx_locks; 1059 out->rmx_mtu = in->rmx_mtu; 1060 out->rmx_expire = expire; 1061 out->rmx_pksent = in->rmx_pksent; 1062 } 1063 1064 #define ROUNDUP(a) \ 1065 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1066 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1067 1068 void 1069 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1070 { 1071 struct sockaddr *sa; 1072 int i; 1073 1074 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1075 for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) { 1076 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1077 continue; 1078 rtinfo->rti_info[i] = sa = (struct sockaddr *)cp; 1079 ADVANCE(cp, sa); 1080 } 1081 } 1082 1083 struct mbuf * 1084 rt_msg1(int type, struct rt_addrinfo *rtinfo) 1085 { 1086 struct rt_msghdr *rtm; 1087 struct mbuf *m; 1088 int i; 1089 struct sockaddr *sa; 1090 int len, dlen, hlen; 1091 1092 switch (type) { 1093 case RTM_DELADDR: 1094 case RTM_NEWADDR: 1095 len = sizeof(struct ifa_msghdr); 1096 break; 1097 case RTM_IFINFO: 1098 len = sizeof(struct if_msghdr); 1099 break; 1100 case RTM_IFANNOUNCE: 1101 len = sizeof(struct if_announcemsghdr); 1102 break; 1103 default: 1104 len = sizeof(struct rt_msghdr); 1105 break; 1106 } 1107 if (len > MCLBYTES) 1108 panic("rt_msg1"); 1109 m = m_gethdr(M_DONTWAIT, MT_DATA); 1110 if (m && len > MHLEN) { 1111 MCLGET(m, M_DONTWAIT); 1112 if ((m->m_flags & M_EXT) == 0) { 1113 m_free(m); 1114 m = NULL; 1115 } 1116 } 1117 if (m == NULL) 1118 return (m); 1119 m->m_pkthdr.len = m->m_len = hlen = len; 1120 m->m_pkthdr.ph_ifidx = 0; 1121 rtm = mtod(m, struct rt_msghdr *); 1122 bzero(rtm, len); 1123 for (i = 0; i < RTAX_MAX; i++) { 1124 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1125 continue; 1126 rtinfo->rti_addrs |= (1 << i); 1127 dlen = ROUNDUP(sa->sa_len); 1128 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1129 m_freem(m); 1130 return (NULL); 1131 } 1132 len += dlen; 1133 } 1134 rtm->rtm_msglen = len; 1135 rtm->rtm_hdrlen = hlen; 1136 rtm->rtm_version = RTM_VERSION; 1137 rtm->rtm_type = type; 1138 return (m); 1139 } 1140 1141 int 1142 rt_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1143 struct walkarg *w) 1144 { 1145 int i; 1146 int len, dlen, hlen, second_time = 0; 1147 caddr_t cp0; 1148 1149 rtinfo->rti_addrs = 0; 1150 again: 1151 switch (type) { 1152 case RTM_DELADDR: 1153 case RTM_NEWADDR: 1154 len = sizeof(struct ifa_msghdr); 1155 break; 1156 case RTM_IFINFO: 1157 len = sizeof(struct if_msghdr); 1158 break; 1159 default: 1160 len = sizeof(struct rt_msghdr); 1161 break; 1162 } 1163 hlen = len; 1164 if ((cp0 = cp) != NULL) 1165 cp += len; 1166 for (i = 0; i < RTAX_MAX; i++) { 1167 struct sockaddr *sa; 1168 1169 if ((sa = rtinfo->rti_info[i]) == NULL) 1170 continue; 1171 rtinfo->rti_addrs |= (1 << i); 1172 dlen = ROUNDUP(sa->sa_len); 1173 if (cp) { 1174 bcopy(sa, cp, (size_t)dlen); 1175 cp += dlen; 1176 } 1177 len += dlen; 1178 } 1179 /* align message length to the next natural boundary */ 1180 len = ALIGN(len); 1181 if (cp == 0 && w != NULL && !second_time) { 1182 struct walkarg *rw = w; 1183 1184 rw->w_needed += len; 1185 if (rw->w_needed <= 0 && rw->w_where) { 1186 if (rw->w_tmemsize < len) { 1187 free(rw->w_tmem, M_RTABLE, 0); 1188 rw->w_tmem = malloc(len, M_RTABLE, M_NOWAIT); 1189 if (rw->w_tmem) 1190 rw->w_tmemsize = len; 1191 } 1192 if (rw->w_tmem) { 1193 cp = rw->w_tmem; 1194 second_time = 1; 1195 goto again; 1196 } else 1197 rw->w_where = 0; 1198 } 1199 } 1200 if (cp && w) /* clear the message header */ 1201 bzero(cp0, hlen); 1202 1203 if (cp) { 1204 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1205 1206 rtm->rtm_version = RTM_VERSION; 1207 rtm->rtm_type = type; 1208 rtm->rtm_msglen = len; 1209 rtm->rtm_hdrlen = hlen; 1210 } 1211 return (len); 1212 } 1213 1214 /* 1215 * This routine is called to generate a message from the routing 1216 * socket indicating that a redirect has occurred, a routing lookup 1217 * has failed, or that a protocol has detected timeouts to a particular 1218 * destination. 1219 */ 1220 void 1221 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1222 u_int ifidx, int error, u_int tableid) 1223 { 1224 struct rt_msghdr *rtm; 1225 struct mbuf *m; 1226 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1227 1228 if (route_cb.any_count == 0) 1229 return; 1230 m = rt_msg1(type, rtinfo); 1231 if (m == NULL) 1232 return; 1233 rtm = mtod(m, struct rt_msghdr *); 1234 rtm->rtm_flags = RTF_DONE | flags; 1235 rtm->rtm_priority = prio; 1236 rtm->rtm_errno = error; 1237 rtm->rtm_tableid = tableid; 1238 rtm->rtm_addrs = rtinfo->rti_addrs; 1239 rtm->rtm_index = ifidx; 1240 if (sa == NULL) 1241 route_proto.sp_protocol = 0; 1242 else 1243 route_proto.sp_protocol = sa->sa_family; 1244 route_input(m, &route_proto, &route_src, &route_dst); 1245 } 1246 1247 /* 1248 * This routine is called to generate a message from the routing 1249 * socket indicating that the status of a network interface has changed. 1250 */ 1251 void 1252 rt_ifmsg(struct ifnet *ifp) 1253 { 1254 struct if_msghdr *ifm; 1255 struct mbuf *m; 1256 1257 if (route_cb.any_count == 0) 1258 return; 1259 m = rt_msg1(RTM_IFINFO, NULL); 1260 if (m == NULL) 1261 return; 1262 ifm = mtod(m, struct if_msghdr *); 1263 ifm->ifm_index = ifp->if_index; 1264 ifm->ifm_tableid = ifp->if_rdomain; 1265 ifm->ifm_flags = ifp->if_flags; 1266 ifm->ifm_xflags = ifp->if_xflags; 1267 ifm->ifm_data = ifp->if_data; 1268 ifm->ifm_addrs = 0; 1269 route_proto.sp_protocol = 0; 1270 route_input(m, &route_proto, &route_src, &route_dst); 1271 } 1272 1273 /* 1274 * This is called to generate messages from the routing socket 1275 * indicating a network interface has had addresses associated with it. 1276 * if we ever reverse the logic and replace messages TO the routing 1277 * socket indicate a request to configure interfaces, then it will 1278 * be unnecessary as the routing socket will automatically generate 1279 * copies of it. 1280 */ 1281 void 1282 rt_sendaddrmsg(struct rtentry *rt, int cmd, struct ifaddr *ifa) 1283 { 1284 struct ifnet *ifp = ifa->ifa_ifp; 1285 struct mbuf *m = NULL; 1286 struct rt_addrinfo info; 1287 struct ifa_msghdr *ifam; 1288 1289 if (route_cb.any_count == 0) 1290 return; 1291 1292 memset(&info, 0, sizeof(info)); 1293 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1294 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1295 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1296 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1297 if ((m = rt_msg1(cmd, &info)) == NULL) 1298 return; 1299 ifam = mtod(m, struct ifa_msghdr *); 1300 ifam->ifam_index = ifp->if_index; 1301 ifam->ifam_metric = ifa->ifa_metric; 1302 ifam->ifam_flags = ifa->ifa_flags; 1303 ifam->ifam_addrs = info.rti_addrs; 1304 ifam->ifam_tableid = ifp->if_rdomain; 1305 1306 if (ifa->ifa_addr == NULL) 1307 route_proto.sp_protocol = 0; 1308 else 1309 route_proto.sp_protocol = ifa->ifa_addr->sa_family; 1310 route_input(m, &route_proto, &route_src, &route_dst); 1311 } 1312 1313 /* 1314 * This is called to generate routing socket messages indicating 1315 * network interface arrival and departure. 1316 */ 1317 void 1318 rt_ifannouncemsg(struct ifnet *ifp, int what) 1319 { 1320 struct if_announcemsghdr *ifan; 1321 struct mbuf *m; 1322 1323 if (route_cb.any_count == 0) 1324 return; 1325 m = rt_msg1(RTM_IFANNOUNCE, NULL); 1326 if (m == NULL) 1327 return; 1328 ifan = mtod(m, struct if_announcemsghdr *); 1329 ifan->ifan_index = ifp->if_index; 1330 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1331 ifan->ifan_what = what; 1332 route_proto.sp_protocol = 0; 1333 route_input(m, &route_proto, &route_src, &route_dst); 1334 } 1335 1336 /* 1337 * This is used in dumping the kernel table via sysctl(). 1338 */ 1339 int 1340 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1341 { 1342 struct walkarg *w = v; 1343 int error = 0, size; 1344 struct rt_addrinfo info; 1345 struct ifnet *ifp; 1346 #ifdef MPLS 1347 struct sockaddr_mpls sa_mpls; 1348 #endif 1349 struct sockaddr_rtlabel sa_rl; 1350 struct sockaddr_in6 sa_mask; 1351 1352 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1353 return 0; 1354 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1355 u_int8_t prio = w->w_arg & RTP_MASK; 1356 if (w->w_arg < 0) { 1357 prio = (-w->w_arg) & RTP_MASK; 1358 /* Show all routes that are not this priority */ 1359 if (prio == (rt->rt_priority & RTP_MASK)) 1360 return 0; 1361 } else { 1362 if (prio != (rt->rt_priority & RTP_MASK) && 1363 prio != RTP_ANY) 1364 return 0; 1365 } 1366 } 1367 bzero(&info, sizeof(info)); 1368 info.rti_info[RTAX_DST] = rt_key(rt); 1369 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1370 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1371 ifp = if_get(rt->rt_ifidx); 1372 if (ifp != NULL) { 1373 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1374 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1375 if (ifp->if_flags & IFF_POINTOPOINT) 1376 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1377 } 1378 if_put(ifp); 1379 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1380 #ifdef MPLS 1381 if (rt->rt_flags & RTF_MPLS) { 1382 bzero(&sa_mpls, sizeof(sa_mpls)); 1383 sa_mpls.smpls_family = AF_MPLS; 1384 sa_mpls.smpls_len = sizeof(sa_mpls); 1385 sa_mpls.smpls_label = ((struct rt_mpls *) 1386 rt->rt_llinfo)->mpls_label; 1387 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1388 info.rti_mpls = ((struct rt_mpls *) 1389 rt->rt_llinfo)->mpls_operation; 1390 } 1391 #endif 1392 1393 size = rt_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1394 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1395 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1396 1397 rtm->rtm_pid = curproc->p_p->ps_pid; 1398 rtm->rtm_flags = rt->rt_flags; 1399 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1400 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1401 /* Do not account the routing table's reference. */ 1402 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1403 rtm->rtm_index = rt->rt_ifidx; 1404 rtm->rtm_addrs = info.rti_addrs; 1405 rtm->rtm_tableid = id; 1406 #ifdef MPLS 1407 rtm->rtm_mpls = info.rti_mpls; 1408 #endif 1409 if ((error = copyout(rtm, w->w_where, size)) != 0) 1410 w->w_where = NULL; 1411 else 1412 w->w_where += size; 1413 } 1414 return (error); 1415 } 1416 1417 int 1418 sysctl_iflist(int af, struct walkarg *w) 1419 { 1420 struct ifnet *ifp; 1421 struct ifaddr *ifa; 1422 struct rt_addrinfo info; 1423 int len, error = 0; 1424 1425 bzero(&info, sizeof(info)); 1426 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1427 if (w->w_arg && w->w_arg != ifp->if_index) 1428 continue; 1429 /* Copy the link-layer address first */ 1430 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1431 len = rt_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 1432 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1433 struct if_msghdr *ifm; 1434 1435 ifm = (struct if_msghdr *)w->w_tmem; 1436 ifm->ifm_index = ifp->if_index; 1437 ifm->ifm_tableid = ifp->if_rdomain; 1438 ifm->ifm_flags = ifp->if_flags; 1439 ifm->ifm_data = ifp->if_data; 1440 ifm->ifm_addrs = info.rti_addrs; 1441 error = copyout(ifm, w->w_where, len); 1442 if (error) 1443 return (error); 1444 w->w_where += len; 1445 } 1446 info.rti_info[RTAX_IFP] = NULL; 1447 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1448 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 1449 if (af && af != ifa->ifa_addr->sa_family) 1450 continue; 1451 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1452 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1453 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1454 len = rt_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 1455 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1456 struct ifa_msghdr *ifam; 1457 1458 ifam = (struct ifa_msghdr *)w->w_tmem; 1459 ifam->ifam_index = ifa->ifa_ifp->if_index; 1460 ifam->ifam_flags = ifa->ifa_flags; 1461 ifam->ifam_metric = ifa->ifa_metric; 1462 ifam->ifam_addrs = info.rti_addrs; 1463 error = copyout(w->w_tmem, w->w_where, len); 1464 if (error) 1465 return (error); 1466 w->w_where += len; 1467 } 1468 } 1469 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 1470 info.rti_info[RTAX_BRD] = NULL; 1471 } 1472 return (0); 1473 } 1474 1475 int 1476 sysctl_ifnames(struct walkarg *w) 1477 { 1478 struct if_nameindex_msg ifn; 1479 struct ifnet *ifp; 1480 int error = 0; 1481 1482 /* XXX ignore tableid for now */ 1483 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1484 if (w->w_arg && w->w_arg != ifp->if_index) 1485 continue; 1486 w->w_needed += sizeof(ifn); 1487 if (w->w_where && w->w_needed <= 0) { 1488 1489 memset(&ifn, 0, sizeof(ifn)); 1490 ifn.if_index = ifp->if_index; 1491 strlcpy(ifn.if_name, ifp->if_xname, 1492 sizeof(ifn.if_name)); 1493 error = copyout(&ifn, w->w_where, sizeof(ifn)); 1494 if (error) 1495 return (error); 1496 w->w_where += sizeof(ifn); 1497 } 1498 } 1499 1500 return (0); 1501 } 1502 1503 int 1504 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 1505 size_t newlen) 1506 { 1507 int i, s, error = EINVAL; 1508 u_char af; 1509 struct walkarg w; 1510 struct rt_tableinfo tableinfo; 1511 u_int tableid = 0; 1512 1513 if (new) 1514 return (EPERM); 1515 if (namelen < 3 || namelen > 4) 1516 return (EINVAL); 1517 af = name[0]; 1518 bzero(&w, sizeof(w)); 1519 w.w_where = where; 1520 w.w_given = *given; 1521 w.w_needed = 0 - w.w_given; 1522 w.w_op = name[1]; 1523 w.w_arg = name[2]; 1524 1525 if (namelen == 4) { 1526 tableid = name[3]; 1527 if (!rtable_exists(tableid)) 1528 return (ENOENT); 1529 } else 1530 tableid = curproc->p_p->ps_rtableid; 1531 1532 s = splsoftnet(); 1533 switch (w.w_op) { 1534 case NET_RT_DUMP: 1535 case NET_RT_FLAGS: 1536 for (i = 1; i <= AF_MAX; i++) { 1537 if (af != 0 && af != i) 1538 continue; 1539 1540 error = rtable_walk(tableid, i, sysctl_dumpentry, &w); 1541 if (error == EAFNOSUPPORT) 1542 error = 0; 1543 if (error) 1544 break; 1545 } 1546 break; 1547 1548 case NET_RT_IFLIST: 1549 error = sysctl_iflist(af, &w); 1550 break; 1551 1552 case NET_RT_STATS: 1553 error = sysctl_rdstruct(where, given, new, 1554 &rtstat, sizeof(rtstat)); 1555 splx(s); 1556 return (error); 1557 case NET_RT_TABLE: 1558 tableid = w.w_arg; 1559 if (!rtable_exists(tableid)) { 1560 splx(s); 1561 return (ENOENT); 1562 } 1563 tableinfo.rti_tableid = tableid; 1564 tableinfo.rti_domainid = rtable_l2(tableid); 1565 error = sysctl_rdstruct(where, given, new, 1566 &tableinfo, sizeof(tableinfo)); 1567 splx(s); 1568 return (error); 1569 case NET_RT_IFNAMES: 1570 error = sysctl_ifnames(&w); 1571 break; 1572 } 1573 splx(s); 1574 free(w.w_tmem, M_RTABLE, 0); 1575 w.w_needed += w.w_given; 1576 if (where) { 1577 *given = w.w_where - (caddr_t)where; 1578 if (*given < w.w_needed) 1579 return (ENOMEM); 1580 } else 1581 *given = (11 * w.w_needed) / 10; 1582 1583 return (error); 1584 } 1585 1586 /* 1587 * Definitions of protocols supported in the ROUTE domain. 1588 */ 1589 1590 extern struct domain routedomain; /* or at least forward */ 1591 1592 struct protosw routesw[] = { 1593 { SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 1594 route_input, route_output, 0, route_ctloutput, 1595 route_usrreq, 1596 raw_init, 0, 0, 0, 1597 sysctl_rtable, 1598 } 1599 }; 1600 1601 struct domain routedomain = 1602 { PF_ROUTE, "route", route_init, 0, 0, 1603 routesw, &routesw[nitems(routesw)] }; 1604