1 /* $OpenBSD: if_vxlan.c,v 1.44 2016/09/04 11:14:44 reyk Exp $ */ 2 3 /* 4 * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bpfilter.h" 20 #include "vxlan.h" 21 #include "vlan.h" 22 #include "pf.h" 23 #include "bridge.h" 24 25 #include <sys/param.h> 26 #include <sys/systm.h> 27 #include <sys/mbuf.h> 28 #include <sys/socket.h> 29 #include <sys/sockio.h> 30 #include <sys/ioctl.h> 31 32 #include <net/if.h> 33 #include <net/if_var.h> 34 #include <net/if_media.h> 35 #include <net/route.h> 36 37 #if NBPFILTER > 0 38 #include <net/bpf.h> 39 #endif 40 41 #include <netinet/in.h> 42 #include <netinet/in_var.h> 43 #include <netinet/if_ether.h> 44 #include <netinet/ip.h> 45 #include <netinet/ip_var.h> 46 #include <netinet/udp.h> 47 #include <netinet/udp_var.h> 48 #include <netinet/in_pcb.h> 49 50 #if NPF > 0 51 #include <net/pfvar.h> 52 #endif 53 54 #if NBRIDGE > 0 55 #include <net/if_bridge.h> 56 #endif 57 58 #include <net/if_vxlan.h> 59 60 struct vxlan_softc { 61 struct arpcom sc_ac; 62 struct ifmedia sc_media; 63 64 struct ip_moptions sc_imo; 65 void *sc_ahcookie; 66 void *sc_lhcookie; 67 void *sc_dhcookie; 68 69 struct sockaddr_storage sc_src; 70 struct sockaddr_storage sc_dst; 71 in_port_t sc_dstport; 72 u_int sc_rdomain; 73 int64_t sc_vnetid; 74 u_int8_t sc_ttl; 75 76 LIST_ENTRY(vxlan_softc) sc_entry; 77 }; 78 79 void vxlanattach(int); 80 int vxlanioctl(struct ifnet *, u_long, caddr_t); 81 void vxlanstart(struct ifnet *); 82 int vxlan_clone_create(struct if_clone *, int); 83 int vxlan_clone_destroy(struct ifnet *); 84 void vxlan_multicast_cleanup(struct ifnet *); 85 int vxlan_multicast_join(struct ifnet *, struct sockaddr *, 86 struct sockaddr *); 87 int vxlan_media_change(struct ifnet *); 88 void vxlan_media_status(struct ifnet *, struct ifmediareq *); 89 int vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *); 90 int vxlan_output(struct ifnet *, struct mbuf *); 91 void vxlan_addr_change(void *); 92 void vxlan_if_change(void *); 93 void vxlan_link_change(void *); 94 95 int vxlan_sockaddr_cmp(struct sockaddr *, struct sockaddr *); 96 uint16_t vxlan_sockaddr_port(struct sockaddr *); 97 98 struct if_clone vxlan_cloner = 99 IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); 100 101 int vxlan_enable = 0; 102 u_long vxlan_tagmask; 103 104 #define VXLAN_TAGHASHSIZE 32 105 #define VXLAN_TAGHASH(tag) ((unsigned int)tag & vxlan_tagmask) 106 LIST_HEAD(vxlan_taghash, vxlan_softc) *vxlan_tagh, vxlan_any; 107 108 void 109 vxlanattach(int count) 110 { 111 /* Regular vxlan interfaces with a VNI */ 112 if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT, 113 &vxlan_tagmask)) == NULL) 114 panic("vxlanattach: hashinit"); 115 116 /* multipoint-to-multipoint interfaces that accept any VNI */ 117 LIST_INIT(&vxlan_any); 118 119 if_clone_attach(&vxlan_cloner); 120 } 121 122 int 123 vxlan_clone_create(struct if_clone *ifc, int unit) 124 { 125 struct ifnet *ifp; 126 struct vxlan_softc *sc; 127 128 if ((sc = malloc(sizeof(*sc), 129 M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) 130 return (ENOMEM); 131 132 sc->sc_imo.imo_membership = malloc( 133 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 134 M_WAITOK|M_ZERO); 135 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 136 sc->sc_dstport = htons(VXLAN_PORT); 137 sc->sc_vnetid = VXLAN_VNI_UNSET; 138 139 ifp = &sc->sc_ac.ac_if; 140 snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit); 141 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 142 ether_fakeaddr(ifp); 143 144 ifp->if_softc = sc; 145 ifp->if_ioctl = vxlanioctl; 146 ifp->if_start = vxlanstart; 147 IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); 148 149 ifp->if_hardmtu = 0xffff; 150 ifp->if_capabilities = IFCAP_VLAN_MTU; 151 152 ifmedia_init(&sc->sc_media, 0, vxlan_media_change, 153 vxlan_media_status); 154 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 155 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 156 157 if_attach(ifp); 158 ether_ifattach(ifp); 159 160 #if 0 161 /* 162 * Instead of using a decreased MTU of 1450 bytes, prefer 163 * to use the default Ethernet-size MTU of 1500 bytes and to 164 * increase the MTU of the outer transport interfaces to 165 * at least 1550 bytes. The following is disabled by default. 166 */ 167 ifp->if_mtu = ETHERMTU - sizeof(struct ether_header); 168 ifp->if_mtu -= sizeof(struct vxlanudphdr) + sizeof(struct ipovly); 169 #endif 170 171 LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry); 172 vxlan_enable++; 173 174 return (0); 175 } 176 177 int 178 vxlan_clone_destroy(struct ifnet *ifp) 179 { 180 struct vxlan_softc *sc = ifp->if_softc; 181 int s; 182 183 s = splnet(); 184 vxlan_multicast_cleanup(ifp); 185 splx(s); 186 187 vxlan_enable--; 188 LIST_REMOVE(sc, sc_entry); 189 190 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); 191 ether_ifdetach(ifp); 192 if_detach(ifp); 193 free(sc->sc_imo.imo_membership, M_IPMOPTS, 0); 194 free(sc, M_DEVBUF, sizeof(*sc)); 195 196 return (0); 197 } 198 199 void 200 vxlan_multicast_cleanup(struct ifnet *ifp) 201 { 202 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 203 struct ip_moptions *imo = &sc->sc_imo; 204 struct ifnet *mifp; 205 206 mifp = if_get(imo->imo_ifidx); 207 if (mifp != NULL) { 208 if (sc->sc_ahcookie != NULL) { 209 hook_disestablish(mifp->if_addrhooks, sc->sc_ahcookie); 210 sc->sc_ahcookie = NULL; 211 } 212 if (sc->sc_lhcookie != NULL) { 213 hook_disestablish(mifp->if_linkstatehooks, 214 sc->sc_lhcookie); 215 sc->sc_lhcookie = NULL; 216 } 217 if (sc->sc_dhcookie != NULL) { 218 hook_disestablish(mifp->if_detachhooks, 219 sc->sc_dhcookie); 220 sc->sc_dhcookie = NULL; 221 } 222 223 if_put(mifp); 224 } 225 226 if (imo->imo_num_memberships > 0) { 227 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 228 imo->imo_ifidx = 0; 229 } 230 } 231 232 int 233 vxlan_multicast_join(struct ifnet *ifp, struct sockaddr *src, 234 struct sockaddr *dst) 235 { 236 struct vxlan_softc *sc = ifp->if_softc; 237 struct ip_moptions *imo = &sc->sc_imo; 238 struct sockaddr_in *src4, *dst4; 239 struct sockaddr_in6 *dst6; 240 struct ifaddr *ifa; 241 struct ifnet *mifp; 242 243 if (dst->sa_family == AF_INET) { 244 dst4 = satosin(dst); 245 if (!IN_MULTICAST(dst4->sin_addr.s_addr)) 246 return (0); 247 } else if (dst->sa_family == AF_INET6) { 248 dst6 = satosin6(dst); 249 if (!IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr)) 250 return (0); 251 252 /* Multicast mode is currently not supported for IPv6 */ 253 return (EAFNOSUPPORT); 254 } 255 256 src4 = satosin(src); 257 dst4 = satosin(dst); 258 259 if (src4->sin_addr.s_addr == INADDR_ANY || 260 IN_MULTICAST(src4->sin_addr.s_addr)) 261 return (EINVAL); 262 if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) == NULL || 263 (mifp = ifa->ifa_ifp) == NULL || 264 (mifp->if_flags & IFF_MULTICAST) == 0) 265 return (EADDRNOTAVAIL); 266 267 if ((imo->imo_membership[0] = 268 in_addmulti(&dst4->sin_addr, mifp)) == NULL) 269 return (ENOBUFS); 270 271 imo->imo_num_memberships++; 272 imo->imo_ifidx = mifp->if_index; 273 if (sc->sc_ttl > 0) 274 imo->imo_ttl = sc->sc_ttl; 275 else 276 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 277 imo->imo_loop = 0; 278 279 /* 280 * Use interface hooks to track any changes on the interface 281 * that is used to send out the tunnel traffic as multicast. 282 */ 283 if ((sc->sc_ahcookie = hook_establish(mifp->if_addrhooks, 284 0, vxlan_addr_change, sc)) == NULL || 285 (sc->sc_lhcookie = hook_establish(mifp->if_linkstatehooks, 286 0, vxlan_link_change, sc)) == NULL || 287 (sc->sc_dhcookie = hook_establish(mifp->if_detachhooks, 288 0, vxlan_if_change, sc)) == NULL) 289 panic("%s: cannot allocate interface hook", 290 mifp->if_xname); 291 292 return (0); 293 } 294 295 void 296 vxlanstart(struct ifnet *ifp) 297 { 298 struct mbuf *m; 299 300 for (;;) { 301 IFQ_DEQUEUE(&ifp->if_snd, m); 302 if (m == NULL) 303 return; 304 305 ifp->if_opackets++; 306 307 #if NBPFILTER > 0 308 if (ifp->if_bpf) 309 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 310 #endif 311 312 vxlan_output(ifp, m); 313 } 314 } 315 316 int 317 vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) 318 { 319 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 320 int reset = 0, error, af; 321 socklen_t slen; 322 in_port_t port; 323 struct vxlan_taghash *tagh; 324 325 if (src != NULL && dst != NULL) { 326 if ((af = src->sa_family) != dst->sa_family) 327 return (EAFNOSUPPORT); 328 } else { 329 /* Reset current configuration */ 330 af = sc->sc_src.ss_family; 331 src = (struct sockaddr *)&sc->sc_src; 332 dst = (struct sockaddr *)&sc->sc_dst; 333 reset = 1; 334 } 335 336 if (af == AF_INET) 337 slen = sizeof(struct sockaddr_in); 338 else if (af == AF_INET6) 339 slen = sizeof(struct sockaddr_in6); 340 else 341 return (EAFNOSUPPORT); 342 343 if (src->sa_len != slen || dst->sa_len != slen) 344 return (EINVAL); 345 346 vxlan_multicast_cleanup(ifp); 347 348 /* returns without error if multicast is not configured */ 349 if ((error = vxlan_multicast_join(ifp, src, dst)) != 0) 350 return (error); 351 352 if ((port = vxlan_sockaddr_port(dst)) != 0) 353 sc->sc_dstport = port; 354 355 if (!reset) { 356 bzero(&sc->sc_src, sizeof(sc->sc_src)); 357 bzero(&sc->sc_dst, sizeof(sc->sc_dst)); 358 memcpy(&sc->sc_src, src, src->sa_len); 359 memcpy(&sc->sc_dst, dst, dst->sa_len); 360 } 361 362 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 363 /* 364 * If the interface accepts any VNI, put it into a separate 365 * list that is not part of the main hash. 366 */ 367 tagh = &vxlan_any; 368 } else 369 tagh = &vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)]; 370 371 LIST_REMOVE(sc, sc_entry); 372 LIST_INSERT_HEAD(tagh, sc, sc_entry); 373 374 return (0); 375 } 376 377 int 378 vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 379 { 380 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 381 struct ifreq *ifr = (struct ifreq *)data; 382 struct if_laddrreq *lifr = (struct if_laddrreq *)data; 383 int error = 0, s; 384 385 switch (cmd) { 386 case SIOCSIFADDR: 387 ifp->if_flags |= IFF_UP; 388 /* FALLTHROUGH */ 389 390 case SIOCSIFFLAGS: 391 if (ifp->if_flags & IFF_UP) { 392 ifp->if_flags |= IFF_RUNNING; 393 } else { 394 ifp->if_flags &= ~IFF_RUNNING; 395 } 396 break; 397 398 case SIOCADDMULTI: 399 case SIOCDELMULTI: 400 break; 401 402 case SIOCGIFMEDIA: 403 case SIOCSIFMEDIA: 404 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 405 break; 406 407 case SIOCSLIFPHYADDR: 408 s = splnet(); 409 error = vxlan_config(ifp, 410 (struct sockaddr *)&lifr->addr, 411 (struct sockaddr *)&lifr->dstaddr); 412 splx(s); 413 break; 414 415 case SIOCDIFPHYADDR: 416 s = splnet(); 417 vxlan_multicast_cleanup(ifp); 418 bzero(&sc->sc_src, sizeof(sc->sc_src)); 419 bzero(&sc->sc_dst, sizeof(sc->sc_dst)); 420 sc->sc_dstport = htons(VXLAN_PORT); 421 splx(s); 422 break; 423 424 case SIOCGLIFPHYADDR: 425 if (sc->sc_dst.ss_family == AF_UNSPEC) { 426 error = EADDRNOTAVAIL; 427 break; 428 } 429 bzero(&lifr->addr, sizeof(lifr->addr)); 430 bzero(&lifr->dstaddr, sizeof(lifr->dstaddr)); 431 memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len); 432 memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len); 433 break; 434 435 case SIOCSLIFPHYRTABLE: 436 if (ifr->ifr_rdomainid < 0 || 437 ifr->ifr_rdomainid > RT_TABLEID_MAX || 438 !rtable_exists(ifr->ifr_rdomainid)) { 439 error = EINVAL; 440 break; 441 } 442 s = splnet(); 443 sc->sc_rdomain = ifr->ifr_rdomainid; 444 (void)vxlan_config(ifp, NULL, NULL); 445 splx(s); 446 break; 447 448 case SIOCGLIFPHYRTABLE: 449 ifr->ifr_rdomainid = sc->sc_rdomain; 450 break; 451 452 case SIOCSLIFPHYTTL: 453 if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { 454 error = EINVAL; 455 break; 456 } 457 if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl) 458 break; 459 s = splnet(); 460 sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl); 461 (void)vxlan_config(ifp, NULL, NULL); 462 splx(s); 463 break; 464 465 case SIOCGLIFPHYTTL: 466 ifr->ifr_ttl = (int)sc->sc_ttl; 467 break; 468 469 case SIOCSVNETID: 470 if (sc->sc_vnetid == ifr->ifr_vnetid) 471 break; 472 473 if ((ifr->ifr_vnetid != VXLAN_VNI_ANY) && 474 (ifr->ifr_vnetid > VXLAN_VNI_MAX || 475 ifr->ifr_vnetid < VXLAN_VNI_MIN)) { 476 error = EINVAL; 477 break; 478 } 479 480 s = splnet(); 481 sc->sc_vnetid = (int)ifr->ifr_vnetid; 482 (void)vxlan_config(ifp, NULL, NULL); 483 splx(s); 484 break; 485 486 case SIOCGVNETID: 487 if ((sc->sc_vnetid != VXLAN_VNI_ANY) && 488 (sc->sc_vnetid > VXLAN_VNI_MAX || 489 sc->sc_vnetid < VXLAN_VNI_MIN)) { 490 error = EADDRNOTAVAIL; 491 break; 492 } 493 494 ifr->ifr_vnetid = sc->sc_vnetid; 495 break; 496 497 case SIOCDVNETID: 498 s = splnet(); 499 sc->sc_vnetid = VXLAN_VNI_UNSET; 500 (void)vxlan_config(ifp, NULL, NULL); 501 splx(s); 502 break; 503 504 default: 505 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 506 break; 507 } 508 509 return (error); 510 } 511 512 int 513 vxlan_media_change(struct ifnet *ifp) 514 { 515 return (0); 516 } 517 518 void 519 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr) 520 { 521 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 522 } 523 524 int 525 vxlan_sockaddr_cmp(struct sockaddr *srcsa, struct sockaddr *dstsa) 526 { 527 struct sockaddr_in *src4, *dst4; 528 struct sockaddr_in6 *src6, *dst6; 529 530 if (srcsa->sa_family != dstsa->sa_family) 531 return (1); 532 533 switch (dstsa->sa_family) { 534 case AF_INET: 535 src4 = satosin(srcsa); 536 dst4 = satosin(dstsa); 537 if (src4->sin_addr.s_addr == dst4->sin_addr.s_addr) 538 return (0); 539 case AF_INET6: 540 src6 = satosin6(srcsa); 541 dst6 = satosin6(dstsa); 542 if (IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &dst6->sin6_addr)) 543 return (0); 544 } 545 546 return (1); 547 } 548 549 uint16_t 550 vxlan_sockaddr_port(struct sockaddr *sa) 551 { 552 struct sockaddr_in *sin4; 553 struct sockaddr_in6 *sin6; 554 555 switch (sa->sa_family) { 556 case AF_INET: 557 sin4 = satosin(sa); 558 return (sin4->sin_port); 559 case AF_INET6: 560 sin6 = satosin6(sa); 561 return (sin6->sin6_port); 562 default: 563 break; 564 } 565 566 return (0); 567 } 568 569 int 570 vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen, 571 struct sockaddr *srcsa, struct sockaddr *dstsa) 572 { 573 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 574 struct vxlan_softc *sc = NULL, *sc_cand = NULL; 575 struct vxlan_header v; 576 int vni; 577 struct ifnet *ifp; 578 int skip; 579 struct ether_header *eh; 580 #if NBRIDGE > 0 581 struct bridge_tunneltag *brtag; 582 #endif 583 584 /* XXX Should verify the UDP port first before copying the packet */ 585 skip = iphlen + sizeof(*uh); 586 if (m->m_pkthdr.len - skip < sizeof(v)) 587 return (0); 588 m_copydata(m, skip, sizeof(v), (caddr_t)&v); 589 skip += sizeof(v); 590 591 if (v.vxlan_flags & htonl(VXLAN_RESERVED1) || 592 v.vxlan_id & htonl(VXLAN_RESERVED2)) 593 return (0); 594 595 vni = ntohl(v.vxlan_id) >> VXLAN_VNI_S; 596 if ((v.vxlan_flags & htonl(VXLAN_FLAGS_VNI)) == 0) { 597 if (vni != 0) 598 return (0); 599 600 vni = VXLAN_VNI_UNSET; 601 } 602 603 /* First search for a vxlan(4) interface with the packet's VNI */ 604 LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) { 605 if ((uh->uh_dport == sc->sc_dstport) && 606 vni == sc->sc_vnetid && 607 sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid)) { 608 sc_cand = sc; 609 if (vxlan_sockaddr_cmp(srcsa, 610 (struct sockaddr *)&sc->sc_dst) == 0) 611 goto found; 612 } 613 } 614 615 /* 616 * Now loop through all the vxlan(4) interfaces that are configured 617 * to accept any VNI and operating in multipoint-to-multipoint mode 618 * that is used in combination with bridge(4) or switch(4). 619 * If a vxlan(4) interface has been found for the packet's VNI, this 620 * code is not reached as the other interface is more specific. 621 */ 622 LIST_FOREACH(sc, &vxlan_any, sc_entry) { 623 if ((uh->uh_dport == sc->sc_dstport) && 624 (sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid))) { 625 sc_cand = sc; 626 goto found; 627 } 628 } 629 630 if (sc_cand) { 631 sc = sc_cand; 632 goto found; 633 } 634 635 /* not found */ 636 return (0); 637 638 found: 639 m_adj(m, skip); 640 ifp = &sc->sc_ac.ac_if; 641 642 if ((eh = mtod(m, struct ether_header *)) == NULL) 643 return (EINVAL); 644 645 #if NBRIDGE > 0 646 /* Store the tunnel src/dst IP and vni for the bridge or switch */ 647 if ((ifp->if_bridgeport != NULL || ifp->if_switchport != NULL) && 648 srcsa->sa_family != AF_UNSPEC && 649 ((brtag = bridge_tunneltag(m)) != NULL)) { 650 memcpy(&brtag->brtag_src.sa, srcsa, srcsa->sa_len); 651 memcpy(&brtag->brtag_dst.sa, dstsa, dstsa->sa_len); 652 brtag->brtag_id = vni; 653 } 654 #endif 655 656 m->m_flags &= ~(M_MCAST|M_BCAST); 657 658 #if NPF > 0 659 pf_pkt_addr_changed(m); 660 #endif 661 662 ml_enqueue(&ml, m); 663 if_input(ifp, &ml); 664 665 /* success */ 666 return (1); 667 } 668 669 struct mbuf * 670 vxlan_encap4(struct ifnet *ifp, struct mbuf *m, 671 struct sockaddr *src, struct sockaddr *dst) 672 { 673 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 674 struct ip *ip; 675 676 M_PREPEND(m, sizeof(*ip), M_DONTWAIT); 677 if (m == NULL) 678 return (NULL); 679 680 ip = mtod(m, struct ip *); 681 ip->ip_v = IPVERSION; 682 ip->ip_hl = sizeof(struct ip) >> 2; 683 ip->ip_id = htons(ip_randomid()); 684 ip->ip_off = 0; /* htons(IP_DF); XXX should we disallow IP fragments? */ 685 ip->ip_p = IPPROTO_UDP; 686 ip->ip_tos = IPTOS_LOWDELAY; 687 ip->ip_len = htons(m->m_pkthdr.len); 688 689 ip->ip_src = satosin(src)->sin_addr; 690 ip->ip_dst = satosin(dst)->sin_addr; 691 692 if (sc->sc_ttl > 0) 693 ip->ip_ttl = sc->sc_ttl; 694 else 695 ip->ip_ttl = IPDEFTTL; 696 697 return (m); 698 } 699 700 struct mbuf * 701 vxlan_encap6(struct ifnet *ifp, struct mbuf *m, 702 struct sockaddr *src, struct sockaddr *dst) 703 { 704 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 705 struct ip6_hdr *ip6; 706 struct in6_addr *in6a; 707 int error; 708 709 M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); 710 if (m == NULL) 711 return (NULL); 712 713 ip6 = mtod(m, struct ip6_hdr *); 714 ip6->ip6_flow = 0; 715 ip6->ip6_vfc &= ~IPV6_VERSION_MASK; 716 ip6->ip6_vfc |= IPV6_VERSION; 717 ip6->ip6_nxt = IPPROTO_UDP; 718 ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); 719 ip6->ip6_src = satosin6(src)->sin6_addr; 720 ip6->ip6_dst = satosin6(dst)->sin6_addr; 721 722 if (sc->sc_ttl > 0) 723 ip6->ip6_hlim = sc->sc_ttl; 724 else 725 ip6->ip6_hlim = ip6_defhlim; 726 727 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 728 error = in6_selectsrc(&in6a, satosin6(dst), NULL, NULL, 729 sc->sc_rdomain); 730 if (error != 0) { 731 m_freem(m); 732 return (NULL); 733 } 734 ip6->ip6_src = *in6a; 735 } 736 737 /* 738 * The UDP checksum of VXLAN packets should be set to zero, 739 * but the IPv6 UDP checksum is not optional. There is an RFC 6539 740 * to relax the IPv6 UDP checksum requirement for tunnels, but it 741 * is currently not supported by most implementations. 742 */ 743 m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 744 745 return (m); 746 } 747 748 int 749 vxlan_output(struct ifnet *ifp, struct mbuf *m) 750 { 751 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 752 struct vxlanudphdr *vu; 753 struct sockaddr *src, *dst; 754 #if NBRIDGE > 0 755 struct bridge_tunneltag *brtag; 756 #endif 757 int error, af; 758 uint32_t tag; 759 760 /* VXLAN header */ 761 M_PREPEND(m, sizeof(*vu), M_DONTWAIT); 762 if (m == NULL) { 763 ifp->if_oerrors++; 764 return (ENOBUFS); 765 } 766 767 src = (struct sockaddr *)&sc->sc_src; 768 dst = (struct sockaddr *)&sc->sc_dst; 769 af = src->sa_family; 770 771 vu = mtod(m, struct vxlanudphdr *); 772 vu->vu_u.uh_sport = sc->sc_dstport; 773 vu->vu_u.uh_dport = sc->sc_dstport; 774 vu->vu_u.uh_ulen = htons(m->m_pkthdr.len); 775 vu->vu_u.uh_sum = 0; 776 tag = sc->sc_vnetid; 777 778 #if NBRIDGE > 0 779 if ((brtag = bridge_tunnel(m)) != NULL) { 780 dst = &brtag->brtag_dst.sa; 781 782 /* If accepting any VNI, source ip address is from brtag */ 783 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 784 src = &brtag->brtag_src.sa; 785 tag = (uint32_t)brtag->brtag_id; 786 af = src->sa_family; 787 } 788 789 if (dst->sa_family != af) { 790 ifp->if_oerrors++; 791 m_freem(m); 792 return (EINVAL); 793 } 794 } else 795 #endif 796 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 797 /* 798 * If accepting any VNI, build the vxlan header only by 799 * bridge_tunneltag or drop packet if the tag does not exist. 800 */ 801 ifp->if_oerrors++; 802 m_freem(m); 803 return (ENETUNREACH); 804 } 805 806 if (sc->sc_vnetid != VXLAN_VNI_UNSET) { 807 vu->vu_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI); 808 vu->vu_v.vxlan_id = htonl(tag << VXLAN_VNI_S); 809 } else { 810 vu->vu_v.vxlan_flags = htonl(0); 811 vu->vu_v.vxlan_id = htonl(0); 812 } 813 814 if (af == AF_INET) 815 m = vxlan_encap4(ifp, m, src, dst); 816 else if (af == AF_INET6) 817 m = vxlan_encap6(ifp, m, src, dst); 818 else { 819 m_freem(m); 820 m = NULL; 821 } 822 823 if (m == NULL) { 824 ifp->if_oerrors++; 825 return (ENOBUFS); 826 } 827 828 #if NBRIDGE > 0 829 if (brtag != NULL) 830 bridge_tunneluntag(m); 831 #endif 832 833 ifp->if_opackets++; 834 ifp->if_obytes += m->m_pkthdr.len; 835 836 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 837 838 #if NPF > 0 839 pf_pkt_addr_changed(m); 840 #endif 841 842 if (af == AF_INET) 843 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, 844 &sc->sc_imo, NULL, 0); 845 else 846 error = ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL); 847 848 if (error) 849 ifp->if_oerrors++; 850 851 return (error); 852 } 853 854 void 855 vxlan_addr_change(void *arg) 856 { 857 struct vxlan_softc *sc = arg; 858 struct ifnet *ifp = &sc->sc_ac.ac_if; 859 int s, error; 860 861 /* 862 * Reset the configuration after resume or any possible address 863 * configuration changes. 864 */ 865 s = splnet(); 866 if ((error = vxlan_config(ifp, NULL, NULL))) { 867 /* 868 * The source address of the tunnel can temporarily disappear, 869 * after a link state change when running the DHCP client, 870 * so keep it configured. 871 */ 872 } 873 splx(s); 874 } 875 876 void 877 vxlan_if_change(void *arg) 878 { 879 struct vxlan_softc *sc = arg; 880 struct ifnet *ifp = &sc->sc_ac.ac_if; 881 int s, error; 882 883 /* 884 * Reset the configuration after the parent interface disappeared. 885 */ 886 s = splnet(); 887 if ((error = vxlan_config(ifp, NULL, NULL)) != 0) { 888 /* The configured tunnel addresses are invalid, remove them */ 889 bzero(&sc->sc_src, sizeof(sc->sc_src)); 890 bzero(&sc->sc_dst, sizeof(sc->sc_dst)); 891 } 892 splx(s); 893 } 894 895 void 896 vxlan_link_change(void *arg) 897 { 898 struct vxlan_softc *sc = arg; 899 struct ifnet *ifp = &sc->sc_ac.ac_if; 900 int s; 901 902 /* 903 * The machine might have lost its multicast associations after 904 * link state changes. This fixes a problem with VMware after 905 * suspend/resume of the host or guest. 906 */ 907 s = splnet(); 908 (void)vxlan_config(ifp, NULL, NULL); 909 splx(s); 910 } 911