1 /* $OpenBSD: if_vxlan.c,v 1.72 2019/04/28 22:15:58 mpi Exp $ */ 2 3 /* 4 * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bpfilter.h" 20 #include "vxlan.h" 21 #include "vlan.h" 22 #include "pf.h" 23 #include "bridge.h" 24 25 #include <sys/param.h> 26 #include <sys/systm.h> 27 #include <sys/mbuf.h> 28 #include <sys/socket.h> 29 #include <sys/sockio.h> 30 #include <sys/ioctl.h> 31 32 #include <net/if.h> 33 #include <net/if_var.h> 34 #include <net/if_media.h> 35 #include <net/route.h> 36 37 #if NBPFILTER > 0 38 #include <net/bpf.h> 39 #endif 40 41 #include <netinet/in.h> 42 #include <netinet/in_var.h> 43 #include <netinet/if_ether.h> 44 #include <netinet/ip.h> 45 #include <netinet/ip_var.h> 46 #include <netinet/udp.h> 47 #include <netinet/udp_var.h> 48 #include <netinet/in_pcb.h> 49 50 #if NPF > 0 51 #include <net/pfvar.h> 52 #endif 53 54 #if NBRIDGE > 0 55 #include <net/if_bridge.h> 56 #endif 57 58 #include <net/if_vxlan.h> 59 60 struct vxlan_softc { 61 struct arpcom sc_ac; 62 struct ifmedia sc_media; 63 64 struct ip_moptions sc_imo; 65 void *sc_ahcookie; 66 void *sc_lhcookie; 67 void *sc_dhcookie; 68 69 struct sockaddr_storage sc_src; 70 struct sockaddr_storage sc_dst; 71 in_port_t sc_dstport; 72 u_int sc_rdomain; 73 int64_t sc_vnetid; 74 uint16_t sc_df; 75 u_int8_t sc_ttl; 76 int sc_txhprio; 77 78 struct task sc_sendtask; 79 80 LIST_ENTRY(vxlan_softc) sc_entry; 81 }; 82 83 void vxlanattach(int); 84 int vxlanioctl(struct ifnet *, u_long, caddr_t); 85 void vxlanstart(struct ifnet *); 86 int vxlan_clone_create(struct if_clone *, int); 87 int vxlan_clone_destroy(struct ifnet *); 88 void vxlan_multicast_cleanup(struct ifnet *); 89 int vxlan_multicast_join(struct ifnet *, struct sockaddr *, 90 struct sockaddr *); 91 int vxlan_media_change(struct ifnet *); 92 void vxlan_media_status(struct ifnet *, struct ifmediareq *); 93 int vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *); 94 int vxlan_output(struct ifnet *, struct mbuf *); 95 void vxlan_addr_change(void *); 96 void vxlan_if_change(void *); 97 void vxlan_link_change(void *); 98 void vxlan_send_dispatch(void *); 99 100 int vxlan_sockaddr_cmp(struct sockaddr *, struct sockaddr *); 101 uint16_t vxlan_sockaddr_port(struct sockaddr *); 102 103 struct if_clone vxlan_cloner = 104 IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); 105 106 int vxlan_enable = 0; 107 u_long vxlan_tagmask; 108 109 #define VXLAN_TAGHASHSIZE 32 110 #define VXLAN_TAGHASH(tag) ((unsigned int)tag & vxlan_tagmask) 111 LIST_HEAD(vxlan_taghash, vxlan_softc) *vxlan_tagh, vxlan_any; 112 113 void 114 vxlanattach(int count) 115 { 116 /* Regular vxlan interfaces with a VNI */ 117 if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT, 118 &vxlan_tagmask)) == NULL) 119 panic("vxlanattach: hashinit"); 120 121 /* multipoint-to-multipoint interfaces that accept any VNI */ 122 LIST_INIT(&vxlan_any); 123 124 if_clone_attach(&vxlan_cloner); 125 } 126 127 int 128 vxlan_clone_create(struct if_clone *ifc, int unit) 129 { 130 struct ifnet *ifp; 131 struct vxlan_softc *sc; 132 133 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 134 sc->sc_imo.imo_membership = malloc( 135 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 136 M_WAITOK|M_ZERO); 137 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 138 sc->sc_dstport = htons(VXLAN_PORT); 139 sc->sc_vnetid = VXLAN_VNI_UNSET; 140 sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */ 141 sc->sc_df = htons(0); 142 task_set(&sc->sc_sendtask, vxlan_send_dispatch, sc); 143 144 ifp = &sc->sc_ac.ac_if; 145 snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit); 146 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 147 ether_fakeaddr(ifp); 148 149 ifp->if_softc = sc; 150 ifp->if_ioctl = vxlanioctl; 151 ifp->if_start = vxlanstart; 152 IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); 153 154 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 155 ifp->if_capabilities = IFCAP_VLAN_MTU; 156 157 ifmedia_init(&sc->sc_media, 0, vxlan_media_change, 158 vxlan_media_status); 159 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 160 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 161 162 if_counters_alloc(ifp); 163 if_attach(ifp); 164 ether_ifattach(ifp); 165 166 #if 0 167 /* 168 * Instead of using a decreased MTU of 1450 bytes, prefer 169 * to use the default Ethernet-size MTU of 1500 bytes and to 170 * increase the MTU of the outer transport interfaces to 171 * at least 1550 bytes. The following is disabled by default. 172 */ 173 ifp->if_mtu = ETHERMTU - sizeof(struct ether_header); 174 ifp->if_mtu -= sizeof(struct vxlanudphdr) + sizeof(struct ipovly); 175 #endif 176 177 LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry); 178 vxlan_enable++; 179 180 return (0); 181 } 182 183 int 184 vxlan_clone_destroy(struct ifnet *ifp) 185 { 186 struct vxlan_softc *sc = ifp->if_softc; 187 188 NET_LOCK(); 189 vxlan_multicast_cleanup(ifp); 190 NET_UNLOCK(); 191 192 vxlan_enable--; 193 LIST_REMOVE(sc, sc_entry); 194 195 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); 196 ether_ifdetach(ifp); 197 if_detach(ifp); 198 199 if (!task_del(net_tq(ifp->if_index), &sc->sc_sendtask)) 200 taskq_barrier(net_tq(ifp->if_index)); 201 202 free(sc->sc_imo.imo_membership, M_IPMOPTS, 0); 203 free(sc, M_DEVBUF, sizeof(*sc)); 204 205 return (0); 206 } 207 208 void 209 vxlan_multicast_cleanup(struct ifnet *ifp) 210 { 211 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 212 struct ip_moptions *imo = &sc->sc_imo; 213 struct ifnet *mifp; 214 215 mifp = if_get(imo->imo_ifidx); 216 if (mifp != NULL) { 217 if (sc->sc_ahcookie != NULL) { 218 hook_disestablish(mifp->if_addrhooks, sc->sc_ahcookie); 219 sc->sc_ahcookie = NULL; 220 } 221 if (sc->sc_lhcookie != NULL) { 222 hook_disestablish(mifp->if_linkstatehooks, 223 sc->sc_lhcookie); 224 sc->sc_lhcookie = NULL; 225 } 226 if (sc->sc_dhcookie != NULL) { 227 hook_disestablish(mifp->if_detachhooks, 228 sc->sc_dhcookie); 229 sc->sc_dhcookie = NULL; 230 } 231 232 if_put(mifp); 233 } 234 235 if (imo->imo_num_memberships > 0) { 236 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 237 imo->imo_ifidx = 0; 238 } 239 } 240 241 int 242 vxlan_multicast_join(struct ifnet *ifp, struct sockaddr *src, 243 struct sockaddr *dst) 244 { 245 struct vxlan_softc *sc = ifp->if_softc; 246 struct ip_moptions *imo = &sc->sc_imo; 247 struct sockaddr_in *src4, *dst4; 248 #ifdef INET6 249 struct sockaddr_in6 *dst6; 250 #endif /* INET6 */ 251 struct ifaddr *ifa; 252 struct ifnet *mifp; 253 254 switch (dst->sa_family) { 255 case AF_INET: 256 dst4 = satosin(dst); 257 if (!IN_MULTICAST(dst4->sin_addr.s_addr)) 258 return (0); 259 break; 260 #ifdef INET6 261 case AF_INET6: 262 dst6 = satosin6(dst); 263 if (!IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr)) 264 return (0); 265 266 /* Multicast mode is currently not supported for IPv6 */ 267 return (EAFNOSUPPORT); 268 #endif /* INET6 */ 269 default: 270 return (EAFNOSUPPORT); 271 } 272 273 src4 = satosin(src); 274 dst4 = satosin(dst); 275 276 if (src4->sin_addr.s_addr == INADDR_ANY || 277 IN_MULTICAST(src4->sin_addr.s_addr)) 278 return (EINVAL); 279 if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) == NULL || 280 (mifp = ifa->ifa_ifp) == NULL || 281 (mifp->if_flags & IFF_MULTICAST) == 0) 282 return (EADDRNOTAVAIL); 283 284 if ((imo->imo_membership[0] = 285 in_addmulti(&dst4->sin_addr, mifp)) == NULL) 286 return (ENOBUFS); 287 288 imo->imo_num_memberships++; 289 imo->imo_ifidx = mifp->if_index; 290 if (sc->sc_ttl > 0) 291 imo->imo_ttl = sc->sc_ttl; 292 else 293 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 294 imo->imo_loop = 0; 295 296 /* 297 * Use interface hooks to track any changes on the interface 298 * that is used to send out the tunnel traffic as multicast. 299 */ 300 if ((sc->sc_ahcookie = hook_establish(mifp->if_addrhooks, 301 0, vxlan_addr_change, sc)) == NULL || 302 (sc->sc_lhcookie = hook_establish(mifp->if_linkstatehooks, 303 0, vxlan_link_change, sc)) == NULL || 304 (sc->sc_dhcookie = hook_establish(mifp->if_detachhooks, 305 0, vxlan_if_change, sc)) == NULL) 306 panic("%s: cannot allocate interface hook", 307 mifp->if_xname); 308 309 return (0); 310 } 311 312 void 313 vxlanstart(struct ifnet *ifp) 314 { 315 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 316 317 task_add(net_tq(ifp->if_index), &sc->sc_sendtask); 318 } 319 320 void 321 vxlan_send_dispatch(void *xsc) 322 { 323 struct vxlan_softc *sc = xsc; 324 struct ifnet *ifp = &sc->sc_ac.ac_if; 325 struct mbuf *m; 326 struct mbuf_list ml; 327 328 ml_init(&ml); 329 for (;;) { 330 IFQ_DEQUEUE(&ifp->if_snd, m); 331 if (m == NULL) 332 break; 333 334 #if NBPFILTER > 0 335 if (ifp->if_bpf) 336 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 337 #endif 338 339 ml_enqueue(&ml, m); 340 } 341 342 if (ml_empty(&ml)) 343 return; 344 345 NET_RLOCK(); 346 while ((m = ml_dequeue(&ml)) != NULL) { 347 vxlan_output(ifp, m); 348 } 349 NET_RUNLOCK(); 350 } 351 352 353 int 354 vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) 355 { 356 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 357 int reset = 0, error, af; 358 socklen_t slen; 359 in_port_t port; 360 struct vxlan_taghash *tagh; 361 362 if (src != NULL && dst != NULL) { 363 if ((af = src->sa_family) != dst->sa_family) 364 return (EAFNOSUPPORT); 365 } else { 366 /* Reset current configuration */ 367 af = sc->sc_src.ss_family; 368 src = sstosa(&sc->sc_src); 369 dst = sstosa(&sc->sc_dst); 370 reset = 1; 371 } 372 373 switch (af) { 374 case AF_INET: 375 slen = sizeof(struct sockaddr_in); 376 break; 377 #ifdef INET6 378 case AF_INET6: 379 slen = sizeof(struct sockaddr_in6); 380 break; 381 #endif /* INET6 */ 382 default: 383 return (EAFNOSUPPORT); 384 } 385 386 if (src->sa_len != slen || dst->sa_len != slen) 387 return (EINVAL); 388 389 vxlan_multicast_cleanup(ifp); 390 391 /* returns without error if multicast is not configured */ 392 if ((error = vxlan_multicast_join(ifp, src, dst)) != 0) 393 return (error); 394 395 if ((port = vxlan_sockaddr_port(dst)) != 0) 396 sc->sc_dstport = port; 397 398 if (!reset) { 399 bzero(&sc->sc_src, sizeof(sc->sc_src)); 400 bzero(&sc->sc_dst, sizeof(sc->sc_dst)); 401 memcpy(&sc->sc_src, src, src->sa_len); 402 memcpy(&sc->sc_dst, dst, dst->sa_len); 403 } 404 405 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 406 /* 407 * If the interface accepts any VNI, put it into a separate 408 * list that is not part of the main hash. 409 */ 410 tagh = &vxlan_any; 411 } else 412 tagh = &vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)]; 413 414 LIST_REMOVE(sc, sc_entry); 415 LIST_INSERT_HEAD(tagh, sc, sc_entry); 416 417 return (0); 418 } 419 420 int 421 vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 422 { 423 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 424 struct ifreq *ifr = (struct ifreq *)data; 425 struct if_laddrreq *lifr = (struct if_laddrreq *)data; 426 int error = 0; 427 428 switch (cmd) { 429 case SIOCSIFADDR: 430 ifp->if_flags |= IFF_UP; 431 /* FALLTHROUGH */ 432 433 case SIOCSIFFLAGS: 434 if (ifp->if_flags & IFF_UP) { 435 ifp->if_flags |= IFF_RUNNING; 436 } else { 437 ifp->if_flags &= ~IFF_RUNNING; 438 } 439 break; 440 441 case SIOCADDMULTI: 442 case SIOCDELMULTI: 443 break; 444 445 case SIOCGIFMEDIA: 446 case SIOCSIFMEDIA: 447 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 448 break; 449 450 case SIOCSLIFPHYADDR: 451 error = vxlan_config(ifp, 452 sstosa(&lifr->addr), 453 sstosa(&lifr->dstaddr)); 454 break; 455 456 case SIOCDIFPHYADDR: 457 vxlan_multicast_cleanup(ifp); 458 bzero(&sc->sc_src, sizeof(sc->sc_src)); 459 bzero(&sc->sc_dst, sizeof(sc->sc_dst)); 460 sc->sc_dstport = htons(VXLAN_PORT); 461 break; 462 463 case SIOCGLIFPHYADDR: 464 if (sc->sc_dst.ss_family == AF_UNSPEC) { 465 error = EADDRNOTAVAIL; 466 break; 467 } 468 bzero(&lifr->addr, sizeof(lifr->addr)); 469 bzero(&lifr->dstaddr, sizeof(lifr->dstaddr)); 470 memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len); 471 memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len); 472 break; 473 474 case SIOCSLIFPHYRTABLE: 475 if (ifr->ifr_rdomainid < 0 || 476 ifr->ifr_rdomainid > RT_TABLEID_MAX || 477 !rtable_exists(ifr->ifr_rdomainid)) { 478 error = EINVAL; 479 break; 480 } 481 sc->sc_rdomain = ifr->ifr_rdomainid; 482 (void)vxlan_config(ifp, NULL, NULL); 483 break; 484 485 case SIOCGLIFPHYRTABLE: 486 ifr->ifr_rdomainid = sc->sc_rdomain; 487 break; 488 489 case SIOCSLIFPHYTTL: 490 if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { 491 error = EINVAL; 492 break; 493 } 494 if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl) 495 break; 496 sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl); 497 (void)vxlan_config(ifp, NULL, NULL); 498 break; 499 500 case SIOCGLIFPHYTTL: 501 ifr->ifr_ttl = (int)sc->sc_ttl; 502 break; 503 504 case SIOCSLIFPHYDF: 505 /* commit */ 506 sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 507 break; 508 case SIOCGLIFPHYDF: 509 ifr->ifr_df = sc->sc_df ? 1 : 0; 510 break; 511 512 case SIOCSTXHPRIO: 513 if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET) 514 ; /* fall through */ 515 else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN || 516 ifr->ifr_hdrprio > IF_HDRPRIO_MAX) { 517 error = EINVAL; 518 break; 519 } 520 521 sc->sc_txhprio = ifr->ifr_hdrprio; 522 break; 523 case SIOCGTXHPRIO: 524 ifr->ifr_hdrprio = sc->sc_txhprio; 525 break; 526 527 case SIOCSVNETID: 528 if (sc->sc_vnetid == ifr->ifr_vnetid) 529 break; 530 531 if ((ifr->ifr_vnetid != VXLAN_VNI_ANY) && 532 (ifr->ifr_vnetid > VXLAN_VNI_MAX || 533 ifr->ifr_vnetid < VXLAN_VNI_MIN)) { 534 error = EINVAL; 535 break; 536 } 537 538 sc->sc_vnetid = (int)ifr->ifr_vnetid; 539 (void)vxlan_config(ifp, NULL, NULL); 540 break; 541 542 case SIOCGVNETID: 543 if ((sc->sc_vnetid != VXLAN_VNI_ANY) && 544 (sc->sc_vnetid > VXLAN_VNI_MAX || 545 sc->sc_vnetid < VXLAN_VNI_MIN)) { 546 error = EADDRNOTAVAIL; 547 break; 548 } 549 550 ifr->ifr_vnetid = sc->sc_vnetid; 551 break; 552 553 case SIOCDVNETID: 554 sc->sc_vnetid = VXLAN_VNI_UNSET; 555 (void)vxlan_config(ifp, NULL, NULL); 556 break; 557 558 default: 559 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 560 break; 561 } 562 563 return (error); 564 } 565 566 int 567 vxlan_media_change(struct ifnet *ifp) 568 { 569 return (0); 570 } 571 572 void 573 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr) 574 { 575 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 576 } 577 578 int 579 vxlan_sockaddr_cmp(struct sockaddr *srcsa, struct sockaddr *dstsa) 580 { 581 struct sockaddr_in *src4, *dst4; 582 #ifdef INET6 583 struct sockaddr_in6 *src6, *dst6; 584 #endif /* INET6 */ 585 586 if (srcsa->sa_family != dstsa->sa_family) 587 return (1); 588 589 switch (dstsa->sa_family) { 590 case AF_INET: 591 src4 = satosin(srcsa); 592 dst4 = satosin(dstsa); 593 if (src4->sin_addr.s_addr == dst4->sin_addr.s_addr) 594 return (0); 595 break; 596 #ifdef INET6 597 case AF_INET6: 598 src6 = satosin6(srcsa); 599 dst6 = satosin6(dstsa); 600 if (IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &dst6->sin6_addr) && 601 src6->sin6_scope_id == dst6->sin6_scope_id) 602 return (0); 603 break; 604 #endif /* INET6 */ 605 } 606 607 return (1); 608 } 609 610 uint16_t 611 vxlan_sockaddr_port(struct sockaddr *sa) 612 { 613 struct sockaddr_in *sin4; 614 #ifdef INET6 615 struct sockaddr_in6 *sin6; 616 #endif /* INET6 */ 617 618 switch (sa->sa_family) { 619 case AF_INET: 620 sin4 = satosin(sa); 621 return (sin4->sin_port); 622 #ifdef INET6 623 case AF_INET6: 624 sin6 = satosin6(sa); 625 return (sin6->sin6_port); 626 #endif /* INET6 */ 627 default: 628 break; 629 } 630 631 return (0); 632 } 633 634 int 635 vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen, 636 struct sockaddr *srcsa, struct sockaddr *dstsa) 637 { 638 struct vxlan_softc *sc = NULL, *sc_cand = NULL; 639 struct vxlan_header v; 640 int vni; 641 struct ifnet *ifp; 642 int skip; 643 #if NBRIDGE > 0 644 struct bridge_tunneltag *brtag; 645 #endif 646 struct mbuf *n; 647 int off; 648 649 /* XXX Should verify the UDP port first before copying the packet */ 650 skip = iphlen + sizeof(*uh); 651 if (m->m_pkthdr.len - skip < sizeof(v)) 652 return (0); 653 m_copydata(m, skip, sizeof(v), (caddr_t)&v); 654 skip += sizeof(v); 655 656 if (v.vxlan_flags & htonl(VXLAN_RESERVED1) || 657 v.vxlan_id & htonl(VXLAN_RESERVED2)) 658 return (0); 659 660 vni = ntohl(v.vxlan_id) >> VXLAN_VNI_S; 661 if ((v.vxlan_flags & htonl(VXLAN_FLAGS_VNI)) == 0) { 662 if (vni != 0) 663 return (0); 664 665 vni = VXLAN_VNI_UNSET; 666 } 667 668 NET_ASSERT_LOCKED(); 669 /* First search for a vxlan(4) interface with the packet's VNI */ 670 LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) { 671 if ((uh->uh_dport == sc->sc_dstport) && 672 vni == sc->sc_vnetid && 673 sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid)) { 674 sc_cand = sc; 675 if (vxlan_sockaddr_cmp(srcsa, sstosa(&sc->sc_dst)) == 0) 676 goto found; 677 } 678 } 679 680 /* 681 * Now loop through all the vxlan(4) interfaces that are configured 682 * to accept any VNI and operating in multipoint-to-multipoint mode 683 * that is used in combination with bridge(4) or switch(4). 684 * If a vxlan(4) interface has been found for the packet's VNI, this 685 * code is not reached as the other interface is more specific. 686 */ 687 LIST_FOREACH(sc, &vxlan_any, sc_entry) { 688 if ((uh->uh_dport == sc->sc_dstport) && 689 (sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid))) { 690 sc_cand = sc; 691 goto found; 692 } 693 } 694 695 if (sc_cand) { 696 sc = sc_cand; 697 goto found; 698 } 699 700 /* not found */ 701 return (0); 702 703 found: 704 if (m->m_pkthdr.len < skip + sizeof(struct ether_header)) { 705 m_freem(m); 706 return (EINVAL); 707 } 708 709 m_adj(m, skip); 710 ifp = &sc->sc_ac.ac_if; 711 712 #if NBRIDGE > 0 713 /* Store the tunnel src/dst IP and vni for the bridge or switch */ 714 if ((ifp->if_bridgeidx != 0 || ifp->if_switchport != NULL) && 715 srcsa->sa_family != AF_UNSPEC && 716 ((brtag = bridge_tunneltag(m)) != NULL)) { 717 memcpy(&brtag->brtag_peer.sa, srcsa, srcsa->sa_len); 718 memcpy(&brtag->brtag_local.sa, dstsa, dstsa->sa_len); 719 brtag->brtag_id = vni; 720 } 721 #endif 722 723 m->m_flags &= ~(M_BCAST|M_MCAST); 724 725 #if NPF > 0 726 pf_pkt_addr_changed(m); 727 #endif 728 if ((m->m_len < sizeof(struct ether_header)) && 729 (m = m_pullup(m, sizeof(struct ether_header))) == NULL) 730 return (ENOBUFS); 731 732 n = m_getptr(m, sizeof(struct ether_header), &off); 733 if (n == NULL) { 734 m_freem(m); 735 return (EINVAL); 736 } 737 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { 738 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); 739 /* Dispose of the original mbuf chain */ 740 m_freem(m); 741 if (n == NULL) 742 return (ENOBUFS); 743 m = n; 744 } 745 746 if_vinput(ifp, m); 747 748 /* success */ 749 return (1); 750 } 751 752 struct mbuf * 753 vxlan_encap4(struct ifnet *ifp, struct mbuf *m, 754 struct sockaddr *src, struct sockaddr *dst) 755 { 756 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 757 struct ip *ip; 758 759 /* 760 * Remove multicast and broadcast flags or encapsulated packet 761 * ends up as multicast or broadcast packet. 762 */ 763 m->m_flags &= ~(M_BCAST|M_MCAST); 764 765 M_PREPEND(m, sizeof(*ip), M_DONTWAIT); 766 if (m == NULL) 767 return (NULL); 768 769 ip = mtod(m, struct ip *); 770 ip->ip_v = IPVERSION; 771 ip->ip_hl = sizeof(struct ip) >> 2; 772 ip->ip_id = htons(ip_randomid()); 773 ip->ip_off = sc->sc_df; 774 ip->ip_p = IPPROTO_UDP; 775 ip->ip_tos = IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? 776 m->m_pkthdr.pf.prio : sc->sc_txhprio); 777 ip->ip_len = htons(m->m_pkthdr.len); 778 779 ip->ip_src = satosin(src)->sin_addr; 780 ip->ip_dst = satosin(dst)->sin_addr; 781 782 if (sc->sc_ttl > 0) 783 ip->ip_ttl = sc->sc_ttl; 784 else 785 ip->ip_ttl = IPDEFTTL; 786 787 return (m); 788 } 789 790 #ifdef INET6 791 struct mbuf * 792 vxlan_encap6(struct ifnet *ifp, struct mbuf *m, 793 struct sockaddr *src, struct sockaddr *dst) 794 { 795 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 796 struct ip6_hdr *ip6; 797 struct in6_addr *in6a; 798 uint32_t flow; 799 800 /* 801 * Remove multicast and broadcast flags or encapsulated packet 802 * ends up as multicast or broadcast packet. 803 */ 804 m->m_flags &= ~(M_BCAST|M_MCAST); 805 806 M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); 807 if (m == NULL) 808 return (NULL); 809 810 flow = (uint32_t)IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? 811 m->m_pkthdr.pf.prio : sc->sc_txhprio) << 20; 812 813 ip6 = mtod(m, struct ip6_hdr *); 814 ip6->ip6_flow = htonl(flow); 815 ip6->ip6_vfc &= ~IPV6_VERSION_MASK; 816 ip6->ip6_vfc |= IPV6_VERSION; 817 ip6->ip6_nxt = IPPROTO_UDP; 818 ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); 819 if (in6_embedscope(&ip6->ip6_src, satosin6(src), NULL) != 0) 820 goto drop; 821 if (in6_embedscope(&ip6->ip6_dst, satosin6(dst), NULL) != 0) 822 goto drop; 823 824 if (sc->sc_ttl > 0) 825 ip6->ip6_hlim = sc->sc_ttl; 826 else 827 ip6->ip6_hlim = ip6_defhlim; 828 829 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 830 if (in6_selectsrc(&in6a, satosin6(dst), NULL, 831 sc->sc_rdomain) != 0) 832 goto drop; 833 834 ip6->ip6_src = *in6a; 835 } 836 837 if (sc->sc_df) 838 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 839 840 /* 841 * The UDP checksum of VXLAN packets should be set to zero, 842 * but the IPv6 UDP checksum is not optional. There is an RFC 6539 843 * to relax the IPv6 UDP checksum requirement for tunnels, but it 844 * is currently not supported by most implementations. 845 */ 846 m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 847 848 return (m); 849 850 drop: 851 m_freem(m); 852 return (NULL); 853 } 854 #endif /* INET6 */ 855 856 int 857 vxlan_output(struct ifnet *ifp, struct mbuf *m) 858 { 859 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 860 struct vxlanudphdr *vu; 861 struct sockaddr *src, *dst; 862 #if NBRIDGE > 0 863 struct bridge_tunneltag *brtag; 864 #endif 865 int error, af; 866 uint32_t tag; 867 struct mbuf *m0; 868 869 /* VXLAN header, needs new mbuf because of alignment issues */ 870 MGET(m0, M_DONTWAIT, m->m_type); 871 if (m0 == NULL) { 872 ifp->if_oerrors++; 873 return (ENOBUFS); 874 } 875 M_MOVE_PKTHDR(m0, m); 876 m0->m_next = m; 877 m = m0; 878 m_align(m, sizeof(*vu)); 879 m->m_len = sizeof(*vu); 880 m->m_pkthdr.len += sizeof(*vu); 881 882 src = sstosa(&sc->sc_src); 883 dst = sstosa(&sc->sc_dst); 884 af = src->sa_family; 885 886 vu = mtod(m, struct vxlanudphdr *); 887 vu->vu_u.uh_sport = sc->sc_dstport; 888 vu->vu_u.uh_dport = sc->sc_dstport; 889 vu->vu_u.uh_ulen = htons(m->m_pkthdr.len); 890 vu->vu_u.uh_sum = 0; 891 tag = sc->sc_vnetid; 892 893 #if NBRIDGE > 0 894 if ((brtag = bridge_tunnel(m)) != NULL) { 895 dst = &brtag->brtag_peer.sa; 896 897 /* If accepting any VNI, source ip address is from brtag */ 898 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 899 src = &brtag->brtag_local.sa; 900 tag = (uint32_t)brtag->brtag_id; 901 af = src->sa_family; 902 } 903 904 if (dst->sa_family != af) { 905 ifp->if_oerrors++; 906 m_freem(m); 907 return (EINVAL); 908 } 909 } else 910 #endif 911 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 912 /* 913 * If accepting any VNI, build the vxlan header only by 914 * bridge_tunneltag or drop packet if the tag does not exist. 915 */ 916 ifp->if_oerrors++; 917 m_freem(m); 918 return (ENETUNREACH); 919 } 920 921 if (sc->sc_vnetid != VXLAN_VNI_UNSET) { 922 vu->vu_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI); 923 vu->vu_v.vxlan_id = htonl(tag << VXLAN_VNI_S); 924 } else { 925 vu->vu_v.vxlan_flags = htonl(0); 926 vu->vu_v.vxlan_id = htonl(0); 927 } 928 929 switch (af) { 930 case AF_INET: 931 m = vxlan_encap4(ifp, m, src, dst); 932 break; 933 #ifdef INET6 934 case AF_INET6: 935 m = vxlan_encap6(ifp, m, src, dst); 936 break; 937 #endif /* INET6 */ 938 default: 939 m_freem(m); 940 m = NULL; 941 } 942 943 if (m == NULL) { 944 ifp->if_oerrors++; 945 return (ENOBUFS); 946 } 947 948 #if NBRIDGE > 0 949 if (brtag != NULL) 950 bridge_tunneluntag(m); 951 #endif 952 953 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 954 955 #if NPF > 0 956 pf_pkt_addr_changed(m); 957 #endif 958 959 switch (af) { 960 case AF_INET: 961 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, 962 &sc->sc_imo, NULL, 0); 963 break; 964 #ifdef INET6 965 case AF_INET6: 966 error = ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL); 967 break; 968 #endif /* INET6 */ 969 default: 970 m_freem(m); 971 error = EAFNOSUPPORT; 972 } 973 974 if (error) 975 ifp->if_oerrors++; 976 977 return (error); 978 } 979 980 void 981 vxlan_addr_change(void *arg) 982 { 983 struct vxlan_softc *sc = arg; 984 struct ifnet *ifp = &sc->sc_ac.ac_if; 985 int error; 986 987 /* 988 * Reset the configuration after resume or any possible address 989 * configuration changes. 990 */ 991 if ((error = vxlan_config(ifp, NULL, NULL))) { 992 /* 993 * The source address of the tunnel can temporarily disappear, 994 * after a link state change when running the DHCP client, 995 * so keep it configured. 996 */ 997 } 998 } 999 1000 void 1001 vxlan_if_change(void *arg) 1002 { 1003 struct vxlan_softc *sc = arg; 1004 struct ifnet *ifp = &sc->sc_ac.ac_if; 1005 1006 /* 1007 * Reset the configuration after the parent interface disappeared. 1008 */ 1009 vxlan_multicast_cleanup(ifp); 1010 memset(&sc->sc_src, 0, sizeof(sc->sc_src)); 1011 memset(&sc->sc_dst, 0, sizeof(sc->sc_dst)); 1012 sc->sc_dstport = htons(VXLAN_PORT); 1013 } 1014 1015 void 1016 vxlan_link_change(void *arg) 1017 { 1018 struct vxlan_softc *sc = arg; 1019 struct ifnet *ifp = &sc->sc_ac.ac_if; 1020 1021 /* 1022 * The machine might have lost its multicast associations after 1023 * link state changes. This fixes a problem with VMware after 1024 * suspend/resume of the host or guest. 1025 */ 1026 (void)vxlan_config(ifp, NULL, NULL); 1027 } 1028