1 /* $OpenBSD: if_vxlan.c,v 1.81 2020/08/21 22:59:27 kn Exp $ */ 2 3 /* 4 * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bpfilter.h" 20 #include "vxlan.h" 21 #include "vlan.h" 22 #include "pf.h" 23 #include "bridge.h" 24 25 #include <sys/param.h> 26 #include <sys/systm.h> 27 #include <sys/mbuf.h> 28 #include <sys/socket.h> 29 #include <sys/sockio.h> 30 #include <sys/ioctl.h> 31 32 #include <net/if.h> 33 #include <net/if_var.h> 34 #include <net/if_media.h> 35 #include <net/route.h> 36 37 #if NBPFILTER > 0 38 #include <net/bpf.h> 39 #endif 40 41 #include <netinet/in.h> 42 #include <netinet/in_var.h> 43 #include <netinet/if_ether.h> 44 #include <netinet/ip.h> 45 #include <netinet/ip_var.h> 46 #include <netinet/udp.h> 47 #include <netinet/udp_var.h> 48 #include <netinet/in_pcb.h> 49 50 #if NPF > 0 51 #include <net/pfvar.h> 52 #endif 53 54 #if NBRIDGE > 0 55 #include <net/if_bridge.h> 56 #endif 57 58 #include <net/if_vxlan.h> 59 60 struct vxlan_softc { 61 struct arpcom sc_ac; 62 struct ifmedia sc_media; 63 64 struct ip_moptions sc_imo; 65 struct task sc_atask; 66 struct task sc_ltask; 67 struct task sc_dtask; 68 69 struct sockaddr_storage sc_src; 70 struct sockaddr_storage sc_dst; 71 in_port_t sc_dstport; 72 u_int sc_rdomain; 73 int64_t sc_vnetid; 74 uint16_t sc_df; 75 u_int8_t sc_ttl; 76 int sc_txhprio; 77 78 struct task sc_sendtask; 79 80 LIST_ENTRY(vxlan_softc) sc_entry; 81 }; 82 83 void vxlanattach(int); 84 int vxlanioctl(struct ifnet *, u_long, caddr_t); 85 void vxlanstart(struct ifnet *); 86 int vxlan_clone_create(struct if_clone *, int); 87 int vxlan_clone_destroy(struct ifnet *); 88 void vxlan_multicast_cleanup(struct ifnet *); 89 int vxlan_multicast_join(struct ifnet *, struct sockaddr *, 90 struct sockaddr *); 91 int vxlan_media_change(struct ifnet *); 92 void vxlan_media_status(struct ifnet *, struct ifmediareq *); 93 int vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *); 94 int vxlan_output(struct ifnet *, struct mbuf *); 95 void vxlan_addr_change(void *); 96 void vxlan_if_change(void *); 97 void vxlan_link_change(void *); 98 void vxlan_send_dispatch(void *); 99 100 int vxlan_sockaddr_cmp(struct sockaddr *, struct sockaddr *); 101 uint16_t vxlan_sockaddr_port(struct sockaddr *); 102 103 struct if_clone vxlan_cloner = 104 IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); 105 106 int vxlan_enable = 0; 107 u_long vxlan_tagmask; 108 109 #define VXLAN_TAGHASHSIZE 32 110 #define VXLAN_TAGHASH(tag) ((unsigned int)tag & vxlan_tagmask) 111 LIST_HEAD(vxlan_taghash, vxlan_softc) *vxlan_tagh, vxlan_any; 112 113 void 114 vxlanattach(int count) 115 { 116 /* Regular vxlan interfaces with a VNI */ 117 if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT, 118 &vxlan_tagmask)) == NULL) 119 panic("vxlanattach: hashinit"); 120 121 /* multipoint-to-multipoint interfaces that accept any VNI */ 122 LIST_INIT(&vxlan_any); 123 124 if_clone_attach(&vxlan_cloner); 125 } 126 127 int 128 vxlan_clone_create(struct if_clone *ifc, int unit) 129 { 130 struct ifnet *ifp; 131 struct vxlan_softc *sc; 132 133 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 134 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, 135 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); 136 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 137 sc->sc_dstport = htons(VXLAN_PORT); 138 sc->sc_vnetid = VXLAN_VNI_UNSET; 139 sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */ 140 sc->sc_df = htons(0); 141 task_set(&sc->sc_atask, vxlan_addr_change, sc); 142 task_set(&sc->sc_ltask, vxlan_link_change, sc); 143 task_set(&sc->sc_dtask, vxlan_if_change, sc); 144 task_set(&sc->sc_sendtask, vxlan_send_dispatch, sc); 145 146 ifp = &sc->sc_ac.ac_if; 147 snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit); 148 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 149 ether_fakeaddr(ifp); 150 151 ifp->if_softc = sc; 152 ifp->if_ioctl = vxlanioctl; 153 ifp->if_start = vxlanstart; 154 155 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 156 ifp->if_capabilities = IFCAP_VLAN_MTU; 157 ifp->if_xflags = IFXF_CLONED; 158 159 ifmedia_init(&sc->sc_media, 0, vxlan_media_change, 160 vxlan_media_status); 161 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 162 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 163 164 if_counters_alloc(ifp); 165 if_attach(ifp); 166 ether_ifattach(ifp); 167 168 #if 0 169 /* 170 * Instead of using a decreased MTU of 1450 bytes, prefer 171 * to use the default Ethernet-size MTU of 1500 bytes and to 172 * increase the MTU of the outer transport interfaces to 173 * at least 1550 bytes. The following is disabled by default. 174 */ 175 ifp->if_mtu = ETHERMTU - sizeof(struct ether_header); 176 ifp->if_mtu -= sizeof(struct vxlanudphdr) + sizeof(struct ipovly); 177 #endif 178 179 LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry); 180 vxlan_enable++; 181 182 return (0); 183 } 184 185 int 186 vxlan_clone_destroy(struct ifnet *ifp) 187 { 188 struct vxlan_softc *sc = ifp->if_softc; 189 190 NET_LOCK(); 191 vxlan_multicast_cleanup(ifp); 192 NET_UNLOCK(); 193 194 vxlan_enable--; 195 LIST_REMOVE(sc, sc_entry); 196 197 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); 198 ether_ifdetach(ifp); 199 if_detach(ifp); 200 201 if (!task_del(net_tq(ifp->if_index), &sc->sc_sendtask)) 202 taskq_barrier(net_tq(ifp->if_index)); 203 204 free(sc->sc_imo.imo_membership, M_IPMOPTS, 205 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 206 free(sc, M_DEVBUF, sizeof(*sc)); 207 208 return (0); 209 } 210 211 void 212 vxlan_multicast_cleanup(struct ifnet *ifp) 213 { 214 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 215 struct ip_moptions *imo = &sc->sc_imo; 216 struct ifnet *mifp; 217 218 mifp = if_get(imo->imo_ifidx); 219 if (mifp != NULL) { 220 if_addrhook_del(mifp, &sc->sc_atask); 221 if_linkstatehook_del(mifp, &sc->sc_ltask); 222 if_detachhook_del(mifp, &sc->sc_dtask); 223 224 if_put(mifp); 225 } 226 227 if (imo->imo_num_memberships > 0) { 228 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 229 imo->imo_ifidx = 0; 230 } 231 } 232 233 int 234 vxlan_multicast_join(struct ifnet *ifp, struct sockaddr *src, 235 struct sockaddr *dst) 236 { 237 struct vxlan_softc *sc = ifp->if_softc; 238 struct ip_moptions *imo = &sc->sc_imo; 239 struct sockaddr_in *src4, *dst4; 240 #ifdef INET6 241 struct sockaddr_in6 *dst6; 242 #endif /* INET6 */ 243 struct ifaddr *ifa; 244 struct ifnet *mifp; 245 246 switch (dst->sa_family) { 247 case AF_INET: 248 dst4 = satosin(dst); 249 if (!IN_MULTICAST(dst4->sin_addr.s_addr)) 250 return (0); 251 break; 252 #ifdef INET6 253 case AF_INET6: 254 dst6 = satosin6(dst); 255 if (!IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr)) 256 return (0); 257 258 /* Multicast mode is currently not supported for IPv6 */ 259 return (EAFNOSUPPORT); 260 #endif /* INET6 */ 261 default: 262 return (EAFNOSUPPORT); 263 } 264 265 src4 = satosin(src); 266 dst4 = satosin(dst); 267 268 if (src4->sin_addr.s_addr == INADDR_ANY || 269 IN_MULTICAST(src4->sin_addr.s_addr)) 270 return (EINVAL); 271 if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) == NULL || 272 (mifp = ifa->ifa_ifp) == NULL || 273 (mifp->if_flags & IFF_MULTICAST) == 0) 274 return (EADDRNOTAVAIL); 275 276 if ((imo->imo_membership[0] = 277 in_addmulti(&dst4->sin_addr, mifp)) == NULL) 278 return (ENOBUFS); 279 280 imo->imo_num_memberships++; 281 imo->imo_ifidx = mifp->if_index; 282 if (sc->sc_ttl > 0) 283 imo->imo_ttl = sc->sc_ttl; 284 else 285 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 286 imo->imo_loop = 0; 287 288 /* 289 * Use interface hooks to track any changes on the interface 290 * that is used to send out the tunnel traffic as multicast. 291 */ 292 if_addrhook_add(mifp, &sc->sc_atask); 293 if_linkstatehook_add(mifp, &sc->sc_ltask); 294 if_detachhook_add(mifp, &sc->sc_dtask); 295 296 return (0); 297 } 298 299 void 300 vxlanstart(struct ifnet *ifp) 301 { 302 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 303 304 task_add(net_tq(ifp->if_index), &sc->sc_sendtask); 305 } 306 307 void 308 vxlan_send_dispatch(void *xsc) 309 { 310 struct vxlan_softc *sc = xsc; 311 struct ifnet *ifp = &sc->sc_ac.ac_if; 312 struct mbuf *m; 313 struct mbuf_list ml; 314 315 ml_init(&ml); 316 for (;;) { 317 m = ifq_dequeue(&ifp->if_snd); 318 if (m == NULL) 319 break; 320 321 #if NBPFILTER > 0 322 if (ifp->if_bpf) 323 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 324 #endif 325 326 ml_enqueue(&ml, m); 327 } 328 329 if (ml_empty(&ml)) 330 return; 331 332 NET_LOCK(); 333 while ((m = ml_dequeue(&ml)) != NULL) { 334 vxlan_output(ifp, m); 335 } 336 NET_UNLOCK(); 337 } 338 339 340 int 341 vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) 342 { 343 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 344 int reset = 0, error, af; 345 socklen_t slen; 346 in_port_t port; 347 struct vxlan_taghash *tagh; 348 349 if (src != NULL && dst != NULL) { 350 if ((af = src->sa_family) != dst->sa_family) 351 return (EAFNOSUPPORT); 352 } else { 353 /* Reset current configuration */ 354 af = sc->sc_src.ss_family; 355 src = sstosa(&sc->sc_src); 356 dst = sstosa(&sc->sc_dst); 357 reset = 1; 358 } 359 360 switch (af) { 361 case AF_INET: 362 slen = sizeof(struct sockaddr_in); 363 break; 364 #ifdef INET6 365 case AF_INET6: 366 slen = sizeof(struct sockaddr_in6); 367 break; 368 #endif /* INET6 */ 369 default: 370 return (EAFNOSUPPORT); 371 } 372 373 if (src->sa_len != slen || dst->sa_len != slen) 374 return (EINVAL); 375 376 vxlan_multicast_cleanup(ifp); 377 378 /* returns without error if multicast is not configured */ 379 if ((error = vxlan_multicast_join(ifp, src, dst)) != 0) 380 return (error); 381 382 if ((port = vxlan_sockaddr_port(dst)) != 0) 383 sc->sc_dstport = port; 384 385 if (!reset) { 386 bzero(&sc->sc_src, sizeof(sc->sc_src)); 387 bzero(&sc->sc_dst, sizeof(sc->sc_dst)); 388 memcpy(&sc->sc_src, src, src->sa_len); 389 memcpy(&sc->sc_dst, dst, dst->sa_len); 390 } 391 392 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 393 /* 394 * If the interface accepts any VNI, put it into a separate 395 * list that is not part of the main hash. 396 */ 397 tagh = &vxlan_any; 398 } else 399 tagh = &vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)]; 400 401 LIST_REMOVE(sc, sc_entry); 402 LIST_INSERT_HEAD(tagh, sc, sc_entry); 403 404 return (0); 405 } 406 407 int 408 vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 409 { 410 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 411 struct ifreq *ifr = (struct ifreq *)data; 412 struct if_laddrreq *lifr = (struct if_laddrreq *)data; 413 int error = 0; 414 415 switch (cmd) { 416 case SIOCSIFADDR: 417 ifp->if_flags |= IFF_UP; 418 /* FALLTHROUGH */ 419 420 case SIOCSIFFLAGS: 421 if (ifp->if_flags & IFF_UP) { 422 ifp->if_flags |= IFF_RUNNING; 423 } else { 424 ifp->if_flags &= ~IFF_RUNNING; 425 } 426 break; 427 428 case SIOCADDMULTI: 429 case SIOCDELMULTI: 430 break; 431 432 case SIOCGIFMEDIA: 433 case SIOCSIFMEDIA: 434 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 435 break; 436 437 case SIOCSLIFPHYADDR: 438 error = vxlan_config(ifp, 439 sstosa(&lifr->addr), 440 sstosa(&lifr->dstaddr)); 441 break; 442 443 case SIOCDIFPHYADDR: 444 vxlan_multicast_cleanup(ifp); 445 bzero(&sc->sc_src, sizeof(sc->sc_src)); 446 bzero(&sc->sc_dst, sizeof(sc->sc_dst)); 447 sc->sc_dstport = htons(VXLAN_PORT); 448 break; 449 450 case SIOCGLIFPHYADDR: 451 if (sc->sc_dst.ss_family == AF_UNSPEC) { 452 error = EADDRNOTAVAIL; 453 break; 454 } 455 bzero(&lifr->addr, sizeof(lifr->addr)); 456 bzero(&lifr->dstaddr, sizeof(lifr->dstaddr)); 457 memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len); 458 memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len); 459 break; 460 461 case SIOCSLIFPHYRTABLE: 462 if (ifr->ifr_rdomainid < 0 || 463 ifr->ifr_rdomainid > RT_TABLEID_MAX || 464 !rtable_exists(ifr->ifr_rdomainid)) { 465 error = EINVAL; 466 break; 467 } 468 sc->sc_rdomain = ifr->ifr_rdomainid; 469 (void)vxlan_config(ifp, NULL, NULL); 470 break; 471 472 case SIOCGLIFPHYRTABLE: 473 ifr->ifr_rdomainid = sc->sc_rdomain; 474 break; 475 476 case SIOCSLIFPHYTTL: 477 if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { 478 error = EINVAL; 479 break; 480 } 481 if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl) 482 break; 483 sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl); 484 (void)vxlan_config(ifp, NULL, NULL); 485 break; 486 487 case SIOCGLIFPHYTTL: 488 ifr->ifr_ttl = (int)sc->sc_ttl; 489 break; 490 491 case SIOCSLIFPHYDF: 492 /* commit */ 493 sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 494 break; 495 case SIOCGLIFPHYDF: 496 ifr->ifr_df = sc->sc_df ? 1 : 0; 497 break; 498 499 case SIOCSTXHPRIO: 500 if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET) 501 ; /* fall through */ 502 else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN || 503 ifr->ifr_hdrprio > IF_HDRPRIO_MAX) { 504 error = EINVAL; 505 break; 506 } 507 508 sc->sc_txhprio = ifr->ifr_hdrprio; 509 break; 510 case SIOCGTXHPRIO: 511 ifr->ifr_hdrprio = sc->sc_txhprio; 512 break; 513 514 case SIOCSVNETID: 515 if (sc->sc_vnetid == ifr->ifr_vnetid) 516 break; 517 518 if ((ifr->ifr_vnetid != VXLAN_VNI_ANY) && 519 (ifr->ifr_vnetid > VXLAN_VNI_MAX || 520 ifr->ifr_vnetid < VXLAN_VNI_MIN)) { 521 error = EINVAL; 522 break; 523 } 524 525 sc->sc_vnetid = (int)ifr->ifr_vnetid; 526 (void)vxlan_config(ifp, NULL, NULL); 527 break; 528 529 case SIOCGVNETID: 530 if ((sc->sc_vnetid != VXLAN_VNI_ANY) && 531 (sc->sc_vnetid > VXLAN_VNI_MAX || 532 sc->sc_vnetid < VXLAN_VNI_MIN)) { 533 error = EADDRNOTAVAIL; 534 break; 535 } 536 537 ifr->ifr_vnetid = sc->sc_vnetid; 538 break; 539 540 case SIOCDVNETID: 541 sc->sc_vnetid = VXLAN_VNI_UNSET; 542 (void)vxlan_config(ifp, NULL, NULL); 543 break; 544 545 default: 546 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 547 break; 548 } 549 550 return (error); 551 } 552 553 int 554 vxlan_media_change(struct ifnet *ifp) 555 { 556 return (0); 557 } 558 559 void 560 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr) 561 { 562 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 563 } 564 565 int 566 vxlan_sockaddr_cmp(struct sockaddr *srcsa, struct sockaddr *dstsa) 567 { 568 struct sockaddr_in *src4, *dst4; 569 #ifdef INET6 570 struct sockaddr_in6 *src6, *dst6; 571 #endif /* INET6 */ 572 573 if (srcsa->sa_family != dstsa->sa_family) 574 return (1); 575 576 switch (dstsa->sa_family) { 577 case AF_INET: 578 src4 = satosin(srcsa); 579 dst4 = satosin(dstsa); 580 if (src4->sin_addr.s_addr == dst4->sin_addr.s_addr) 581 return (0); 582 break; 583 #ifdef INET6 584 case AF_INET6: 585 src6 = satosin6(srcsa); 586 dst6 = satosin6(dstsa); 587 if (IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &dst6->sin6_addr) && 588 src6->sin6_scope_id == dst6->sin6_scope_id) 589 return (0); 590 break; 591 #endif /* INET6 */ 592 } 593 594 return (1); 595 } 596 597 uint16_t 598 vxlan_sockaddr_port(struct sockaddr *sa) 599 { 600 struct sockaddr_in *sin4; 601 #ifdef INET6 602 struct sockaddr_in6 *sin6; 603 #endif /* INET6 */ 604 605 switch (sa->sa_family) { 606 case AF_INET: 607 sin4 = satosin(sa); 608 return (sin4->sin_port); 609 #ifdef INET6 610 case AF_INET6: 611 sin6 = satosin6(sa); 612 return (sin6->sin6_port); 613 #endif /* INET6 */ 614 default: 615 break; 616 } 617 618 return (0); 619 } 620 621 int 622 vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen, 623 struct sockaddr *srcsa, struct sockaddr *dstsa) 624 { 625 struct vxlan_softc *sc = NULL, *sc_cand = NULL; 626 struct vxlan_header v; 627 int vni; 628 struct ifnet *ifp; 629 int skip; 630 #if NBRIDGE > 0 631 struct bridge_tunneltag *brtag; 632 #endif 633 struct mbuf *n; 634 int off; 635 636 /* XXX Should verify the UDP port first before copying the packet */ 637 skip = iphlen + sizeof(*uh); 638 if (m->m_pkthdr.len - skip < sizeof(v)) 639 return (0); 640 m_copydata(m, skip, sizeof(v), (caddr_t)&v); 641 skip += sizeof(v); 642 643 if (v.vxlan_flags & htonl(VXLAN_RESERVED1) || 644 v.vxlan_id & htonl(VXLAN_RESERVED2)) 645 return (0); 646 647 vni = ntohl(v.vxlan_id) >> VXLAN_VNI_S; 648 if ((v.vxlan_flags & htonl(VXLAN_FLAGS_VNI)) == 0) { 649 if (vni != 0) 650 return (0); 651 652 vni = VXLAN_VNI_UNSET; 653 } 654 655 NET_ASSERT_LOCKED(); 656 /* First search for a vxlan(4) interface with the packet's VNI */ 657 LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) { 658 if ((uh->uh_dport == sc->sc_dstport) && 659 vni == sc->sc_vnetid && 660 sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid)) { 661 sc_cand = sc; 662 if (vxlan_sockaddr_cmp(srcsa, sstosa(&sc->sc_dst)) == 0) 663 goto found; 664 } 665 } 666 667 /* 668 * Now loop through all the vxlan(4) interfaces that are configured 669 * to accept any VNI and operating in multipoint-to-multipoint mode 670 * that is used in combination with bridge(4) or switch(4). 671 * If a vxlan(4) interface has been found for the packet's VNI, this 672 * code is not reached as the other interface is more specific. 673 */ 674 LIST_FOREACH(sc, &vxlan_any, sc_entry) { 675 if ((uh->uh_dport == sc->sc_dstport) && 676 (sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid))) { 677 sc_cand = sc; 678 goto found; 679 } 680 } 681 682 if (sc_cand) { 683 sc = sc_cand; 684 goto found; 685 } 686 687 /* not found */ 688 return (0); 689 690 found: 691 if (m->m_pkthdr.len < skip + sizeof(struct ether_header)) { 692 m_freem(m); 693 return (EINVAL); 694 } 695 696 m_adj(m, skip); 697 ifp = &sc->sc_ac.ac_if; 698 699 #if NBRIDGE > 0 700 /* Store the tunnel src/dst IP and vni for the bridge or switch */ 701 if ((ifp->if_bridgeidx != 0 || ifp->if_switchport != NULL) && 702 srcsa->sa_family != AF_UNSPEC && 703 ((brtag = bridge_tunneltag(m)) != NULL)) { 704 memcpy(&brtag->brtag_peer.sa, srcsa, srcsa->sa_len); 705 memcpy(&brtag->brtag_local.sa, dstsa, dstsa->sa_len); 706 brtag->brtag_id = vni; 707 } 708 #endif 709 710 m->m_flags &= ~(M_BCAST|M_MCAST); 711 712 #if NPF > 0 713 pf_pkt_addr_changed(m); 714 #endif 715 if ((m->m_len < sizeof(struct ether_header)) && 716 (m = m_pullup(m, sizeof(struct ether_header))) == NULL) 717 return (ENOBUFS); 718 719 n = m_getptr(m, sizeof(struct ether_header), &off); 720 if (n == NULL) { 721 m_freem(m); 722 return (EINVAL); 723 } 724 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { 725 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); 726 /* Dispose of the original mbuf chain */ 727 m_freem(m); 728 if (n == NULL) 729 return (ENOBUFS); 730 m = n; 731 } 732 733 if_vinput(ifp, m); 734 735 /* success */ 736 return (1); 737 } 738 739 struct mbuf * 740 vxlan_encap4(struct ifnet *ifp, struct mbuf *m, 741 struct sockaddr *src, struct sockaddr *dst) 742 { 743 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 744 struct ip *ip; 745 746 /* 747 * Remove multicast and broadcast flags or encapsulated packet 748 * ends up as multicast or broadcast packet. 749 */ 750 m->m_flags &= ~(M_BCAST|M_MCAST); 751 752 M_PREPEND(m, sizeof(*ip), M_DONTWAIT); 753 if (m == NULL) 754 return (NULL); 755 756 ip = mtod(m, struct ip *); 757 ip->ip_v = IPVERSION; 758 ip->ip_hl = sizeof(struct ip) >> 2; 759 ip->ip_id = htons(ip_randomid()); 760 ip->ip_off = sc->sc_df; 761 ip->ip_p = IPPROTO_UDP; 762 ip->ip_tos = IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? 763 m->m_pkthdr.pf.prio : sc->sc_txhprio); 764 ip->ip_len = htons(m->m_pkthdr.len); 765 766 ip->ip_src = satosin(src)->sin_addr; 767 ip->ip_dst = satosin(dst)->sin_addr; 768 769 if (sc->sc_ttl > 0) 770 ip->ip_ttl = sc->sc_ttl; 771 else 772 ip->ip_ttl = IPDEFTTL; 773 774 return (m); 775 } 776 777 #ifdef INET6 778 struct mbuf * 779 vxlan_encap6(struct ifnet *ifp, struct mbuf *m, 780 struct sockaddr *src, struct sockaddr *dst) 781 { 782 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 783 struct ip6_hdr *ip6; 784 struct in6_addr *in6a; 785 uint32_t flow; 786 787 /* 788 * Remove multicast and broadcast flags or encapsulated packet 789 * ends up as multicast or broadcast packet. 790 */ 791 m->m_flags &= ~(M_BCAST|M_MCAST); 792 793 M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); 794 if (m == NULL) 795 return (NULL); 796 797 flow = (uint32_t)IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? 798 m->m_pkthdr.pf.prio : sc->sc_txhprio) << 20; 799 800 ip6 = mtod(m, struct ip6_hdr *); 801 ip6->ip6_flow = htonl(flow); 802 ip6->ip6_vfc &= ~IPV6_VERSION_MASK; 803 ip6->ip6_vfc |= IPV6_VERSION; 804 ip6->ip6_nxt = IPPROTO_UDP; 805 ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); 806 if (in6_embedscope(&ip6->ip6_src, satosin6(src), NULL) != 0) 807 goto drop; 808 if (in6_embedscope(&ip6->ip6_dst, satosin6(dst), NULL) != 0) 809 goto drop; 810 811 if (sc->sc_ttl > 0) 812 ip6->ip6_hlim = sc->sc_ttl; 813 else 814 ip6->ip6_hlim = ip6_defhlim; 815 816 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 817 if (in6_selectsrc(&in6a, satosin6(dst), NULL, 818 sc->sc_rdomain) != 0) 819 goto drop; 820 821 ip6->ip6_src = *in6a; 822 } 823 824 if (sc->sc_df) 825 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 826 827 /* 828 * The UDP checksum of VXLAN packets should be set to zero, 829 * but the IPv6 UDP checksum is not optional. There is an RFC 6539 830 * to relax the IPv6 UDP checksum requirement for tunnels, but it 831 * is currently not supported by most implementations. 832 */ 833 m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 834 835 return (m); 836 837 drop: 838 m_freem(m); 839 return (NULL); 840 } 841 #endif /* INET6 */ 842 843 int 844 vxlan_output(struct ifnet *ifp, struct mbuf *m) 845 { 846 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 847 struct vxlanudphdr *vu; 848 struct sockaddr *src, *dst; 849 #if NBRIDGE > 0 850 struct bridge_tunneltag *brtag; 851 #endif 852 int error, af; 853 uint32_t tag; 854 struct mbuf *m0; 855 856 /* VXLAN header, needs new mbuf because of alignment issues */ 857 MGET(m0, M_DONTWAIT, m->m_type); 858 if (m0 == NULL) { 859 ifp->if_oerrors++; 860 return (ENOBUFS); 861 } 862 M_MOVE_PKTHDR(m0, m); 863 m0->m_next = m; 864 m = m0; 865 m_align(m, sizeof(*vu)); 866 m->m_len = sizeof(*vu); 867 m->m_pkthdr.len += sizeof(*vu); 868 869 src = sstosa(&sc->sc_src); 870 dst = sstosa(&sc->sc_dst); 871 af = src->sa_family; 872 873 vu = mtod(m, struct vxlanudphdr *); 874 vu->vu_u.uh_sport = sc->sc_dstport; 875 vu->vu_u.uh_dport = sc->sc_dstport; 876 vu->vu_u.uh_ulen = htons(m->m_pkthdr.len); 877 vu->vu_u.uh_sum = 0; 878 tag = sc->sc_vnetid; 879 880 #if NBRIDGE > 0 881 if ((brtag = bridge_tunnel(m)) != NULL) { 882 dst = &brtag->brtag_peer.sa; 883 884 /* If accepting any VNI, source ip address is from brtag */ 885 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 886 src = &brtag->brtag_local.sa; 887 tag = (uint32_t)brtag->brtag_id; 888 af = src->sa_family; 889 } 890 891 if (dst->sa_family != af) { 892 ifp->if_oerrors++; 893 m_freem(m); 894 return (EINVAL); 895 } 896 } else 897 #endif 898 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 899 /* 900 * If accepting any VNI, build the vxlan header only by 901 * bridge_tunneltag or drop packet if the tag does not exist. 902 */ 903 ifp->if_oerrors++; 904 m_freem(m); 905 return (ENETUNREACH); 906 } 907 908 if (sc->sc_vnetid != VXLAN_VNI_UNSET) { 909 vu->vu_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI); 910 vu->vu_v.vxlan_id = htonl(tag << VXLAN_VNI_S); 911 } else { 912 vu->vu_v.vxlan_flags = htonl(0); 913 vu->vu_v.vxlan_id = htonl(0); 914 } 915 916 switch (af) { 917 case AF_INET: 918 m = vxlan_encap4(ifp, m, src, dst); 919 break; 920 #ifdef INET6 921 case AF_INET6: 922 m = vxlan_encap6(ifp, m, src, dst); 923 break; 924 #endif /* INET6 */ 925 default: 926 m_freem(m); 927 m = NULL; 928 } 929 930 if (m == NULL) { 931 ifp->if_oerrors++; 932 return (ENOBUFS); 933 } 934 935 #if NBRIDGE > 0 936 if (brtag != NULL) 937 bridge_tunneluntag(m); 938 #endif 939 940 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 941 942 #if NPF > 0 943 pf_pkt_addr_changed(m); 944 #endif 945 946 switch (af) { 947 case AF_INET: 948 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, 949 &sc->sc_imo, NULL, 0); 950 break; 951 #ifdef INET6 952 case AF_INET6: 953 error = ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL); 954 break; 955 #endif /* INET6 */ 956 default: 957 m_freem(m); 958 error = EAFNOSUPPORT; 959 } 960 961 if (error) 962 ifp->if_oerrors++; 963 964 return (error); 965 } 966 967 void 968 vxlan_addr_change(void *arg) 969 { 970 struct vxlan_softc *sc = arg; 971 struct ifnet *ifp = &sc->sc_ac.ac_if; 972 int error; 973 974 /* 975 * Reset the configuration after resume or any possible address 976 * configuration changes. 977 */ 978 if ((error = vxlan_config(ifp, NULL, NULL))) { 979 /* 980 * The source address of the tunnel can temporarily disappear, 981 * after a link state change when running the DHCP client, 982 * so keep it configured. 983 */ 984 } 985 } 986 987 void 988 vxlan_if_change(void *arg) 989 { 990 struct vxlan_softc *sc = arg; 991 struct ifnet *ifp = &sc->sc_ac.ac_if; 992 993 /* 994 * Reset the configuration after the parent interface disappeared. 995 */ 996 vxlan_multicast_cleanup(ifp); 997 memset(&sc->sc_src, 0, sizeof(sc->sc_src)); 998 memset(&sc->sc_dst, 0, sizeof(sc->sc_dst)); 999 sc->sc_dstport = htons(VXLAN_PORT); 1000 } 1001 1002 void 1003 vxlan_link_change(void *arg) 1004 { 1005 struct vxlan_softc *sc = arg; 1006 struct ifnet *ifp = &sc->sc_ac.ac_if; 1007 1008 /* 1009 * The machine might have lost its multicast associations after 1010 * link state changes. This fixes a problem with VMware after 1011 * suspend/resume of the host or guest. 1012 */ 1013 (void)vxlan_config(ifp, NULL, NULL); 1014 } 1015