1 /* $OpenBSD: if_vxlan.c,v 1.90 2022/02/26 04:46:34 dlg Exp $ */ 2 3 /* 4 * Copyright (c) 2021 David Gwynne <dlg@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bpfilter.h" 20 #include "pf.h" 21 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/kernel.h> 25 #include <sys/mbuf.h> 26 #include <sys/socket.h> 27 #include <sys/ioctl.h> 28 #include <sys/timeout.h> 29 #include <sys/pool.h> 30 #include <sys/tree.h> 31 #include <sys/refcnt.h> 32 #include <sys/smr.h> 33 34 #include <sys/socket.h> 35 #include <sys/socketvar.h> 36 37 #include <net/if.h> 38 #include <net/if_var.h> 39 #include <net/if_dl.h> 40 #include <net/if_media.h> 41 #include <net/if_types.h> 42 #include <net/route.h> 43 #include <net/rtable.h> 44 45 #include <netinet/in.h> 46 #include <netinet/in_var.h> 47 #include <netinet/if_ether.h> 48 #include <netinet/ip.h> 49 #include <netinet/udp.h> 50 #include <netinet/in_pcb.h> 51 #include <netinet/ip_var.h> 52 53 #ifdef INET6 54 #include <netinet/ip6.h> 55 #include <netinet6/ip6_var.h> 56 #include <netinet6/in6_var.h> 57 #endif 58 59 /* for bridge stuff */ 60 #include <net/if_bridge.h> 61 #include <net/if_etherbridge.h> 62 63 #if NBPFILTER > 0 64 #include <net/bpf.h> 65 #endif 66 67 /* 68 * The protocol. 
69 */ 70 71 #define VXLANMTU 1492 72 #define VXLAN_PORT 4789 73 74 struct vxlan_header { 75 uint32_t vxlan_flags; 76 #define VXLAN_F_I (1U << 27) 77 uint32_t vxlan_id; 78 #define VXLAN_VNI_SHIFT 8 79 #define VXLAN_VNI_MASK (0xffffffU << VXLAN_VNI_SHIFT) 80 }; 81 82 #define VXLAN_VNI_MAX 0x00ffffffU 83 #define VXLAN_VNI_MIN 0x00000000U 84 85 /* 86 * The driver. 87 */ 88 89 union vxlan_addr { 90 struct in_addr in4; 91 struct in6_addr in6; 92 }; 93 94 struct vxlan_softc; 95 96 struct vxlan_peer { 97 RBT_ENTRY(vxlan_peer) p_entry; 98 99 struct vxlan_header p_header; 100 union vxlan_addr p_addr; 101 102 struct vxlan_softc *p_sc; 103 }; 104 105 RBT_HEAD(vxlan_peers, vxlan_peer); 106 107 struct vxlan_tep { 108 TAILQ_ENTRY(vxlan_tep) vt_entry; 109 110 sa_family_t vt_af; 111 unsigned int vt_rdomain; 112 union vxlan_addr vt_addr; 113 #define vt_addr4 vt_addr.in4 114 #define vt_addr6 vt_addr.in6 115 in_port_t vt_port; 116 117 struct socket *vt_so; 118 119 struct mutex vt_mtx; 120 struct vxlan_peers vt_peers; 121 }; 122 123 TAILQ_HEAD(vxlan_teps, vxlan_tep); 124 125 enum vxlan_tunnel_mode { 126 VXLAN_TMODE_UNSET, 127 VXLAN_TMODE_P2P, /* unicast destination, no learning */ 128 VXLAN_TMODE_LEARNING, /* multicast destination, learning */ 129 VXLAN_TMODE_ENDPOINT, /* unset destination, no learning */ 130 }; 131 132 struct vxlan_softc { 133 struct arpcom sc_ac; 134 struct etherbridge sc_eb; 135 136 unsigned int sc_rdomain; 137 sa_family_t sc_af; 138 union vxlan_addr sc_src; 139 union vxlan_addr sc_dst; 140 in_port_t sc_port; 141 struct vxlan_header sc_header; 142 unsigned int sc_if_index0; 143 144 struct task sc_dtask; 145 void *sc_inmulti; 146 147 enum vxlan_tunnel_mode sc_mode; 148 struct vxlan_peer *sc_ucast_peer; 149 struct vxlan_peer *sc_mcast_peer; 150 struct refcnt sc_refs; 151 152 uint16_t sc_df; 153 int sc_ttl; 154 int sc_txhprio; 155 int sc_rxhprio; 156 157 struct task sc_send_task; 158 }; 159 160 void vxlanattach(int); 161 162 static int vxlan_clone_create(struct 
if_clone *, int); 163 static int vxlan_clone_destroy(struct ifnet *); 164 165 static int vxlan_output(struct ifnet *, struct mbuf *, 166 struct sockaddr *, struct rtentry *); 167 static int vxlan_enqueue(struct ifnet *, struct mbuf *); 168 static void vxlan_start(struct ifqueue *); 169 static void vxlan_send(void *); 170 171 static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); 172 static int vxlan_up(struct vxlan_softc *); 173 static int vxlan_down(struct vxlan_softc *); 174 static int vxlan_addmulti(struct vxlan_softc *, struct ifnet *); 175 static void vxlan_delmulti(struct vxlan_softc *); 176 177 static struct mbuf * 178 vxlan_input(void *, struct mbuf *, 179 struct ip *, struct ip6_hdr *, void *, int); 180 181 static int vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *); 182 static int vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *); 183 static int vxlan_set_tunnel(struct vxlan_softc *, 184 const struct if_laddrreq *); 185 static int vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *); 186 static int vxlan_del_tunnel(struct vxlan_softc *); 187 static int vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *); 188 static int vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *); 189 static int vxlan_del_vnetid(struct vxlan_softc *); 190 static int vxlan_set_parent(struct vxlan_softc *, 191 const struct if_parent *); 192 static int vxlan_get_parent(struct vxlan_softc *, struct if_parent *); 193 static int vxlan_del_parent(struct vxlan_softc *); 194 195 static int vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *); 196 static int vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *); 197 198 static void vxlan_detach_hook(void *); 199 200 static struct if_clone vxlan_cloner = 201 IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); 202 203 static int vxlan_eb_port_eq(void *, void *, void *); 204 static void *vxlan_eb_port_take(void *, void *); 205 static void vxlan_eb_port_rele(void *, void 
*); 206 static size_t vxlan_eb_port_ifname(void *, char *, size_t, void *); 207 static void vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *); 208 209 static const struct etherbridge_ops vxlan_etherbridge_ops = { 210 vxlan_eb_port_eq, 211 vxlan_eb_port_take, 212 vxlan_eb_port_rele, 213 vxlan_eb_port_ifname, 214 vxlan_eb_port_sa, 215 }; 216 217 static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps"); 218 static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps); 219 static struct pool vxlan_endpoint_pool; 220 221 static inline int vxlan_peer_cmp(const struct vxlan_peer *, 222 const struct vxlan_peer *); 223 224 RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); 225 226 void 227 vxlanattach(int count) 228 { 229 if_clone_attach(&vxlan_cloner); 230 } 231 232 static int 233 vxlan_clone_create(struct if_clone *ifc, int unit) 234 { 235 struct vxlan_softc *sc; 236 struct ifnet *ifp; 237 int error; 238 239 if (vxlan_endpoint_pool.pr_size == 0) { 240 pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr), 241 0, IPL_SOFTNET, 0, "vxlanep", NULL); 242 } 243 244 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); 245 if (sc == NULL) 246 return (ENOMEM); 247 248 ifp = &sc->sc_ac.ac_if; 249 250 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", 251 ifc->ifc_name, unit); 252 253 error = etherbridge_init(&sc->sc_eb, ifp->if_xname, 254 &vxlan_etherbridge_ops, sc); 255 if (error == -1) { 256 free(sc, M_DEVBUF, sizeof(*sc)); 257 return (error); 258 } 259 260 sc->sc_af = AF_UNSPEC; 261 sc->sc_txhprio = 0; 262 sc->sc_rxhprio = IF_HDRPRIO_OUTER; 263 sc->sc_df = 0; 264 sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL; 265 266 task_set(&sc->sc_dtask, vxlan_detach_hook, sc); 267 refcnt_init(&sc->sc_refs); 268 task_set(&sc->sc_send_task, vxlan_send, sc); 269 270 ifp->if_softc = sc; 271 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 272 ifp->if_ioctl = vxlan_ioctl; 273 ifp->if_output = vxlan_output; 274 ifp->if_enqueue = vxlan_enqueue; 275 
ifp->if_qstart = vxlan_start; 276 ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX; 277 ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; 278 ether_fakeaddr(ifp); 279 280 if_counters_alloc(ifp); 281 if_attach(ifp); 282 ether_ifattach(ifp); 283 284 return (0); 285 } 286 287 static int 288 vxlan_clone_destroy(struct ifnet *ifp) 289 { 290 struct vxlan_softc *sc = ifp->if_softc; 291 292 NET_LOCK(); 293 if (ISSET(ifp->if_flags, IFF_RUNNING)) 294 vxlan_down(sc); 295 NET_UNLOCK(); 296 297 ether_ifdetach(ifp); 298 if_detach(ifp); 299 300 etherbridge_destroy(&sc->sc_eb); 301 302 refcnt_finalize(&sc->sc_refs, "vxlanfini"); 303 304 free(sc, M_DEVBUF, sizeof(*sc)); 305 306 return (0); 307 } 308 309 static struct vxlan_softc * 310 vxlan_take(struct vxlan_softc *sc) 311 { 312 refcnt_take(&sc->sc_refs); 313 return (sc); 314 } 315 316 static void 317 vxlan_rele(struct vxlan_softc *sc) 318 { 319 refcnt_rele_wake(&sc->sc_refs); 320 } 321 322 static struct mbuf * 323 vxlan_encap(struct vxlan_softc *sc, struct mbuf *m, 324 struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *, 325 const union vxlan_addr *, uint8_t)) 326 { 327 struct ifnet *ifp = &sc->sc_ac.ac_if; 328 struct m_tag *mtag; 329 struct mbuf *m0; 330 union vxlan_addr gateway; 331 const union vxlan_addr *endpoint; 332 struct vxlan_header *vh; 333 struct udphdr *uh; 334 int prio; 335 uint8_t tos; 336 337 if (sc->sc_mode == VXLAN_TMODE_UNSET) 338 goto drop; 339 340 if (sc->sc_mode == VXLAN_TMODE_P2P) 341 endpoint = &sc->sc_dst; 342 else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */ 343 struct ether_header *eh = mtod(m, struct ether_header *); 344 345 smr_read_enter(); 346 endpoint = etherbridge_resolve_ea(&sc->sc_eb, 347 (struct ether_addr *)eh->ether_dhost); 348 if (endpoint != NULL) { 349 gateway = *endpoint; 350 endpoint = &gateway; 351 } 352 smr_read_leave(); 353 354 if (endpoint == NULL) { 355 if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) 356 goto drop; 357 358 /* "flood" to unknown destinations */ 359 
endpoint = &sc->sc_dst; 360 } 361 } 362 363 /* force prepend mbuf because of payload alignment */ 364 m0 = m_get(M_DONTWAIT, m->m_type); 365 if (m0 == NULL) 366 goto drop; 367 368 m_align(m0, 0); 369 m0->m_len = 0; 370 371 M_MOVE_PKTHDR(m0, m); 372 m0->m_next = m; 373 374 m = m_prepend(m0, sizeof(*vh), M_DONTWAIT); 375 if (m == NULL) 376 return (NULL); 377 378 vh = mtod(m, struct vxlan_header *); 379 *vh = sc->sc_header; 380 381 m = m_prepend(m, sizeof(*uh), M_DONTWAIT); 382 if (m == NULL) 383 return (NULL); 384 385 uh = mtod(m, struct udphdr *); 386 uh->uh_sport = sc->sc_port; /* XXX */ 387 uh->uh_dport = sc->sc_port; 388 htobem16(&uh->uh_ulen, m->m_pkthdr.len); 389 uh->uh_sum = htons(0); 390 391 SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); 392 393 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); 394 if (mtag == NULL) 395 goto drop; 396 397 *(int *)(mtag + 1) = ifp->if_index; 398 m_tag_prepend(m, mtag); 399 400 prio = sc->sc_txhprio; 401 if (prio == IF_HDRPRIO_PACKET) 402 prio = m->m_pkthdr.pf.prio; 403 tos = IFQ_PRIO2TOS(prio); 404 405 CLR(m->m_flags, M_BCAST|M_MCAST); 406 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 407 408 #if NPF > 0 409 pf_pkt_addr_changed(m); 410 #endif 411 412 return ((*ip_encap)(sc, m, endpoint, tos)); 413 drop: 414 m_freem(m); 415 return (NULL); 416 } 417 418 static struct mbuf * 419 vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m, 420 const union vxlan_addr *endpoint, uint8_t tos) 421 { 422 struct ip *ip; 423 424 m = m_prepend(m, sizeof(*ip), M_DONTWAIT); 425 if (m == NULL) 426 return (NULL); 427 428 ip = mtod(m, struct ip *); 429 ip->ip_v = IPVERSION; 430 ip->ip_hl = sizeof(*ip) >> 2; 431 ip->ip_off = sc->sc_df; 432 ip->ip_tos = tos; 433 ip->ip_len = htons(m->m_pkthdr.len); 434 ip->ip_ttl = sc->sc_ttl; 435 ip->ip_p = IPPROTO_UDP; 436 ip->ip_src = sc->sc_src.in4; 437 ip->ip_dst = endpoint->in4; 438 439 return (m); 440 } 441 442 #ifdef INET6 443 static struct mbuf * 444 vxlan_encap_ipv6(struct vxlan_softc *sc, 
    struct mbuf *m,
    const union vxlan_addr *endpoint, uint8_t tos)
{
	struct ip6_hdr *ip6;
	/* remember the payload length before the header is prepended */
	int len = m->m_pkthdr.len;

	m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	ip6 = mtod(m, struct ip6_hdr *);
	/* carry the stack's flow id in the flow label when one is set */
	ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
	    htonl(m->m_pkthdr.ph_flowid) : 0;
	ip6->ip6_vfc |= IPV6_VERSION;
	/* traffic class sits above the 20-bit flow label */
	ip6->ip6_flow |= htonl((uint32_t)tos << 20);
	ip6->ip6_plen = htons(len);
	ip6->ip6_nxt = IPPROTO_UDP;
	ip6->ip6_hlim = sc->sc_ttl;
	ip6->ip6_src = sc->sc_src.in6;
	ip6->ip6_dst = endpoint->in6;

	/* IPv6 has no DF bit; ask the stack not to fragment instead */
	if (sc->sc_df)
		SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

	return (m);
}
#endif /* INET6 */

/*
 * if_output hook: refuse packets that have already passed through
 * this interface (detected via the PACKET_TAG_GRE tag vxlan_encap
 * attaches) to break encapsulation loops, then hand off to ether_output.
 */
static int
vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct m_tag *mtag;

	mtag = NULL;
	while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) {
		if (*(int *)(mtag + 1) == ifp->if_index) {
			m_freem(m);
			return (EIO);
		}
	}

	return (ether_output(ifp, m, dst, rt));
}

/*
 * if_enqueue hook: queue the packet and kick the softnet task that
 * drains if_snd (transmission happens in vxlan_send()).
 */
static int
vxlan_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	struct vxlan_softc *sc = ifp->if_softc;
	struct ifqueue *ifq = &ifp->if_snd;

	if (ifq_enqueue(ifq, m) != 0)
		return (ENOBUFS);

	task_add(ifq->ifq_softnet, &sc->sc_send_task);

	return (0);
}

/* qstart hook: nothing to do here but schedule the send task. */
static void
vxlan_start(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	struct vxlan_softc *sc = ifp->if_softc;

	task_add(ifq->ifq_softnet, &sc->sc_send_task);
}

/*
 * Transmit a list of already-encapsulated IPv4 packets via ip_output(),
 * returning the number of packets that failed to send.
 */
static uint64_t
vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml)
{
	struct ip_moptions imo;
	struct mbuf *m;
	uint64_t oerrors = 0;

	/* steer multicast out the configured parent, no local loopback */
	imo.imo_ifidx = sc->sc_if_index0;
	imo.imo_ttl = sc->sc_ttl;
	imo.imo_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo,
NULL, 0) != 0) 526 oerrors++; 527 } 528 NET_UNLOCK(); 529 530 return (oerrors); 531 } 532 533 #ifdef INET6 534 static uint64_t 535 vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml) 536 { 537 struct ip6_moptions im6o; 538 struct mbuf *m; 539 uint64_t oerrors = 0; 540 541 im6o.im6o_ifidx = sc->sc_if_index0; 542 im6o.im6o_hlim = sc->sc_ttl; 543 im6o.im6o_loop = 0; 544 545 NET_LOCK(); 546 while ((m = ml_dequeue(ml)) != NULL) { 547 if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0) 548 oerrors++; 549 } 550 NET_UNLOCK(); 551 552 return (oerrors); 553 } 554 #endif /* INET6 */ 555 556 static void 557 vxlan_send(void *arg) 558 { 559 struct vxlan_softc *sc = arg; 560 struct ifnet *ifp = &sc->sc_ac.ac_if; 561 struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *, 562 const union vxlan_addr *, uint8_t); 563 uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *); 564 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 565 struct mbuf *m; 566 uint64_t oerrors; 567 568 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 569 return; 570 571 switch (sc->sc_af) { 572 case AF_INET: 573 ip_encap = vxlan_encap_ipv4; 574 ip_send = vxlan_send_ipv4; 575 break; 576 #ifdef INET6 577 case AF_INET6: 578 ip_encap = vxlan_encap_ipv6; 579 ip_send = vxlan_send_ipv6; 580 break; 581 #endif 582 default: 583 unhandled_af(sc->sc_af); 584 /* NOTREACHED */ 585 } 586 587 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { 588 #if NBPFILTER > 0 589 caddr_t if_bpf = READ_ONCE(ifp->if_bpf); 590 if (if_bpf != NULL) 591 bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); 592 #endif 593 m = vxlan_encap(sc, m, ip_encap); 594 if (m == NULL) 595 continue; 596 597 ml_enqueue(&ml, m); 598 } 599 600 oerrors = (*ip_send)(sc, &ml); 601 602 counters_add(ifp->if_counters, ifc_oerrors, oerrors); 603 } 604 605 static struct mbuf * 606 vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6, 607 void *uhp, int hlen) 608 { 609 struct vxlan_tep *vt = arg; 610 union vxlan_addr addr; 611 struct vxlan_peer 
key, *p; 612 struct udphdr *uh; 613 struct vxlan_header *vh; 614 struct ether_header *eh; 615 int vhlen = hlen + sizeof(*vh); 616 struct mbuf *n; 617 int off; 618 in_port_t port; 619 struct vxlan_softc *sc = NULL; 620 struct ifnet *ifp; 621 int rxhprio; 622 uint8_t tos; 623 624 if (m->m_pkthdr.len < vhlen) 625 goto drop; 626 627 uh = uhp; 628 port = uh->uh_sport; 629 630 if (ip != NULL) { 631 memset(&addr, 0, sizeof(addr)); 632 addr.in4 = ip->ip_src; 633 tos = ip->ip_tos; 634 } 635 #ifdef INET6 636 else { 637 addr.in6 = ip6->ip6_src; 638 tos = bemtoh32(&ip6->ip6_flow) >> 20; 639 } 640 #endif 641 642 if (m->m_len < vhlen) { 643 m = m_pullup(m, vhlen); 644 if (m == NULL) 645 return (NULL); 646 } 647 648 /* can't use ip/ip6/uh after this */ 649 650 vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen); 651 652 memset(&key, 0, sizeof(key)); 653 key.p_addr = addr; 654 key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I); 655 key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK); 656 657 mtx_enter(&vt->vt_mtx); 658 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 659 if (p == NULL) { 660 memset(&key.p_addr, 0, sizeof(key.p_addr)); 661 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 662 } 663 if (p != NULL) 664 sc = vxlan_take(p->p_sc); 665 mtx_leave(&vt->vt_mtx); 666 667 if (sc == NULL) 668 goto drop; 669 670 ifp = &sc->sc_ac.ac_if; 671 if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port) 672 goto rele_drop; 673 674 m_adj(m, vhlen); 675 676 if (m->m_pkthdr.len < sizeof(*eh)) 677 goto rele_drop; 678 679 if (m->m_len < sizeof(*eh)) { 680 m = m_pullup(m, sizeof(*eh)); 681 if (m == NULL) 682 goto rele; 683 } 684 685 n = m_getptr(m, sizeof(*eh), &off); 686 if (n == NULL) 687 goto rele_drop; 688 689 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { 690 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); 691 m_freem(m); 692 if (n == NULL) 693 goto rele; 694 m = n; 695 } 696 697 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 698 eh = mtod(m, struct ether_header 
*); 699 etherbridge_map_ea(&sc->sc_eb, &addr, 700 (struct ether_addr *)eh->ether_shost); 701 } 702 703 rxhprio = sc->sc_rxhprio; 704 switch (rxhprio) { 705 case IF_HDRPRIO_PACKET: 706 /* nop */ 707 break; 708 case IF_HDRPRIO_OUTER: 709 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos); 710 break; 711 default: 712 m->m_pkthdr.pf.prio = rxhprio; 713 break; \ 714 } \ 715 716 if_vinput(ifp, m); 717 rele: 718 vxlan_rele(sc); 719 return (NULL); 720 721 rele_drop: 722 vxlan_rele(sc); 723 drop: 724 m_freem(m); 725 return (NULL); 726 } 727 728 static int 729 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 730 { 731 struct vxlan_softc *sc = ifp->if_softc; 732 struct ifreq *ifr = (struct ifreq *)data; 733 struct ifbrparam *bparam = (struct ifbrparam *)data; 734 int error = 0; 735 736 switch (cmd) { 737 case SIOCSIFADDR: 738 break; 739 case SIOCSIFFLAGS: 740 if (ISSET(ifp->if_flags, IFF_UP)) { 741 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 742 error = vxlan_up(sc); 743 else 744 error = 0; 745 } else { 746 if (ISSET(ifp->if_flags, IFF_RUNNING)) 747 error = vxlan_down(sc); 748 } 749 break; 750 751 case SIOCSLIFPHYRTABLE: 752 error = vxlan_set_rdomain(sc, ifr); 753 break; 754 case SIOCGLIFPHYRTABLE: 755 error = vxlan_get_rdomain(sc, ifr); 756 break; 757 758 case SIOCSLIFPHYADDR: 759 error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data); 760 break; 761 case SIOCGLIFPHYADDR: 762 error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data); 763 break; 764 case SIOCDIFPHYADDR: 765 error = vxlan_del_tunnel(sc); 766 break; 767 768 case SIOCSVNETID: 769 error = vxlan_set_vnetid(sc, ifr); 770 break; 771 case SIOCGVNETID: 772 error = vxlan_get_vnetid(sc, ifr); 773 break; 774 case SIOCDVNETID: 775 error = vxlan_del_vnetid(sc); 776 break; 777 778 case SIOCSIFPARENT: 779 error = vxlan_set_parent(sc, (struct if_parent *)data); 780 break; 781 case SIOCGIFPARENT: 782 error = vxlan_get_parent(sc, (struct if_parent *)data); 783 break; 784 case SIOCDIFPARENT: 785 error = vxlan_del_parent(sc); 786 
break; 787 788 case SIOCSTXHPRIO: 789 error = if_txhprio_l2_check(ifr->ifr_hdrprio); 790 if (error != 0) 791 break; 792 793 sc->sc_txhprio = ifr->ifr_hdrprio; 794 break; 795 case SIOCGTXHPRIO: 796 ifr->ifr_hdrprio = sc->sc_txhprio; 797 break; 798 799 case SIOCSRXHPRIO: 800 error = if_rxhprio_l2_check(ifr->ifr_hdrprio); 801 if (error != 0) 802 break; 803 804 sc->sc_rxhprio = ifr->ifr_hdrprio; 805 break; 806 case SIOCGRXHPRIO: 807 ifr->ifr_hdrprio = sc->sc_rxhprio; 808 break; 809 810 case SIOCSLIFPHYDF: 811 /* commit */ 812 sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 813 break; 814 case SIOCGLIFPHYDF: 815 ifr->ifr_df = sc->sc_df ? 1 : 0; 816 break; 817 818 case SIOCSLIFPHYTTL: 819 if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) { 820 error = EINVAL; 821 break; 822 } 823 824 /* commit */ 825 sc->sc_ttl = (uint8_t)ifr->ifr_ttl; 826 break; 827 case SIOCGLIFPHYTTL: 828 ifr->ifr_ttl = (int)sc->sc_ttl; 829 break; 830 831 case SIOCBRDGSCACHE: 832 error = etherbridge_set_max(&sc->sc_eb, bparam); 833 break; 834 case SIOCBRDGGCACHE: 835 error = etherbridge_get_max(&sc->sc_eb, bparam); 836 break; 837 case SIOCBRDGSTO: 838 error = etherbridge_set_tmo(&sc->sc_eb, bparam); 839 break; 840 case SIOCBRDGGTO: 841 error = etherbridge_get_tmo(&sc->sc_eb, bparam); 842 break; 843 844 case SIOCBRDGRTS: 845 error = etherbridge_rtfind(&sc->sc_eb, 846 (struct ifbaconf *)data); 847 break; 848 case SIOCBRDGFLUSH: 849 etherbridge_flush(&sc->sc_eb, 850 ((struct ifbreq *)data)->ifbr_ifsflags); 851 break; 852 case SIOCBRDGSADDR: 853 error = vxlan_add_addr(sc, (struct ifbareq *)data); 854 break; 855 case SIOCBRDGDADDR: 856 error = vxlan_del_addr(sc, (struct ifbareq *)data); 857 break; 858 859 case SIOCADDMULTI: 860 case SIOCDELMULTI: 861 /* no hardware to program */ 862 break; 863 864 default: 865 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 866 break; 867 } 868 869 if (error == ENETRESET) { 870 /* no hardware to program */ 871 error = 0; 872 } 873 874 return (error); 875 } 876 877 static 
struct vxlan_tep * 878 vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr) 879 { 880 struct vxlan_tep *vt; 881 882 TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) { 883 if (sc->sc_af == vt->vt_af && 884 sc->sc_rdomain == vt->vt_rdomain && 885 memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 && 886 sc->sc_port == vt->vt_port) 887 return (vt); 888 } 889 890 return (NULL); 891 } 892 893 static int 894 vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, 895 struct vxlan_peer *p) 896 { 897 struct mbuf m; 898 struct vxlan_tep *vt; 899 struct socket *so; 900 struct sockaddr_in *sin; 901 #ifdef INET6 902 struct sockaddr_in6 *sin6; 903 #endif 904 int error; 905 int s; 906 907 vt = vxlan_tep_get(sc, addr); 908 if (vt != NULL) { 909 struct vxlan_peer *op; 910 911 mtx_enter(&vt->vt_mtx); 912 op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p); 913 mtx_leave(&vt->vt_mtx); 914 915 if (op != NULL) 916 return (EADDRINUSE); 917 918 return (0); 919 } 920 921 vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO); 922 if (vt == NULL) 923 return (ENOMEM); 924 925 vt->vt_af = sc->sc_af; 926 vt->vt_rdomain = sc->sc_rdomain; 927 vt->vt_addr = *addr; 928 vt->vt_port = sc->sc_port; 929 930 mtx_init(&vt->vt_mtx, IPL_SOFTNET); 931 RBT_INIT(vxlan_peers, &vt->vt_peers); 932 RBT_INSERT(vxlan_peers, &vt->vt_peers, p); 933 934 error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP); 935 if (error != 0) 936 goto free; 937 938 s = solock(so); 939 940 sotoinpcb(so)->inp_upcall = vxlan_input; 941 sotoinpcb(so)->inp_upcall_arg = vt; 942 943 m_inithdr(&m); 944 m.m_len = sizeof(vt->vt_rdomain); 945 *mtod(&m, unsigned int *) = vt->vt_rdomain; 946 error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m); 947 if (error != 0) 948 goto close; 949 950 m_inithdr(&m); 951 switch (vt->vt_af) { 952 case AF_INET: 953 sin = mtod(&m, struct sockaddr_in *); 954 memset(sin, 0, sizeof(*sin)); 955 sin->sin_len = sizeof(*sin); 956 sin->sin_family = AF_INET; 957 sin->sin_addr = addr->in4; 958 sin->sin_port = 
vt->vt_port; 959 960 m.m_len = sizeof(*sin); 961 break; 962 963 #ifdef INET6 964 case AF_INET6: 965 sin6 = mtod(&m, struct sockaddr_in6 *); 966 sin6->sin6_len = sizeof(*sin6); 967 sin6->sin6_family = AF_INET6; 968 in6_recoverscope(sin6, &addr->in6); 969 sin6->sin6_port = sc->sc_port; 970 971 m.m_len = sizeof(*sin6); 972 break; 973 #endif 974 default: 975 unhandled_af(vt->vt_af); 976 } 977 978 error = sobind(so, &m, curproc); 979 if (error != 0) 980 goto close; 981 982 sounlock(so, s); 983 984 rw_assert_wrlock(&vxlan_lock); 985 TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry); 986 987 vt->vt_so = so; 988 989 return (0); 990 991 close: 992 sounlock(so, s); 993 soclose(so, MSG_DONTWAIT); 994 free: 995 free(vt, M_DEVBUF, sizeof(*vt)); 996 return (error); 997 } 998 999 static void 1000 vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, 1001 struct vxlan_peer *p) 1002 { 1003 struct vxlan_tep *vt; 1004 int empty; 1005 1006 vt = vxlan_tep_get(sc, addr); 1007 if (vt == NULL) 1008 panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc); 1009 1010 mtx_enter(&vt->vt_mtx); 1011 RBT_REMOVE(vxlan_peers, &vt->vt_peers, p); 1012 empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers); 1013 mtx_leave(&vt->vt_mtx); 1014 1015 if (!empty) 1016 return; 1017 1018 rw_assert_wrlock(&vxlan_lock); 1019 TAILQ_REMOVE(&vxlan_teps, vt, vt_entry); 1020 1021 soclose(vt->vt_so, MSG_DONTWAIT); 1022 free(vt, M_DEVBUF, sizeof(*vt)); 1023 } 1024 1025 static int 1026 vxlan_tep_up(struct vxlan_softc *sc) 1027 { 1028 struct vxlan_peer *up, *mp; 1029 int error; 1030 1031 up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO); 1032 if (up == NULL) 1033 return (ENOMEM); 1034 1035 if (sc->sc_mode == VXLAN_TMODE_P2P) 1036 up->p_addr = sc->sc_dst; 1037 up->p_header = sc->sc_header; 1038 up->p_sc = vxlan_take(sc); 1039 1040 error = vxlan_tep_add_addr(sc, &sc->sc_src, up); 1041 if (error != 0) 1042 goto freeup; 1043 1044 sc->sc_ucast_peer = up; 1045 1046 if (sc->sc_mode != VXLAN_TMODE_LEARNING) 1047 
return (0); 1048 1049 mp = malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO); 1050 if (mp == NULL) { 1051 error = ENOMEM; 1052 goto delup; 1053 } 1054 1055 /* addr is multicast, leave it as 0s */ 1056 mp->p_header = sc->sc_header; 1057 mp->p_sc = vxlan_take(sc); 1058 1059 /* destination address is a multicast group we want to join */ 1060 error = vxlan_tep_add_addr(sc, &sc->sc_dst, up); 1061 if (error != 0) 1062 goto freemp; 1063 1064 sc->sc_mcast_peer = mp; 1065 1066 return (0); 1067 1068 freemp: 1069 vxlan_rele(mp->p_sc); 1070 free(mp, M_DEVBUF, sizeof(*mp)); 1071 delup: 1072 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1073 freeup: 1074 vxlan_rele(up->p_sc); 1075 free(up, M_DEVBUF, sizeof(*up)); 1076 return (error); 1077 } 1078 1079 static void 1080 vxlan_tep_down(struct vxlan_softc *sc) 1081 { 1082 struct vxlan_peer *up = sc->sc_ucast_peer; 1083 1084 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1085 struct vxlan_peer *mp = sc->sc_mcast_peer; 1086 vxlan_tep_del_addr(sc, &sc->sc_dst, mp); 1087 vxlan_rele(mp->p_sc); 1088 free(mp, M_DEVBUF, sizeof(*mp)); 1089 } 1090 1091 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1092 vxlan_rele(up->p_sc); 1093 free(up, M_DEVBUF, sizeof(*up)); 1094 } 1095 1096 static int 1097 vxlan_up(struct vxlan_softc *sc) 1098 { 1099 struct ifnet *ifp = &sc->sc_ac.ac_if; 1100 struct ifnet *ifp0 = NULL; 1101 int error; 1102 1103 KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); 1104 NET_ASSERT_LOCKED(); 1105 1106 if (sc->sc_af == AF_UNSPEC) 1107 return (EDESTADDRREQ); 1108 KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET); 1109 1110 NET_UNLOCK(); 1111 1112 error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); 1113 if (error != 0) 1114 goto netlock; 1115 1116 NET_LOCK(); 1117 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 1118 /* something else beat us */ 1119 rw_exit(&vxlan_lock); 1120 return (0); 1121 } 1122 NET_UNLOCK(); 1123 1124 if (sc->sc_mode != VXLAN_TMODE_P2P) { 1125 error = etherbridge_up(&sc->sc_eb); 1126 if (error != 0) 1127 goto unlock; 1128 } 1129 1130 if (sc->sc_mode 
== VXLAN_TMODE_LEARNING) { 1131 ifp0 = if_get(sc->sc_if_index0); 1132 if (ifp0 == NULL) { 1133 error = ENXIO; 1134 goto down; 1135 } 1136 1137 /* check again if multicast will work on top of the parent */ 1138 if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { 1139 error = EPROTONOSUPPORT; 1140 goto put; 1141 } 1142 1143 error = vxlan_addmulti(sc, ifp0); 1144 if (error != 0) 1145 goto put; 1146 1147 /* Register callback if parent wants to unregister */ 1148 if_detachhook_add(ifp0, &sc->sc_dtask); 1149 } else { 1150 if (sc->sc_if_index0 != 0) { 1151 error = EPROTONOSUPPORT; 1152 goto down; 1153 } 1154 } 1155 1156 error = vxlan_tep_up(sc); 1157 if (error != 0) 1158 goto del; 1159 1160 if_put(ifp0); 1161 1162 NET_LOCK(); 1163 SET(ifp->if_flags, IFF_RUNNING); 1164 rw_exit(&vxlan_lock); 1165 1166 return (0); 1167 1168 del: 1169 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1170 if (ifp0 != NULL) 1171 if_detachhook_del(ifp0, &sc->sc_dtask); 1172 vxlan_delmulti(sc); 1173 } 1174 put: 1175 if_put(ifp0); 1176 down: 1177 if (sc->sc_mode != VXLAN_TMODE_P2P) 1178 etherbridge_down(&sc->sc_eb); 1179 unlock: 1180 rw_exit(&vxlan_lock); 1181 netlock: 1182 NET_LOCK(); 1183 1184 return (error); 1185 } 1186 1187 static int 1188 vxlan_down(struct vxlan_softc *sc) 1189 { 1190 struct ifnet *ifp = &sc->sc_ac.ac_if; 1191 struct ifnet *ifp0; 1192 int error; 1193 1194 KASSERT(ISSET(ifp->if_flags, IFF_RUNNING)); 1195 NET_UNLOCK(); 1196 1197 error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); 1198 if (error != 0) { 1199 NET_LOCK(); 1200 return (error); 1201 } 1202 1203 NET_LOCK(); 1204 if (!ISSET(ifp->if_flags, IFF_RUNNING)) { 1205 /* something else beat us */ 1206 rw_exit(&vxlan_lock); 1207 return (0); 1208 } 1209 NET_UNLOCK(); 1210 1211 vxlan_tep_down(sc); 1212 1213 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1214 vxlan_delmulti(sc); 1215 ifp0 = if_get(sc->sc_if_index0); 1216 if (ifp0 != NULL) { 1217 if_detachhook_del(ifp0, &sc->sc_dtask); 1218 } 1219 if_put(ifp0); 1220 } 1221 1222 if (sc->sc_mode != 
VXLAN_TMODE_P2P) 1223 etherbridge_down(&sc->sc_eb); 1224 1225 taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task); 1226 NET_LOCK(); 1227 CLR(ifp->if_flags, IFF_RUNNING); 1228 rw_exit(&vxlan_lock); 1229 1230 return (0); 1231 } 1232 1233 static int 1234 vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0) 1235 { 1236 int error = 0; 1237 1238 NET_LOCK(); 1239 1240 switch (sc->sc_af) { 1241 case AF_INET: 1242 sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0); 1243 if (sc->sc_inmulti == NULL) 1244 error = EADDRNOTAVAIL; 1245 break; 1246 #ifdef INET6 1247 case AF_INET6: 1248 sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error); 1249 break; 1250 #endif 1251 default: 1252 unhandled_af(sc->sc_af); 1253 } 1254 1255 NET_UNLOCK(); 1256 1257 return (error); 1258 } 1259 1260 static void 1261 vxlan_delmulti(struct vxlan_softc *sc) 1262 { 1263 NET_LOCK(); 1264 1265 switch (sc->sc_af) { 1266 case AF_INET: 1267 in_delmulti(sc->sc_inmulti); 1268 break; 1269 #ifdef INET6 1270 case AF_INET6: 1271 in6_delmulti(sc->sc_inmulti); 1272 break; 1273 #endif 1274 default: 1275 unhandled_af(sc->sc_af); 1276 } 1277 1278 sc->sc_inmulti = NULL; /* keep it tidy */ 1279 1280 NET_UNLOCK(); 1281 } 1282 1283 static int 1284 vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr) 1285 { 1286 struct ifnet *ifp = &sc->sc_ac.ac_if; 1287 1288 if (ifr->ifr_rdomainid < 0 || 1289 ifr->ifr_rdomainid > RT_TABLEID_MAX) 1290 return (EINVAL); 1291 if (!rtable_exists(ifr->ifr_rdomainid)) 1292 return (EADDRNOTAVAIL); 1293 1294 if (sc->sc_rdomain == ifr->ifr_rdomainid) 1295 return (0); 1296 1297 if (ISSET(ifp->if_flags, IFF_RUNNING)) 1298 return (EBUSY); 1299 1300 /* commit */ 1301 sc->sc_rdomain = ifr->ifr_rdomainid; 1302 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); 1303 1304 return (0); 1305 } 1306 1307 static int 1308 vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr) 1309 { 1310 ifr->ifr_rdomainid = sc->sc_rdomain; 1311 1312 return (0); 1313 } 1314 1315 static int 
vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req)
{
	/*
	 * Configure the tunnel source/destination addresses and port
	 * (presumably the SIOCSLIFPHYADDR handler — confirm at the
	 * ioctl switch).  The tunnel mode is derived from the request:
	 * no destination -> ENDPOINT, multicast destination -> LEARNING,
	 * unicast destination -> P2P.  The port defaults to VXLAN_PORT
	 * unless the source sockaddr carries a non-zero port.
	 */
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif
	union vxlan_addr saddr, daddr;
	unsigned int mode = VXLAN_TMODE_ENDPOINT;
	in_port_t port = htons(VXLAN_PORT);

	memset(&saddr, 0, sizeof(saddr));
	memset(&daddr, 0, sizeof(daddr));

	/* validate */
	switch (src->sa_family) {
	case AF_INET:
		src4 = (struct sockaddr_in *)src;
		/* the local address must be a specific unicast address */
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		if (src4->sin_port != htons(0))
			port = src4->sin_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* source and destination families must agree */
			if (dst->sa_family != AF_INET)
				return (EINVAL);

			dst4 = (struct sockaddr_in *)dst;
			if (in_nullhost(dst4->sin_addr))
				return (EINVAL);

			/* all good */
			mode = IN_MULTICAST(dst4->sin_addr.s_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			daddr.in4 = dst4->sin_addr;
		}

		saddr.in4 = src4->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		if (src6->sin6_port != htons(0))
			port = src6->sin6_port;

		if (dst->sa_family != AF_UNSPEC) {
			if (dst->sa_family != AF_INET6)
				return (EINVAL);

			dst6 = (struct sockaddr_in6 *)dst;
			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr))
				return (EINVAL);

			/* both ends must share a link-local scope */
			if (src6->sin6_scope_id != dst6->sin6_scope_id)
				return (EINVAL);

			/* all good */
			mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			/* store the address with the scope id embedded */
			error = in6_embedscope(&daddr.in6, dst6, NULL);
			if (error != 0)
				return (error);
		}

		error = in6_embedscope(&saddr.in6, src6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* no change requested? */
	if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 &&
	    memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 &&
	    sc->sc_port == port)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = src->sa_family;
	sc->sc_src = saddr;
	sc->sc_dst = daddr;
	sc->sc_port = port;
	sc->sc_mode = mode;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Report the configured tunnel addresses back to userland, recovering
 * the IPv6 scope ids for display.  In ENDPOINT mode only the source is
 * reported; the destination stays AF_UNSPEC.
 */
static int
vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req)
{
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	if (sc->sc_af == AF_UNSPEC)
		return (EADDRNOTAVAIL);
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	memset(&req->addr, 0, sizeof(req->addr));
	memset(&req->dstaddr, 0, sizeof(req->dstaddr));

	/* default to endpoint */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	switch (sc->sc_af) {
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_src.in4;
		sin->sin_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin = (struct sockaddr_in *)&req->dstaddr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_dst.in4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &sc->sc_src.in6);
		sin6->sin6_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin6 = (struct sockaddr_in6 *)&req->dstaddr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &sc->sc_dst.in6);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	return (0);
}

/*
 * Clear the tunnel configuration.  Only allowed while the interface is
 * down; learned endpoints are flushed since they are meaningless
 * without a tunnel.
 */
static int
vxlan_del_tunnel(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (sc->sc_af == AF_UNSPEC)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = AF_UNSPEC;
	memset(&sc->sc_src, 0, sizeof(sc->sc_src));
	memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
	sc->sc_port = htons(0);
	sc->sc_mode = VXLAN_TMODE_UNSET;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Set the VXLAN Network Identifier.  The VNI and the I ("valid VNI")
 * flag are kept in the softc pre-shifted and in network byte order so
 * the header can be copied straight into outgoing packets.
 */
static int
vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	uint32_t vni;

	if (ifr->ifr_vnetid < VXLAN_VNI_MIN ||
	    ifr->ifr_vnetid > VXLAN_VNI_MAX)
		return (EINVAL);

	vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT);
	if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) &&
	    sc->sc_header.vxlan_id == vni)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
	sc->sc_header.vxlan_id = vni;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/* Report the configured VNI, undoing the shift and byte swap. */
static int
vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr)
{
	uint32_t vni;

	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
		return (EADDRNOTAVAIL);

	vni = ntohl(sc->sc_header.vxlan_id);
	vni &= VXLAN_VNI_MASK;
	vni >>= VXLAN_VNI_SHIFT;

	ifr->ifr_vnetid = vni;

	return (0);
}

/* Clear the VNI and its valid flag; only allowed while down. */
static int
vxlan_del_vnetid(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
	sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT);
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Attach a parent interface, referenced by index in sc_if_index0.
 * The parent must be multicast capable (it is where group memberships
 * are joined).  Only allowed while down.
 */
static int
vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_unit(p->ifp_parent);
	if (ifp0 == NULL)
		return (ENXIO);

	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENXIO;
		goto put;
	}

	if (sc->sc_if_index0 == ifp0->if_index)
		goto put;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = EBUSY;
		goto put;
	}

	/* commit */
	sc->sc_if_index0 = ifp0->if_index;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

put:
	if_put(ifp0);
	return (error);
}

/*
 * Report the parent interface name, if one is (still) attached.
 * if_put() tolerates the NULL from a failed if_get().
 */
static int
vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p)
{
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_get(sc->sc_if_index0);
	if (ifp0 == NULL)
		error = EADDRNOTAVAIL;
	else
		strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
	if_put(ifp0);

	return (error);
}

/* Detach from the parent interface; only allowed while down. */
static int
vxlan_del_parent(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (sc->sc_if_index0 == 0)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_if_index0 = 0;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

static int 1632 vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba) 1633 { 1634 struct sockaddr_in *sin; 1635 #ifdef INET6 1636 struct sockaddr_in6 *sin6; 1637 struct sockaddr_in6 src6 = { 1638 .sin6_len = sizeof(src6), 1639 .sin6_family = AF_UNSPEC, 1640 }; 1641 int error; 1642 #endif 1643 union vxlan_addr endpoint; 1644 unsigned int type; 1645 1646 switch (sc->sc_mode) { 1647 case VXLAN_TMODE_UNSET: 1648 return (ENOPROTOOPT); 1649 case VXLAN_TMODE_P2P: 1650 return (EPROTONOSUPPORT); 1651 default: 1652 break; 1653 } 1654 1655 /* ignore ifba_ifsname */ 1656 1657 if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK)) 1658 return (EINVAL); 1659 switch (ifba->ifba_flags & IFBAF_TYPEMASK) { 1660 case IFBAF_DYNAMIC: 1661 type = EBE_DYNAMIC; 1662 break; 1663 case IFBAF_STATIC: 1664 type = EBE_STATIC; 1665 break; 1666 default: 1667 return (EINVAL); 1668 } 1669 1670 memset(&endpoint, 0, sizeof(endpoint)); 1671 1672 if (ifba->ifba_dstsa.ss_family != sc->sc_af) 1673 return (EAFNOSUPPORT); 1674 switch (ifba->ifba_dstsa.ss_family) { 1675 case AF_INET: 1676 sin = (struct sockaddr_in *)&ifba->ifba_dstsa; 1677 if (in_nullhost(sin->sin_addr) || 1678 IN_MULTICAST(sin->sin_addr.s_addr)) 1679 return (EADDRNOTAVAIL); 1680 1681 if (sin->sin_port != htons(0)) 1682 return (EADDRNOTAVAIL); 1683 1684 endpoint.in4 = sin->sin_addr; 1685 break; 1686 1687 #ifdef INET6 1688 case AF_INET6: 1689 sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa; 1690 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 1691 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 1692 return (EADDRNOTAVAIL); 1693 1694 in6_recoverscope(&src6, &sc->sc_src.in6); 1695 if (src6.sin6_scope_id != sin6->sin6_scope_id) 1696 return (EADDRNOTAVAIL); 1697 1698 if (sin6->sin6_port != htons(0)) 1699 return (EADDRNOTAVAIL); 1700 1701 error = in6_embedscope(&endpoint.in6, sin6, NULL); 1702 if (error != 0) 1703 return (error); 1704 1705 break; 1706 #endif 1707 default: /* AF_UNSPEC */ 1708 return (EADDRNOTAVAIL); 1709 } 1710 1711 return 
(etherbridge_add_addr(&sc->sc_eb, &endpoint, 1712 &ifba->ifba_dst, type)); 1713 } 1714 1715 static int 1716 vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba) 1717 { 1718 return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst)); 1719 } 1720 1721 void 1722 vxlan_detach_hook(void *arg) 1723 { 1724 struct vxlan_softc *sc = arg; 1725 struct ifnet *ifp = &sc->sc_ac.ac_if; 1726 1727 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 1728 vxlan_down(sc); 1729 CLR(ifp->if_flags, IFF_UP); 1730 } 1731 1732 sc->sc_if_index0 = 0; 1733 } 1734 1735 static int 1736 vxlan_eb_port_eq(void *arg, void *a, void *b) 1737 { 1738 const union vxlan_addr *va = a, *vb = b; 1739 size_t i; 1740 1741 for (i = 0; i < nitems(va->in6.s6_addr32); i++) { 1742 if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i]) 1743 return (0); 1744 } 1745 1746 return (1); 1747 } 1748 1749 static void * 1750 vxlan_eb_port_take(void *arg, void *port) 1751 { 1752 union vxlan_addr *endpoint; 1753 1754 endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT); 1755 if (endpoint == NULL) 1756 return (NULL); 1757 1758 *endpoint = *(union vxlan_addr *)port; 1759 1760 return (endpoint); 1761 } 1762 1763 static void 1764 vxlan_eb_port_rele(void *arg, void *port) 1765 { 1766 union vxlan_addr *endpoint = port; 1767 1768 pool_put(&vxlan_endpoint_pool, endpoint); 1769 } 1770 1771 static size_t 1772 vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port) 1773 { 1774 struct vxlan_softc *sc = arg; 1775 1776 return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len)); 1777 } 1778 1779 static void 1780 vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port) 1781 { 1782 struct vxlan_softc *sc = arg; 1783 union vxlan_addr *endpoint = port; 1784 1785 switch (sc->sc_af) { 1786 case AF_INET: { 1787 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 1788 1789 sin->sin_len = sizeof(*sin); 1790 sin->sin_family = AF_INET; 1791 sin->sin_addr = endpoint->in4; 1792 break; 1793 } 1794 #ifdef INET6 1795 case AF_INET6: { 1796 
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 1797 1798 sin6->sin6_len = sizeof(*sin6); 1799 sin6->sin6_family = AF_INET6; 1800 in6_recoverscope(sin6, &endpoint->in6); 1801 break; 1802 } 1803 #endif /* INET6 */ 1804 default: 1805 unhandled_af(sc->sc_af); 1806 } 1807 } 1808 1809 static inline int 1810 vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp) 1811 { 1812 size_t i; 1813 1814 if (ap->p_header.vxlan_id > bp->p_header.vxlan_id) 1815 return (1); 1816 if (ap->p_header.vxlan_id < bp->p_header.vxlan_id) 1817 return (-1); 1818 if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags) 1819 return (1); 1820 if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags) 1821 return (-1); 1822 1823 for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) { 1824 if (ap->p_addr.in6.s6_addr32[i] > 1825 bp->p_addr.in6.s6_addr32[i]) 1826 return (1); 1827 if (ap->p_addr.in6.s6_addr32[i] < 1828 bp->p_addr.in6.s6_addr32[i]) 1829 return (-1); 1830 } 1831 1832 return (0); 1833 } 1834 1835 RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); 1836