/*	$OpenBSD: if_vxlan.c,v 1.92 2023/04/13 02:19:05 jsg Exp $ */

/*
 * Copyright (c) 2021 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "pf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/pool.h>
#include <sys/tree.h>
#include <sys/refcnt.h>
#include <sys/smr.h>

#include <sys/socketvar.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/rtable.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
#endif

/* for bridge stuff */
#include <net/if_bridge.h>
#include <net/if_etherbridge.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * The protocol.
 */

#define VXLANMTU		1492
#define VXLAN_PORT		4789

/* VXLAN header as it appears on the wire, after the UDP header. */
struct vxlan_header {
	uint32_t		vxlan_flags;	/* network byte order */
#define VXLAN_F_I		(1U << 27)	/* VNI field is valid */
	uint32_t		vxlan_id;	/* VNI, already shifted */
#define VXLAN_VNI_SHIFT		8
#define VXLAN_VNI_MASK		(0xffffffU << VXLAN_VNI_SHIFT)
};

#define VXLAN_VNI_MAX		0x00ffffffU
#define VXLAN_VNI_MIN		0x00000000U

/*
 * The driver.
 */

/* a tunnel endpoint address, either v4 or v6 */
union vxlan_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

struct vxlan_softc;

/*
 * A (remote address, vxlan header) pair that maps received packets on
 * a tunnel endpoint back to the vxlan interface they belong to.
 */
struct vxlan_peer {
	RBT_ENTRY(vxlan_peer)	 p_entry;

	struct vxlan_header	 p_header;	/* flags + VNI to match */
	union vxlan_addr	 p_addr;	/* peer address, or 0s */

	struct vxlan_softc	*p_sc;		/* owning interface */
};

RBT_HEAD(vxlan_peers, vxlan_peer);

/*
 * A tunnel endpoint: a bound UDP socket shared by all vxlan interfaces
 * that use the same af/rdomain/local address/port tuple.
 */
struct vxlan_tep {
	TAILQ_ENTRY(vxlan_tep)	 vt_entry;

	sa_family_t		 vt_af;
	unsigned int		 vt_rdomain;
	union vxlan_addr	 vt_addr;
#define vt_addr4 vt_addr.in4
#define vt_addr6 vt_addr.in6
	in_port_t		 vt_port;

	struct socket		*vt_so;		/* the bound UDP socket */

	struct mutex		 vt_mtx;	/* protects vt_peers */
	struct vxlan_peers	 vt_peers;
};

TAILQ_HEAD(vxlan_teps, vxlan_tep);

/* how the tunnel destination is used; derived from SIOCSLIFPHYADDR */
enum vxlan_tunnel_mode {
	VXLAN_TMODE_UNSET,
	VXLAN_TMODE_P2P,	 /* unicast destination, no learning */
	VXLAN_TMODE_LEARNING,	 /* multicast destination, learning */
	VXLAN_TMODE_ENDPOINT,	 /* unset destination, no learning */
};

/* per-interface state */
struct vxlan_softc {
	struct arpcom		 sc_ac;
	struct etherbridge	 sc_eb;		/* learned MAC -> endpoint */

	unsigned int		 sc_rdomain;
	sa_family_t		 sc_af;		/* AF_UNSPEC until configured */
	union vxlan_addr	 sc_src;
	union vxlan_addr	 sc_dst;
	in_port_t		 sc_port;
	struct vxlan_header	 sc_header;	/* template for tx */
	unsigned int		 sc_if_index0;	/* parent ifindex (learning) */

	struct task		 sc_dtask;	/* parent detach hook */
	void			*sc_inmulti;	/* multicast membership */

	enum vxlan_tunnel_mode	 sc_mode;
	struct vxlan_peer	*sc_ucast_peer;
	struct vxlan_peer	*sc_mcast_peer;
	struct refcnt		 sc_refs;

	uint16_t		 sc_df;		/* htons(IP_DF) or 0 */
	int			 sc_ttl;
	int			 sc_txhprio;
	int			 sc_rxhprio;

	struct task		 sc_send_task;	/* dequeues if_snd */
};

void		vxlanattach(int);

static int	vxlan_clone_create(struct if_clone *, int);
static int	vxlan_clone_destroy(struct ifnet *);

static int	vxlan_output(struct ifnet *, struct mbuf *,
		    struct sockaddr *, struct rtentry *);
static int	vxlan_enqueue(struct ifnet *, struct mbuf *);
static void	vxlan_start(struct ifqueue *);
static void	vxlan_send(void *);

static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);
static int	vxlan_up(struct vxlan_softc *);
static int	vxlan_down(struct vxlan_softc *);
static int	vxlan_addmulti(struct vxlan_softc *, struct ifnet *);
static void	vxlan_delmulti(struct vxlan_softc *);

static struct mbuf *
		vxlan_input(void *, struct mbuf *,
		    struct ip *, struct ip6_hdr *, void *, int);

static int	vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *);
static int	vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *);
static int	vxlan_set_tunnel(struct vxlan_softc *,
		    const struct if_laddrreq *);
static int	vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *);
static int	vxlan_del_tunnel(struct vxlan_softc *);
static int	vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *);
static int	vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *);
static int	vxlan_del_vnetid(struct vxlan_softc *);
static int	vxlan_set_parent(struct vxlan_softc *,
		    const struct if_parent *);
static int	vxlan_get_parent(struct vxlan_softc *, struct if_parent *);
static int	vxlan_del_parent(struct vxlan_softc *);

static int	vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *);
static int	vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *);

static void	vxlan_detach_hook(void *);

static struct if_clone vxlan_cloner =
    IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy);

static int	 vxlan_eb_port_eq(void *, void *, void *);
static void	*vxlan_eb_port_take(void *, void *);
static void	 vxlan_eb_port_rele(void *, void *);
static size_t vxlan_eb_port_ifname(void *, char *, size_t, void *); 206 static void vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *); 207 208 static const struct etherbridge_ops vxlan_etherbridge_ops = { 209 vxlan_eb_port_eq, 210 vxlan_eb_port_take, 211 vxlan_eb_port_rele, 212 vxlan_eb_port_ifname, 213 vxlan_eb_port_sa, 214 }; 215 216 static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps"); 217 static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps); 218 static struct pool vxlan_endpoint_pool; 219 220 static inline int vxlan_peer_cmp(const struct vxlan_peer *, 221 const struct vxlan_peer *); 222 223 RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); 224 225 void 226 vxlanattach(int count) 227 { 228 if_clone_attach(&vxlan_cloner); 229 } 230 231 static int 232 vxlan_clone_create(struct if_clone *ifc, int unit) 233 { 234 struct vxlan_softc *sc; 235 struct ifnet *ifp; 236 int error; 237 238 if (vxlan_endpoint_pool.pr_size == 0) { 239 pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr), 240 0, IPL_SOFTNET, 0, "vxlanep", NULL); 241 } 242 243 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); 244 if (sc == NULL) 245 return (ENOMEM); 246 247 ifp = &sc->sc_ac.ac_if; 248 249 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", 250 ifc->ifc_name, unit); 251 252 error = etherbridge_init(&sc->sc_eb, ifp->if_xname, 253 &vxlan_etherbridge_ops, sc); 254 if (error == -1) { 255 free(sc, M_DEVBUF, sizeof(*sc)); 256 return (error); 257 } 258 259 sc->sc_af = AF_UNSPEC; 260 sc->sc_txhprio = 0; 261 sc->sc_rxhprio = IF_HDRPRIO_OUTER; 262 sc->sc_df = 0; 263 sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL; 264 265 task_set(&sc->sc_dtask, vxlan_detach_hook, sc); 266 refcnt_init(&sc->sc_refs); 267 task_set(&sc->sc_send_task, vxlan_send, sc); 268 269 ifp->if_softc = sc; 270 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 271 ifp->if_ioctl = vxlan_ioctl; 272 ifp->if_output = vxlan_output; 273 ifp->if_enqueue = vxlan_enqueue; 274 ifp->if_qstart = 
vxlan_start; 275 ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX; 276 ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; 277 ether_fakeaddr(ifp); 278 279 if_counters_alloc(ifp); 280 if_attach(ifp); 281 ether_ifattach(ifp); 282 283 return (0); 284 } 285 286 static int 287 vxlan_clone_destroy(struct ifnet *ifp) 288 { 289 struct vxlan_softc *sc = ifp->if_softc; 290 291 NET_LOCK(); 292 if (ISSET(ifp->if_flags, IFF_RUNNING)) 293 vxlan_down(sc); 294 NET_UNLOCK(); 295 296 ether_ifdetach(ifp); 297 if_detach(ifp); 298 299 etherbridge_destroy(&sc->sc_eb); 300 301 refcnt_finalize(&sc->sc_refs, "vxlanfini"); 302 303 free(sc, M_DEVBUF, sizeof(*sc)); 304 305 return (0); 306 } 307 308 static struct vxlan_softc * 309 vxlan_take(struct vxlan_softc *sc) 310 { 311 refcnt_take(&sc->sc_refs); 312 return (sc); 313 } 314 315 static void 316 vxlan_rele(struct vxlan_softc *sc) 317 { 318 refcnt_rele_wake(&sc->sc_refs); 319 } 320 321 static struct mbuf * 322 vxlan_encap(struct vxlan_softc *sc, struct mbuf *m, 323 struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *, 324 const union vxlan_addr *, uint8_t)) 325 { 326 struct ifnet *ifp = &sc->sc_ac.ac_if; 327 struct m_tag *mtag; 328 struct mbuf *m0; 329 union vxlan_addr gateway; 330 const union vxlan_addr *endpoint; 331 struct vxlan_header *vh; 332 struct udphdr *uh; 333 int prio; 334 uint8_t tos; 335 336 if (sc->sc_mode == VXLAN_TMODE_UNSET) 337 goto drop; 338 339 if (sc->sc_mode == VXLAN_TMODE_P2P) 340 endpoint = &sc->sc_dst; 341 else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */ 342 struct ether_header *eh = mtod(m, struct ether_header *); 343 344 smr_read_enter(); 345 endpoint = etherbridge_resolve_ea(&sc->sc_eb, 346 (struct ether_addr *)eh->ether_dhost); 347 if (endpoint != NULL) { 348 gateway = *endpoint; 349 endpoint = &gateway; 350 } 351 smr_read_leave(); 352 353 if (endpoint == NULL) { 354 if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) 355 goto drop; 356 357 /* "flood" to unknown destinations */ 358 endpoint = 
&sc->sc_dst; 359 } 360 } 361 362 /* force prepend mbuf because of payload alignment */ 363 m0 = m_get(M_DONTWAIT, m->m_type); 364 if (m0 == NULL) 365 goto drop; 366 367 m_align(m0, 0); 368 m0->m_len = 0; 369 370 M_MOVE_PKTHDR(m0, m); 371 m0->m_next = m; 372 373 m = m_prepend(m0, sizeof(*vh), M_DONTWAIT); 374 if (m == NULL) 375 return (NULL); 376 377 vh = mtod(m, struct vxlan_header *); 378 *vh = sc->sc_header; 379 380 m = m_prepend(m, sizeof(*uh), M_DONTWAIT); 381 if (m == NULL) 382 return (NULL); 383 384 uh = mtod(m, struct udphdr *); 385 uh->uh_sport = sc->sc_port; /* XXX */ 386 uh->uh_dport = sc->sc_port; 387 htobem16(&uh->uh_ulen, m->m_pkthdr.len); 388 uh->uh_sum = htons(0); 389 390 SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); 391 392 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); 393 if (mtag == NULL) 394 goto drop; 395 396 *(int *)(mtag + 1) = ifp->if_index; 397 m_tag_prepend(m, mtag); 398 399 prio = sc->sc_txhprio; 400 if (prio == IF_HDRPRIO_PACKET) 401 prio = m->m_pkthdr.pf.prio; 402 tos = IFQ_PRIO2TOS(prio); 403 404 CLR(m->m_flags, M_BCAST|M_MCAST); 405 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 406 407 #if NPF > 0 408 pf_pkt_addr_changed(m); 409 #endif 410 411 return ((*ip_encap)(sc, m, endpoint, tos)); 412 drop: 413 m_freem(m); 414 return (NULL); 415 } 416 417 static struct mbuf * 418 vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m, 419 const union vxlan_addr *endpoint, uint8_t tos) 420 { 421 struct ip *ip; 422 423 m = m_prepend(m, sizeof(*ip), M_DONTWAIT); 424 if (m == NULL) 425 return (NULL); 426 427 ip = mtod(m, struct ip *); 428 ip->ip_v = IPVERSION; 429 ip->ip_hl = sizeof(*ip) >> 2; 430 ip->ip_off = sc->sc_df; 431 ip->ip_tos = tos; 432 ip->ip_len = htons(m->m_pkthdr.len); 433 ip->ip_ttl = sc->sc_ttl; 434 ip->ip_p = IPPROTO_UDP; 435 ip->ip_src = sc->sc_src.in4; 436 ip->ip_dst = endpoint->in4; 437 438 return (m); 439 } 440 441 #ifdef INET6 442 static struct mbuf * 443 vxlan_encap_ipv6(struct vxlan_softc *sc, struct mbuf *m, 
444 const union vxlan_addr *endpoint, uint8_t tos) 445 { 446 struct ip6_hdr *ip6; 447 int len = m->m_pkthdr.len; 448 449 m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); 450 if (m == NULL) 451 return (NULL); 452 453 ip6 = mtod(m, struct ip6_hdr *); 454 ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ? 455 htonl(m->m_pkthdr.ph_flowid) : 0; 456 ip6->ip6_vfc |= IPV6_VERSION; 457 ip6->ip6_flow |= htonl((uint32_t)tos << 20); 458 ip6->ip6_plen = htons(len); 459 ip6->ip6_nxt = IPPROTO_UDP; 460 ip6->ip6_hlim = sc->sc_ttl; 461 ip6->ip6_src = sc->sc_src.in6; 462 ip6->ip6_dst = endpoint->in6; 463 464 if (sc->sc_df) 465 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 466 467 return (m); 468 } 469 #endif /* INET6 */ 470 471 static int 472 vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 473 struct rtentry *rt) 474 { 475 struct m_tag *mtag; 476 477 mtag = NULL; 478 while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) { 479 if (*(int *)(mtag + 1) == ifp->if_index) { 480 m_freem(m); 481 return (EIO); 482 } 483 } 484 485 return (ether_output(ifp, m, dst, rt)); 486 } 487 488 static int 489 vxlan_enqueue(struct ifnet *ifp, struct mbuf *m) 490 { 491 struct vxlan_softc *sc = ifp->if_softc; 492 struct ifqueue *ifq = &ifp->if_snd; 493 494 if (ifq_enqueue(ifq, m) != 0) 495 return (ENOBUFS); 496 497 task_add(ifq->ifq_softnet, &sc->sc_send_task); 498 499 return (0); 500 } 501 502 static void 503 vxlan_start(struct ifqueue *ifq) 504 { 505 struct ifnet *ifp = ifq->ifq_if; 506 struct vxlan_softc *sc = ifp->if_softc; 507 508 task_add(ifq->ifq_softnet, &sc->sc_send_task); 509 } 510 511 static uint64_t 512 vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml) 513 { 514 struct ip_moptions imo; 515 struct mbuf *m; 516 uint64_t oerrors = 0; 517 518 imo.imo_ifidx = sc->sc_if_index0; 519 imo.imo_ttl = sc->sc_ttl; 520 imo.imo_loop = 0; 521 522 NET_LOCK(); 523 while ((m = ml_dequeue(ml)) != NULL) { 524 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0) 
525 oerrors++; 526 } 527 NET_UNLOCK(); 528 529 return (oerrors); 530 } 531 532 #ifdef INET6 533 static uint64_t 534 vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml) 535 { 536 struct ip6_moptions im6o; 537 struct mbuf *m; 538 uint64_t oerrors = 0; 539 540 im6o.im6o_ifidx = sc->sc_if_index0; 541 im6o.im6o_hlim = sc->sc_ttl; 542 im6o.im6o_loop = 0; 543 544 NET_LOCK(); 545 while ((m = ml_dequeue(ml)) != NULL) { 546 if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0) 547 oerrors++; 548 } 549 NET_UNLOCK(); 550 551 return (oerrors); 552 } 553 #endif /* INET6 */ 554 555 static void 556 vxlan_send(void *arg) 557 { 558 struct vxlan_softc *sc = arg; 559 struct ifnet *ifp = &sc->sc_ac.ac_if; 560 struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *, 561 const union vxlan_addr *, uint8_t); 562 uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *); 563 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 564 struct mbuf *m; 565 uint64_t oerrors; 566 567 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 568 return; 569 570 switch (sc->sc_af) { 571 case AF_INET: 572 ip_encap = vxlan_encap_ipv4; 573 ip_send = vxlan_send_ipv4; 574 break; 575 #ifdef INET6 576 case AF_INET6: 577 ip_encap = vxlan_encap_ipv6; 578 ip_send = vxlan_send_ipv6; 579 break; 580 #endif 581 default: 582 unhandled_af(sc->sc_af); 583 /* NOTREACHED */ 584 } 585 586 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { 587 #if NBPFILTER > 0 588 caddr_t if_bpf = READ_ONCE(ifp->if_bpf); 589 if (if_bpf != NULL) 590 bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); 591 #endif 592 m = vxlan_encap(sc, m, ip_encap); 593 if (m == NULL) 594 continue; 595 596 ml_enqueue(&ml, m); 597 } 598 599 oerrors = (*ip_send)(sc, &ml); 600 601 counters_add(ifp->if_counters, ifc_oerrors, oerrors); 602 } 603 604 static struct mbuf * 605 vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6, 606 void *uhp, int hlen) 607 { 608 struct vxlan_tep *vt = arg; 609 union vxlan_addr addr; 610 struct vxlan_peer key, *p; 611 
struct udphdr *uh; 612 struct vxlan_header *vh; 613 struct ether_header *eh; 614 int vhlen = hlen + sizeof(*vh); 615 struct mbuf *n; 616 int off; 617 in_port_t port; 618 struct vxlan_softc *sc = NULL; 619 struct ifnet *ifp; 620 int rxhprio; 621 uint8_t tos; 622 623 if (m->m_pkthdr.len < vhlen) 624 goto drop; 625 626 uh = uhp; 627 port = uh->uh_sport; 628 629 if (ip != NULL) { 630 memset(&addr, 0, sizeof(addr)); 631 addr.in4 = ip->ip_src; 632 tos = ip->ip_tos; 633 } 634 #ifdef INET6 635 else { 636 addr.in6 = ip6->ip6_src; 637 tos = bemtoh32(&ip6->ip6_flow) >> 20; 638 } 639 #endif 640 641 if (m->m_len < vhlen) { 642 m = m_pullup(m, vhlen); 643 if (m == NULL) 644 return (NULL); 645 } 646 647 /* can't use ip/ip6/uh after this */ 648 649 vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen); 650 651 memset(&key, 0, sizeof(key)); 652 key.p_addr = addr; 653 key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I); 654 key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK); 655 656 mtx_enter(&vt->vt_mtx); 657 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 658 if (p == NULL) { 659 memset(&key.p_addr, 0, sizeof(key.p_addr)); 660 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 661 } 662 if (p != NULL) 663 sc = vxlan_take(p->p_sc); 664 mtx_leave(&vt->vt_mtx); 665 666 if (sc == NULL) 667 goto drop; 668 669 ifp = &sc->sc_ac.ac_if; 670 if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port) 671 goto rele_drop; 672 673 m_adj(m, vhlen); 674 675 if (m->m_pkthdr.len < sizeof(*eh)) 676 goto rele_drop; 677 678 if (m->m_len < sizeof(*eh)) { 679 m = m_pullup(m, sizeof(*eh)); 680 if (m == NULL) 681 goto rele; 682 } 683 684 n = m_getptr(m, sizeof(*eh), &off); 685 if (n == NULL) 686 goto rele_drop; 687 688 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { 689 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); 690 m_freem(m); 691 if (n == NULL) 692 goto rele; 693 m = n; 694 } 695 696 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 697 eh = mtod(m, struct ether_header *); 698 
etherbridge_map_ea(&sc->sc_eb, &addr, 699 (struct ether_addr *)eh->ether_shost); 700 } 701 702 rxhprio = sc->sc_rxhprio; 703 switch (rxhprio) { 704 case IF_HDRPRIO_PACKET: 705 /* nop */ 706 break; 707 case IF_HDRPRIO_OUTER: 708 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos); 709 break; 710 default: 711 m->m_pkthdr.pf.prio = rxhprio; 712 break; \ 713 } \ 714 715 if_vinput(ifp, m); 716 rele: 717 vxlan_rele(sc); 718 return (NULL); 719 720 rele_drop: 721 vxlan_rele(sc); 722 drop: 723 m_freem(m); 724 return (NULL); 725 } 726 727 static int 728 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 729 { 730 struct vxlan_softc *sc = ifp->if_softc; 731 struct ifreq *ifr = (struct ifreq *)data; 732 struct ifbrparam *bparam = (struct ifbrparam *)data; 733 int error = 0; 734 735 switch (cmd) { 736 case SIOCSIFADDR: 737 break; 738 case SIOCSIFFLAGS: 739 if (ISSET(ifp->if_flags, IFF_UP)) { 740 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 741 error = vxlan_up(sc); 742 else 743 error = 0; 744 } else { 745 if (ISSET(ifp->if_flags, IFF_RUNNING)) 746 error = vxlan_down(sc); 747 } 748 break; 749 750 case SIOCSLIFPHYRTABLE: 751 error = vxlan_set_rdomain(sc, ifr); 752 break; 753 case SIOCGLIFPHYRTABLE: 754 error = vxlan_get_rdomain(sc, ifr); 755 break; 756 757 case SIOCSLIFPHYADDR: 758 error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data); 759 break; 760 case SIOCGLIFPHYADDR: 761 error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data); 762 break; 763 case SIOCDIFPHYADDR: 764 error = vxlan_del_tunnel(sc); 765 break; 766 767 case SIOCSVNETID: 768 error = vxlan_set_vnetid(sc, ifr); 769 break; 770 case SIOCGVNETID: 771 error = vxlan_get_vnetid(sc, ifr); 772 break; 773 case SIOCDVNETID: 774 error = vxlan_del_vnetid(sc); 775 break; 776 777 case SIOCSIFPARENT: 778 error = vxlan_set_parent(sc, (struct if_parent *)data); 779 break; 780 case SIOCGIFPARENT: 781 error = vxlan_get_parent(sc, (struct if_parent *)data); 782 break; 783 case SIOCDIFPARENT: 784 error = vxlan_del_parent(sc); 785 break; 
786 787 case SIOCSTXHPRIO: 788 error = if_txhprio_l2_check(ifr->ifr_hdrprio); 789 if (error != 0) 790 break; 791 792 sc->sc_txhprio = ifr->ifr_hdrprio; 793 break; 794 case SIOCGTXHPRIO: 795 ifr->ifr_hdrprio = sc->sc_txhprio; 796 break; 797 798 case SIOCSRXHPRIO: 799 error = if_rxhprio_l2_check(ifr->ifr_hdrprio); 800 if (error != 0) 801 break; 802 803 sc->sc_rxhprio = ifr->ifr_hdrprio; 804 break; 805 case SIOCGRXHPRIO: 806 ifr->ifr_hdrprio = sc->sc_rxhprio; 807 break; 808 809 case SIOCSLIFPHYDF: 810 /* commit */ 811 sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 812 break; 813 case SIOCGLIFPHYDF: 814 ifr->ifr_df = sc->sc_df ? 1 : 0; 815 break; 816 817 case SIOCSLIFPHYTTL: 818 if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) { 819 error = EINVAL; 820 break; 821 } 822 823 /* commit */ 824 sc->sc_ttl = (uint8_t)ifr->ifr_ttl; 825 break; 826 case SIOCGLIFPHYTTL: 827 ifr->ifr_ttl = (int)sc->sc_ttl; 828 break; 829 830 case SIOCBRDGSCACHE: 831 error = etherbridge_set_max(&sc->sc_eb, bparam); 832 break; 833 case SIOCBRDGGCACHE: 834 error = etherbridge_get_max(&sc->sc_eb, bparam); 835 break; 836 case SIOCBRDGSTO: 837 error = etherbridge_set_tmo(&sc->sc_eb, bparam); 838 break; 839 case SIOCBRDGGTO: 840 error = etherbridge_get_tmo(&sc->sc_eb, bparam); 841 break; 842 843 case SIOCBRDGRTS: 844 error = etherbridge_rtfind(&sc->sc_eb, 845 (struct ifbaconf *)data); 846 break; 847 case SIOCBRDGFLUSH: 848 etherbridge_flush(&sc->sc_eb, 849 ((struct ifbreq *)data)->ifbr_ifsflags); 850 break; 851 case SIOCBRDGSADDR: 852 error = vxlan_add_addr(sc, (struct ifbareq *)data); 853 break; 854 case SIOCBRDGDADDR: 855 error = vxlan_del_addr(sc, (struct ifbareq *)data); 856 break; 857 858 case SIOCADDMULTI: 859 case SIOCDELMULTI: 860 /* no hardware to program */ 861 break; 862 863 default: 864 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 865 break; 866 } 867 868 if (error == ENETRESET) { 869 /* no hardware to program */ 870 error = 0; 871 } 872 873 return (error); 874 } 875 876 static struct 
vxlan_tep * 877 vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr) 878 { 879 struct vxlan_tep *vt; 880 881 TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) { 882 if (sc->sc_af == vt->vt_af && 883 sc->sc_rdomain == vt->vt_rdomain && 884 memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 && 885 sc->sc_port == vt->vt_port) 886 return (vt); 887 } 888 889 return (NULL); 890 } 891 892 static int 893 vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, 894 struct vxlan_peer *p) 895 { 896 struct mbuf m; 897 struct vxlan_tep *vt; 898 struct socket *so; 899 struct sockaddr_in *sin; 900 #ifdef INET6 901 struct sockaddr_in6 *sin6; 902 #endif 903 int error; 904 905 vt = vxlan_tep_get(sc, addr); 906 if (vt != NULL) { 907 struct vxlan_peer *op; 908 909 mtx_enter(&vt->vt_mtx); 910 op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p); 911 mtx_leave(&vt->vt_mtx); 912 913 if (op != NULL) 914 return (EADDRINUSE); 915 916 return (0); 917 } 918 919 vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO); 920 if (vt == NULL) 921 return (ENOMEM); 922 923 vt->vt_af = sc->sc_af; 924 vt->vt_rdomain = sc->sc_rdomain; 925 vt->vt_addr = *addr; 926 vt->vt_port = sc->sc_port; 927 928 mtx_init(&vt->vt_mtx, IPL_SOFTNET); 929 RBT_INIT(vxlan_peers, &vt->vt_peers); 930 RBT_INSERT(vxlan_peers, &vt->vt_peers, p); 931 932 error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP); 933 if (error != 0) 934 goto free; 935 936 solock(so); 937 938 sotoinpcb(so)->inp_upcall = vxlan_input; 939 sotoinpcb(so)->inp_upcall_arg = vt; 940 941 m_inithdr(&m); 942 m.m_len = sizeof(vt->vt_rdomain); 943 *mtod(&m, unsigned int *) = vt->vt_rdomain; 944 error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m); 945 if (error != 0) 946 goto close; 947 948 m_inithdr(&m); 949 switch (vt->vt_af) { 950 case AF_INET: 951 sin = mtod(&m, struct sockaddr_in *); 952 memset(sin, 0, sizeof(*sin)); 953 sin->sin_len = sizeof(*sin); 954 sin->sin_family = AF_INET; 955 sin->sin_addr = addr->in4; 956 sin->sin_port = vt->vt_port; 957 958 
m.m_len = sizeof(*sin); 959 break; 960 961 #ifdef INET6 962 case AF_INET6: 963 sin6 = mtod(&m, struct sockaddr_in6 *); 964 sin6->sin6_len = sizeof(*sin6); 965 sin6->sin6_family = AF_INET6; 966 in6_recoverscope(sin6, &addr->in6); 967 sin6->sin6_port = sc->sc_port; 968 969 m.m_len = sizeof(*sin6); 970 break; 971 #endif 972 default: 973 unhandled_af(vt->vt_af); 974 } 975 976 error = sobind(so, &m, curproc); 977 if (error != 0) 978 goto close; 979 980 sounlock(so); 981 982 rw_assert_wrlock(&vxlan_lock); 983 TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry); 984 985 vt->vt_so = so; 986 987 return (0); 988 989 close: 990 sounlock(so); 991 soclose(so, MSG_DONTWAIT); 992 free: 993 free(vt, M_DEVBUF, sizeof(*vt)); 994 return (error); 995 } 996 997 static void 998 vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, 999 struct vxlan_peer *p) 1000 { 1001 struct vxlan_tep *vt; 1002 int empty; 1003 1004 vt = vxlan_tep_get(sc, addr); 1005 if (vt == NULL) 1006 panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc); 1007 1008 mtx_enter(&vt->vt_mtx); 1009 RBT_REMOVE(vxlan_peers, &vt->vt_peers, p); 1010 empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers); 1011 mtx_leave(&vt->vt_mtx); 1012 1013 if (!empty) 1014 return; 1015 1016 rw_assert_wrlock(&vxlan_lock); 1017 TAILQ_REMOVE(&vxlan_teps, vt, vt_entry); 1018 1019 soclose(vt->vt_so, MSG_DONTWAIT); 1020 free(vt, M_DEVBUF, sizeof(*vt)); 1021 } 1022 1023 static int 1024 vxlan_tep_up(struct vxlan_softc *sc) 1025 { 1026 struct vxlan_peer *up, *mp; 1027 int error; 1028 1029 up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO); 1030 if (up == NULL) 1031 return (ENOMEM); 1032 1033 if (sc->sc_mode == VXLAN_TMODE_P2P) 1034 up->p_addr = sc->sc_dst; 1035 up->p_header = sc->sc_header; 1036 up->p_sc = vxlan_take(sc); 1037 1038 error = vxlan_tep_add_addr(sc, &sc->sc_src, up); 1039 if (error != 0) 1040 goto freeup; 1041 1042 sc->sc_ucast_peer = up; 1043 1044 if (sc->sc_mode != VXLAN_TMODE_LEARNING) 1045 return (0); 1046 1047 mp = 
malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO); 1048 if (mp == NULL) { 1049 error = ENOMEM; 1050 goto delup; 1051 } 1052 1053 /* addr is multicast, leave it as 0s */ 1054 mp->p_header = sc->sc_header; 1055 mp->p_sc = vxlan_take(sc); 1056 1057 /* destination address is a multicast group we want to join */ 1058 error = vxlan_tep_add_addr(sc, &sc->sc_dst, up); 1059 if (error != 0) 1060 goto freemp; 1061 1062 sc->sc_mcast_peer = mp; 1063 1064 return (0); 1065 1066 freemp: 1067 vxlan_rele(mp->p_sc); 1068 free(mp, M_DEVBUF, sizeof(*mp)); 1069 delup: 1070 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1071 freeup: 1072 vxlan_rele(up->p_sc); 1073 free(up, M_DEVBUF, sizeof(*up)); 1074 return (error); 1075 } 1076 1077 static void 1078 vxlan_tep_down(struct vxlan_softc *sc) 1079 { 1080 struct vxlan_peer *up = sc->sc_ucast_peer; 1081 1082 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1083 struct vxlan_peer *mp = sc->sc_mcast_peer; 1084 vxlan_tep_del_addr(sc, &sc->sc_dst, mp); 1085 vxlan_rele(mp->p_sc); 1086 free(mp, M_DEVBUF, sizeof(*mp)); 1087 } 1088 1089 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1090 vxlan_rele(up->p_sc); 1091 free(up, M_DEVBUF, sizeof(*up)); 1092 } 1093 1094 static int 1095 vxlan_up(struct vxlan_softc *sc) 1096 { 1097 struct ifnet *ifp = &sc->sc_ac.ac_if; 1098 struct ifnet *ifp0 = NULL; 1099 int error; 1100 1101 KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); 1102 NET_ASSERT_LOCKED(); 1103 1104 if (sc->sc_af == AF_UNSPEC) 1105 return (EDESTADDRREQ); 1106 KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET); 1107 1108 NET_UNLOCK(); 1109 1110 error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); 1111 if (error != 0) 1112 goto netlock; 1113 1114 NET_LOCK(); 1115 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 1116 /* something else beat us */ 1117 rw_exit(&vxlan_lock); 1118 return (0); 1119 } 1120 NET_UNLOCK(); 1121 1122 if (sc->sc_mode != VXLAN_TMODE_P2P) { 1123 error = etherbridge_up(&sc->sc_eb); 1124 if (error != 0) 1125 goto unlock; 1126 } 1127 1128 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 
1129 ifp0 = if_get(sc->sc_if_index0); 1130 if (ifp0 == NULL) { 1131 error = ENXIO; 1132 goto down; 1133 } 1134 1135 /* check again if multicast will work on top of the parent */ 1136 if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { 1137 error = EPROTONOSUPPORT; 1138 goto put; 1139 } 1140 1141 error = vxlan_addmulti(sc, ifp0); 1142 if (error != 0) 1143 goto put; 1144 1145 /* Register callback if parent wants to unregister */ 1146 if_detachhook_add(ifp0, &sc->sc_dtask); 1147 } else { 1148 if (sc->sc_if_index0 != 0) { 1149 error = EPROTONOSUPPORT; 1150 goto down; 1151 } 1152 } 1153 1154 error = vxlan_tep_up(sc); 1155 if (error != 0) 1156 goto del; 1157 1158 if_put(ifp0); 1159 1160 NET_LOCK(); 1161 SET(ifp->if_flags, IFF_RUNNING); 1162 rw_exit(&vxlan_lock); 1163 1164 return (0); 1165 1166 del: 1167 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1168 if (ifp0 != NULL) 1169 if_detachhook_del(ifp0, &sc->sc_dtask); 1170 vxlan_delmulti(sc); 1171 } 1172 put: 1173 if_put(ifp0); 1174 down: 1175 if (sc->sc_mode != VXLAN_TMODE_P2P) 1176 etherbridge_down(&sc->sc_eb); 1177 unlock: 1178 rw_exit(&vxlan_lock); 1179 netlock: 1180 NET_LOCK(); 1181 1182 return (error); 1183 } 1184 1185 static int 1186 vxlan_down(struct vxlan_softc *sc) 1187 { 1188 struct ifnet *ifp = &sc->sc_ac.ac_if; 1189 struct ifnet *ifp0; 1190 int error; 1191 1192 KASSERT(ISSET(ifp->if_flags, IFF_RUNNING)); 1193 NET_UNLOCK(); 1194 1195 error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); 1196 if (error != 0) { 1197 NET_LOCK(); 1198 return (error); 1199 } 1200 1201 NET_LOCK(); 1202 if (!ISSET(ifp->if_flags, IFF_RUNNING)) { 1203 /* something else beat us */ 1204 rw_exit(&vxlan_lock); 1205 return (0); 1206 } 1207 NET_UNLOCK(); 1208 1209 vxlan_tep_down(sc); 1210 1211 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1212 vxlan_delmulti(sc); 1213 ifp0 = if_get(sc->sc_if_index0); 1214 if (ifp0 != NULL) { 1215 if_detachhook_del(ifp0, &sc->sc_dtask); 1216 } 1217 if_put(ifp0); 1218 } 1219 1220 if (sc->sc_mode != VXLAN_TMODE_P2P) 1221 
	etherbridge_down(&sc->sc_eb);

	/* wait for any transmit work queued on the softnet taskq to drain */
	taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task);
	/*
	 * NOTE(review): NET_LOCK is taken here and not released before
	 * returning — presumably the top of vxlan_down() (not visible in
	 * this chunk) dropped the net lock held by the ioctl path, and the
	 * caller expects to get it back; confirm against the full function.
	 */
	NET_LOCK();
	CLR(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);
}

/*
 * Join the multicast group sc_dst on the parent interface ifp0.
 * Takes the net lock itself, so it must be called without it held.
 * Returns 0 on success or an errno; the membership handle is kept
 * in sc_inmulti for vxlan_delmulti() to release later.
 */
static int
vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0)
{
	int error = 0;

	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0);
		if (sc->sc_inmulti == NULL)
			error = EADDRNOTAVAIL;
		break;
#ifdef INET6
	case AF_INET6:
		/* in6_addmulti reports failure through error directly */
		sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	NET_UNLOCK();

	return (error);
}

/*
 * Leave the multicast group joined by vxlan_addmulti().
 */
static void
vxlan_delmulti(struct vxlan_softc *sc)
{
	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		in_delmulti(sc->sc_inmulti);
		break;
#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inmulti);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	sc->sc_inmulti = NULL; /* keep it tidy */

	NET_UNLOCK();
}

/*
 * Move the tunnel into another routing domain.  Like the other
 * vxlan_set_* handlers below, configuration may only change while
 * the interface is down (IFF_RUNNING clear), and any change flushes
 * the learned endpoint table since it belongs to the old config.
 */
static int
vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ifr->ifr_rdomainid < 0 ||
	    ifr->ifr_rdomainid > RT_TABLEID_MAX)
		return (EINVAL);
	if (!rtable_exists(ifr->ifr_rdomainid))
		return (EADDRNOTAVAIL);

	if (sc->sc_rdomain == ifr->ifr_rdomainid)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_rdomain = ifr->ifr_rdomainid;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Report the routing domain the tunnel is configured in.
 */
static int
vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr)
{
	ifr->ifr_rdomainid = sc->sc_rdomain;

	return (0);
}

/*
 * Configure the tunnel endpoints.  The source address is mandatory
 * and selects the address family; the destination is optional and
 * picks the tunnel mode: absent -> ENDPOINT (no default destination),
 * multicast -> LEARNING, unicast -> P2P.  A non-zero port on the
 * source address overrides the default VXLAN port.
 */
static int
vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif
	union vxlan_addr saddr, daddr;
	unsigned int mode = VXLAN_TMODE_ENDPOINT;
	in_port_t port = htons(VXLAN_PORT);

	/* zero the whole unions so unused tail bytes compare equal later */
	memset(&saddr, 0, sizeof(saddr));
	memset(&daddr, 0, sizeof(daddr));

	/* validate */
	switch (src->sa_family) {
	case AF_INET:
		src4 = (struct sockaddr_in *)src;
		/* source must be a specific unicast address */
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		if (src4->sin_port != htons(0))
			port = src4->sin_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* both endpoints must share one address family */
			if (dst->sa_family != AF_INET)
				return (EINVAL);

			dst4 = (struct sockaddr_in *)dst;
			if (in_nullhost(dst4->sin_addr))
				return (EINVAL);

			/* all good */
			mode = IN_MULTICAST(dst4->sin_addr.s_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			daddr.in4 = dst4->sin_addr;
		}

		saddr.in4 = src4->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		src6 = (struct sockaddr_in6 *)src;
		/* source must be a specific unicast address */
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		if (src6->sin6_port != htons(0))
			port = src6->sin6_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* both endpoints must share one address family */
			if (dst->sa_family != AF_INET6)
				return (EINVAL);

			dst6 = (struct sockaddr_in6 *)dst;
			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr))
				return (EINVAL);

			/* link-local endpoints must be on the same link */
			if (src6->sin6_scope_id != dst6->sin6_scope_id)
				return (EINVAL);

			/* all good */
			mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			error = in6_embedscope(&daddr.in6, dst6, NULL);
			if (error != 0)
				return (error);
		}

		error = in6_embedscope(&saddr.in6, src6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* nothing to do if the configuration is unchanged */
	if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 &&
	    memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 &&
	    sc->sc_port == port)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = src->sa_family;
	sc->sc_src = saddr;
	sc->sc_dst = daddr;
	sc->sc_port = port;
	sc->sc_mode = mode;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Report the configured tunnel endpoints back to userland.
 * In ENDPOINT mode only the source address is returned; the
 * destination is left as a minimal AF_UNSPEC sockaddr.
 */
static int
vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req)
{
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	if (sc->sc_af == AF_UNSPEC)
		return (EADDRNOTAVAIL);
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	memset(&req->addr, 0, sizeof(req->addr));
	memset(&req->dstaddr, 0, sizeof(req->dstaddr));

	/* default to endpoint */
	dstaddr->sa_len = 2;	/* just sa_len + sa_family */
	dstaddr->sa_family = AF_UNSPEC;

	switch (sc->sc_af) {
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_src.in4;
		sin->sin_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin = (struct sockaddr_in *)&req->dstaddr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_dst.in4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* undo the scope embedding done at configuration time */
		in6_recoverscope(sin6, &sc->sc_src.in6);
		sin6->sin6_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin6 = (struct sockaddr_in6 *)&req->dstaddr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &sc->sc_dst.in6);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	return (0);
}

/*
 * Clear the tunnel endpoint configuration.  Only allowed while the
 * interface is down.
 */
static int
vxlan_del_tunnel(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (sc->sc_af == AF_UNSPEC)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = AF_UNSPEC;
	memset(&sc->sc_src, 0, sizeof(sc->sc_src));
	memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
	sc->sc_port = htons(0);
	sc->sc_mode = VXLAN_TMODE_UNSET;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Set the VXLAN Network Identifier.  The VNI is stored pre-shifted in
 * network byte order so the header can be compared/copied directly on
 * the packet path; the I flag marks that a VNI has been configured.
 */
static int
vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	uint32_t vni;

	if (ifr->ifr_vnetid < VXLAN_VNI_MIN ||
	    ifr->ifr_vnetid > VXLAN_VNI_MAX)
		return (EINVAL);

	vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT);
	if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) &&
	    sc->sc_header.vxlan_id == vni)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
	sc->sc_header.vxlan_id = vni;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Report the configured VNI, or EADDRNOTAVAIL if none has been set.
 */
static int
vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr)
{
	uint32_t vni;

	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
		return (EADDRNOTAVAIL);

	/* convert the wire-format header field back to a plain number */
	vni = ntohl(sc->sc_header.vxlan_id);
	vni &= VXLAN_VNI_MASK;
	vni >>= VXLAN_VNI_SHIFT;

	ifr->ifr_vnetid = vni;

	return (0);
}

/*
 * Clear the configured VNI.  Only allowed while the interface is down.
 */
static int
vxlan_del_vnetid(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
		return (0);

	/* commit */
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
	sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT);
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Attach a parent interface for multicast (LEARNING mode) use.
 * Only the interface index is stored, so the parent may detach
 * independently; vxlan_detach_hook() clears it again.
 */
static int
vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_unit(p->ifp_parent);
	if (ifp0 == NULL)
		return (ENXIO);

	/* the parent is only used for multicast group membership */
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENXIO;
		goto put;
	}

	if (sc->sc_if_index0 == ifp0->if_index)
		goto put;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = EBUSY;
		goto put;
	}

	/* commit */
	sc->sc_if_index0 = ifp0->if_index;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

put:
	if_put(ifp0);	/* balance the if_unit() reference */
	return (error);
}

/*
 * Report the name of the configured parent interface, if it still
 * exists.
 */
static int
vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p)
{
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_get(sc->sc_if_index0);
	if (ifp0 == NULL)
		error = EADDRNOTAVAIL;
	else
		strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
	if_put(ifp0);

	return (error);
}

/*
 * Detach from the parent interface.  Only allowed while the interface
 * is down.
 */
static int
vxlan_del_parent(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (sc->sc_if_index0 == 0)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_if_index0 = 0;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

static int
vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union vxlan_addr endpoint;
	unsigned int type;

	/*
	 * Manual address entries only make sense in modes with a
	 * per-destination endpoint table (LEARNING and ENDPOINT).
	 */
	switch (sc->sc_mode) {
	case VXLAN_TMODE_UNSET:
		return (ENOPROTOOPT);
	case VXLAN_TMODE_P2P:
		return (EPROTONOSUPPORT);
	default:
		break;
	}

	/* ignore ifba_ifsname */

	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	/* zero the whole union so unused tail bytes compare equal */
	memset(&endpoint, 0, sizeof(endpoint));

	/* the endpoint must match the tunnel's address family */
	if (ifba->ifba_dstsa.ss_family != sc->sc_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		/* endpoints must be specific unicast addresses */
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		/* per-endpoint ports are not supported */
		if (sin->sin_port != htons(0))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		/* endpoints must be specific unicast addresses */
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		/* the endpoint must be on the same link as our source */
		in6_recoverscope(&src6, &sc->sc_src.in6);
		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		/* per-endpoint ports are not supported */
		if (sin6->sin6_port != htons(0))
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}

/*
 * Remove a MAC address entry from the endpoint table.
 */
static int
vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
}

/*
 * Called when the parent interface is detached: take the interface
 * down and forget the parent index.
 */
void
vxlan_detach_hook(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		vxlan_down(sc);
		CLR(ifp->if_flags, IFF_UP);
	}

	sc->sc_if_index0 = 0;
}

/*
 * etherbridge callback: compare two endpoint "ports" for equality.
 * The full union is compared word by word; the unused tail of an
 * IPv4 endpoint is zero-filled at creation so this is safe for both
 * address families.
 */
static int
vxlan_eb_port_eq(void *arg, void *a, void *b)
{
	const union vxlan_addr *va = a, *vb = b;
	size_t i;

	for (i = 0; i < nitems(va->in6.s6_addr32); i++) {
		if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i])
			return (0);
	}

	return (1);
}

/*
 * etherbridge callback: take a reference on a port by copying the
 * endpoint address into a pool allocation.  May fail (PR_NOWAIT).
 */
static void *
vxlan_eb_port_take(void *arg, void *port)
{
	union vxlan_addr *endpoint;

	endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT);
	if (endpoint == NULL)
		return (NULL);

	*endpoint = *(union vxlan_addr *)port;

	return (endpoint);
}

/*
 * etherbridge callback: release a reference taken by
 * vxlan_eb_port_take().
 */
static void
vxlan_eb_port_rele(void *arg, void *port)
{
	union vxlan_addr *endpoint = port;

	pool_put(&vxlan_endpoint_pool, endpoint);
}

/*
 * etherbridge callback: the "interface name" for an endpoint is just
 * the vxlan interface itself.
 */
static size_t
vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
{
	struct vxlan_softc *sc = arg;

	return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
}

/*
 * etherbridge callback: render an endpoint as a sockaddr for
 * userland (brconfig/ifconfig address listings).
 */
static void
vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	struct vxlan_softc *sc = arg;
	union vxlan_addr *endpoint = port;

	switch (sc->sc_af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = endpoint->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* undo the scope embedding done at configuration time */
		in6_recoverscope(sin6, &endpoint->in6);
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_af);
	}
}

/*
 * Total order over peers for the vt_peers red-black tree: ordered by
 * VXLAN id, then flags (both kept in network byte order — the order
 * only needs to be consistent, not meaningful), then the endpoint
 * address word by word.
 */
static inline int
vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp)
{
	size_t i;

	if (ap->p_header.vxlan_id > bp->p_header.vxlan_id)
		return (1);
	if (ap->p_header.vxlan_id < bp->p_header.vxlan_id)
		return (-1);
	if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags)
		return (1);
	if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags)
		return (-1);

	for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) {
		if (ap->p_addr.in6.s6_addr32[i] >
		    bp->p_addr.in6.s6_addr32[i])
			return (1);
		if (ap->p_addr.in6.s6_addr32[i] <
		    bp->p_addr.in6.s6_addr32[i])
			return (-1);
	}

	return (0);
}

RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);