1 /* $OpenBSD: if_vxlan.c,v 1.91 2022/06/06 14:45:41 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2021 David Gwynne <dlg@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bpfilter.h" 20 #include "pf.h" 21 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/kernel.h> 25 #include <sys/mbuf.h> 26 #include <sys/socket.h> 27 #include <sys/ioctl.h> 28 #include <sys/timeout.h> 29 #include <sys/pool.h> 30 #include <sys/tree.h> 31 #include <sys/refcnt.h> 32 #include <sys/smr.h> 33 34 #include <sys/socket.h> 35 #include <sys/socketvar.h> 36 37 #include <net/if.h> 38 #include <net/if_var.h> 39 #include <net/if_dl.h> 40 #include <net/if_media.h> 41 #include <net/if_types.h> 42 #include <net/route.h> 43 #include <net/rtable.h> 44 45 #include <netinet/in.h> 46 #include <netinet/in_var.h> 47 #include <netinet/if_ether.h> 48 #include <netinet/ip.h> 49 #include <netinet/udp.h> 50 #include <netinet/in_pcb.h> 51 #include <netinet/ip_var.h> 52 53 #ifdef INET6 54 #include <netinet/ip6.h> 55 #include <netinet6/ip6_var.h> 56 #include <netinet6/in6_var.h> 57 #endif 58 59 /* for bridge stuff */ 60 #include <net/if_bridge.h> 61 #include <net/if_etherbridge.h> 62 63 #if NBPFILTER > 0 64 #include <net/bpf.h> 65 #endif 66 67 /* 68 * The protocol. 
69 */ 70 71 #define VXLANMTU 1492 72 #define VXLAN_PORT 4789 73 74 struct vxlan_header { 75 uint32_t vxlan_flags; 76 #define VXLAN_F_I (1U << 27) 77 uint32_t vxlan_id; 78 #define VXLAN_VNI_SHIFT 8 79 #define VXLAN_VNI_MASK (0xffffffU << VXLAN_VNI_SHIFT) 80 }; 81 82 #define VXLAN_VNI_MAX 0x00ffffffU 83 #define VXLAN_VNI_MIN 0x00000000U 84 85 /* 86 * The driver. 87 */ 88 89 union vxlan_addr { 90 struct in_addr in4; 91 struct in6_addr in6; 92 }; 93 94 struct vxlan_softc; 95 96 struct vxlan_peer { 97 RBT_ENTRY(vxlan_peer) p_entry; 98 99 struct vxlan_header p_header; 100 union vxlan_addr p_addr; 101 102 struct vxlan_softc *p_sc; 103 }; 104 105 RBT_HEAD(vxlan_peers, vxlan_peer); 106 107 struct vxlan_tep { 108 TAILQ_ENTRY(vxlan_tep) vt_entry; 109 110 sa_family_t vt_af; 111 unsigned int vt_rdomain; 112 union vxlan_addr vt_addr; 113 #define vt_addr4 vt_addr.in4 114 #define vt_addr6 vt_addr.in6 115 in_port_t vt_port; 116 117 struct socket *vt_so; 118 119 struct mutex vt_mtx; 120 struct vxlan_peers vt_peers; 121 }; 122 123 TAILQ_HEAD(vxlan_teps, vxlan_tep); 124 125 enum vxlan_tunnel_mode { 126 VXLAN_TMODE_UNSET, 127 VXLAN_TMODE_P2P, /* unicast destination, no learning */ 128 VXLAN_TMODE_LEARNING, /* multicast destination, learning */ 129 VXLAN_TMODE_ENDPOINT, /* unset destination, no learning */ 130 }; 131 132 struct vxlan_softc { 133 struct arpcom sc_ac; 134 struct etherbridge sc_eb; 135 136 unsigned int sc_rdomain; 137 sa_family_t sc_af; 138 union vxlan_addr sc_src; 139 union vxlan_addr sc_dst; 140 in_port_t sc_port; 141 struct vxlan_header sc_header; 142 unsigned int sc_if_index0; 143 144 struct task sc_dtask; 145 void *sc_inmulti; 146 147 enum vxlan_tunnel_mode sc_mode; 148 struct vxlan_peer *sc_ucast_peer; 149 struct vxlan_peer *sc_mcast_peer; 150 struct refcnt sc_refs; 151 152 uint16_t sc_df; 153 int sc_ttl; 154 int sc_txhprio; 155 int sc_rxhprio; 156 157 struct task sc_send_task; 158 }; 159 160 void vxlanattach(int); 161 162 static int vxlan_clone_create(struct 
if_clone *, int); 163 static int vxlan_clone_destroy(struct ifnet *); 164 165 static int vxlan_output(struct ifnet *, struct mbuf *, 166 struct sockaddr *, struct rtentry *); 167 static int vxlan_enqueue(struct ifnet *, struct mbuf *); 168 static void vxlan_start(struct ifqueue *); 169 static void vxlan_send(void *); 170 171 static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); 172 static int vxlan_up(struct vxlan_softc *); 173 static int vxlan_down(struct vxlan_softc *); 174 static int vxlan_addmulti(struct vxlan_softc *, struct ifnet *); 175 static void vxlan_delmulti(struct vxlan_softc *); 176 177 static struct mbuf * 178 vxlan_input(void *, struct mbuf *, 179 struct ip *, struct ip6_hdr *, void *, int); 180 181 static int vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *); 182 static int vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *); 183 static int vxlan_set_tunnel(struct vxlan_softc *, 184 const struct if_laddrreq *); 185 static int vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *); 186 static int vxlan_del_tunnel(struct vxlan_softc *); 187 static int vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *); 188 static int vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *); 189 static int vxlan_del_vnetid(struct vxlan_softc *); 190 static int vxlan_set_parent(struct vxlan_softc *, 191 const struct if_parent *); 192 static int vxlan_get_parent(struct vxlan_softc *, struct if_parent *); 193 static int vxlan_del_parent(struct vxlan_softc *); 194 195 static int vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *); 196 static int vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *); 197 198 static void vxlan_detach_hook(void *); 199 200 static struct if_clone vxlan_cloner = 201 IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); 202 203 static int vxlan_eb_port_eq(void *, void *, void *); 204 static void *vxlan_eb_port_take(void *, void *); 205 static void vxlan_eb_port_rele(void *, void 
*); 206 static size_t vxlan_eb_port_ifname(void *, char *, size_t, void *); 207 static void vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *); 208 209 static const struct etherbridge_ops vxlan_etherbridge_ops = { 210 vxlan_eb_port_eq, 211 vxlan_eb_port_take, 212 vxlan_eb_port_rele, 213 vxlan_eb_port_ifname, 214 vxlan_eb_port_sa, 215 }; 216 217 static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps"); 218 static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps); 219 static struct pool vxlan_endpoint_pool; 220 221 static inline int vxlan_peer_cmp(const struct vxlan_peer *, 222 const struct vxlan_peer *); 223 224 RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); 225 226 void 227 vxlanattach(int count) 228 { 229 if_clone_attach(&vxlan_cloner); 230 } 231 232 static int 233 vxlan_clone_create(struct if_clone *ifc, int unit) 234 { 235 struct vxlan_softc *sc; 236 struct ifnet *ifp; 237 int error; 238 239 if (vxlan_endpoint_pool.pr_size == 0) { 240 pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr), 241 0, IPL_SOFTNET, 0, "vxlanep", NULL); 242 } 243 244 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); 245 if (sc == NULL) 246 return (ENOMEM); 247 248 ifp = &sc->sc_ac.ac_if; 249 250 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", 251 ifc->ifc_name, unit); 252 253 error = etherbridge_init(&sc->sc_eb, ifp->if_xname, 254 &vxlan_etherbridge_ops, sc); 255 if (error == -1) { 256 free(sc, M_DEVBUF, sizeof(*sc)); 257 return (error); 258 } 259 260 sc->sc_af = AF_UNSPEC; 261 sc->sc_txhprio = 0; 262 sc->sc_rxhprio = IF_HDRPRIO_OUTER; 263 sc->sc_df = 0; 264 sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL; 265 266 task_set(&sc->sc_dtask, vxlan_detach_hook, sc); 267 refcnt_init(&sc->sc_refs); 268 task_set(&sc->sc_send_task, vxlan_send, sc); 269 270 ifp->if_softc = sc; 271 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 272 ifp->if_ioctl = vxlan_ioctl; 273 ifp->if_output = vxlan_output; 274 ifp->if_enqueue = vxlan_enqueue; 275 
ifp->if_qstart = vxlan_start; 276 ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX; 277 ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; 278 ether_fakeaddr(ifp); 279 280 if_counters_alloc(ifp); 281 if_attach(ifp); 282 ether_ifattach(ifp); 283 284 return (0); 285 } 286 287 static int 288 vxlan_clone_destroy(struct ifnet *ifp) 289 { 290 struct vxlan_softc *sc = ifp->if_softc; 291 292 NET_LOCK(); 293 if (ISSET(ifp->if_flags, IFF_RUNNING)) 294 vxlan_down(sc); 295 NET_UNLOCK(); 296 297 ether_ifdetach(ifp); 298 if_detach(ifp); 299 300 etherbridge_destroy(&sc->sc_eb); 301 302 refcnt_finalize(&sc->sc_refs, "vxlanfini"); 303 304 free(sc, M_DEVBUF, sizeof(*sc)); 305 306 return (0); 307 } 308 309 static struct vxlan_softc * 310 vxlan_take(struct vxlan_softc *sc) 311 { 312 refcnt_take(&sc->sc_refs); 313 return (sc); 314 } 315 316 static void 317 vxlan_rele(struct vxlan_softc *sc) 318 { 319 refcnt_rele_wake(&sc->sc_refs); 320 } 321 322 static struct mbuf * 323 vxlan_encap(struct vxlan_softc *sc, struct mbuf *m, 324 struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *, 325 const union vxlan_addr *, uint8_t)) 326 { 327 struct ifnet *ifp = &sc->sc_ac.ac_if; 328 struct m_tag *mtag; 329 struct mbuf *m0; 330 union vxlan_addr gateway; 331 const union vxlan_addr *endpoint; 332 struct vxlan_header *vh; 333 struct udphdr *uh; 334 int prio; 335 uint8_t tos; 336 337 if (sc->sc_mode == VXLAN_TMODE_UNSET) 338 goto drop; 339 340 if (sc->sc_mode == VXLAN_TMODE_P2P) 341 endpoint = &sc->sc_dst; 342 else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */ 343 struct ether_header *eh = mtod(m, struct ether_header *); 344 345 smr_read_enter(); 346 endpoint = etherbridge_resolve_ea(&sc->sc_eb, 347 (struct ether_addr *)eh->ether_dhost); 348 if (endpoint != NULL) { 349 gateway = *endpoint; 350 endpoint = &gateway; 351 } 352 smr_read_leave(); 353 354 if (endpoint == NULL) { 355 if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) 356 goto drop; 357 358 /* "flood" to unknown destinations */ 359 
endpoint = &sc->sc_dst; 360 } 361 } 362 363 /* force prepend mbuf because of payload alignment */ 364 m0 = m_get(M_DONTWAIT, m->m_type); 365 if (m0 == NULL) 366 goto drop; 367 368 m_align(m0, 0); 369 m0->m_len = 0; 370 371 M_MOVE_PKTHDR(m0, m); 372 m0->m_next = m; 373 374 m = m_prepend(m0, sizeof(*vh), M_DONTWAIT); 375 if (m == NULL) 376 return (NULL); 377 378 vh = mtod(m, struct vxlan_header *); 379 *vh = sc->sc_header; 380 381 m = m_prepend(m, sizeof(*uh), M_DONTWAIT); 382 if (m == NULL) 383 return (NULL); 384 385 uh = mtod(m, struct udphdr *); 386 uh->uh_sport = sc->sc_port; /* XXX */ 387 uh->uh_dport = sc->sc_port; 388 htobem16(&uh->uh_ulen, m->m_pkthdr.len); 389 uh->uh_sum = htons(0); 390 391 SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); 392 393 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); 394 if (mtag == NULL) 395 goto drop; 396 397 *(int *)(mtag + 1) = ifp->if_index; 398 m_tag_prepend(m, mtag); 399 400 prio = sc->sc_txhprio; 401 if (prio == IF_HDRPRIO_PACKET) 402 prio = m->m_pkthdr.pf.prio; 403 tos = IFQ_PRIO2TOS(prio); 404 405 CLR(m->m_flags, M_BCAST|M_MCAST); 406 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 407 408 #if NPF > 0 409 pf_pkt_addr_changed(m); 410 #endif 411 412 return ((*ip_encap)(sc, m, endpoint, tos)); 413 drop: 414 m_freem(m); 415 return (NULL); 416 } 417 418 static struct mbuf * 419 vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m, 420 const union vxlan_addr *endpoint, uint8_t tos) 421 { 422 struct ip *ip; 423 424 m = m_prepend(m, sizeof(*ip), M_DONTWAIT); 425 if (m == NULL) 426 return (NULL); 427 428 ip = mtod(m, struct ip *); 429 ip->ip_v = IPVERSION; 430 ip->ip_hl = sizeof(*ip) >> 2; 431 ip->ip_off = sc->sc_df; 432 ip->ip_tos = tos; 433 ip->ip_len = htons(m->m_pkthdr.len); 434 ip->ip_ttl = sc->sc_ttl; 435 ip->ip_p = IPPROTO_UDP; 436 ip->ip_src = sc->sc_src.in4; 437 ip->ip_dst = endpoint->in4; 438 439 return (m); 440 } 441 442 #ifdef INET6 443 static struct mbuf * 444 vxlan_encap_ipv6(struct vxlan_softc *sc, 
struct mbuf *m, 445 const union vxlan_addr *endpoint, uint8_t tos) 446 { 447 struct ip6_hdr *ip6; 448 int len = m->m_pkthdr.len; 449 450 m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); 451 if (m == NULL) 452 return (NULL); 453 454 ip6 = mtod(m, struct ip6_hdr *); 455 ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ? 456 htonl(m->m_pkthdr.ph_flowid) : 0; 457 ip6->ip6_vfc |= IPV6_VERSION; 458 ip6->ip6_flow |= htonl((uint32_t)tos << 20); 459 ip6->ip6_plen = htons(len); 460 ip6->ip6_nxt = IPPROTO_UDP; 461 ip6->ip6_hlim = sc->sc_ttl; 462 ip6->ip6_src = sc->sc_src.in6; 463 ip6->ip6_dst = endpoint->in6; 464 465 if (sc->sc_df) 466 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 467 468 return (m); 469 } 470 #endif /* INET6 */ 471 472 static int 473 vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 474 struct rtentry *rt) 475 { 476 struct m_tag *mtag; 477 478 mtag = NULL; 479 while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) { 480 if (*(int *)(mtag + 1) == ifp->if_index) { 481 m_freem(m); 482 return (EIO); 483 } 484 } 485 486 return (ether_output(ifp, m, dst, rt)); 487 } 488 489 static int 490 vxlan_enqueue(struct ifnet *ifp, struct mbuf *m) 491 { 492 struct vxlan_softc *sc = ifp->if_softc; 493 struct ifqueue *ifq = &ifp->if_snd; 494 495 if (ifq_enqueue(ifq, m) != 0) 496 return (ENOBUFS); 497 498 task_add(ifq->ifq_softnet, &sc->sc_send_task); 499 500 return (0); 501 } 502 503 static void 504 vxlan_start(struct ifqueue *ifq) 505 { 506 struct ifnet *ifp = ifq->ifq_if; 507 struct vxlan_softc *sc = ifp->if_softc; 508 509 task_add(ifq->ifq_softnet, &sc->sc_send_task); 510 } 511 512 static uint64_t 513 vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml) 514 { 515 struct ip_moptions imo; 516 struct mbuf *m; 517 uint64_t oerrors = 0; 518 519 imo.imo_ifidx = sc->sc_if_index0; 520 imo.imo_ttl = sc->sc_ttl; 521 imo.imo_loop = 0; 522 523 NET_LOCK(); 524 while ((m = ml_dequeue(ml)) != NULL) { 525 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, 
NULL, 0) != 0) 526 oerrors++; 527 } 528 NET_UNLOCK(); 529 530 return (oerrors); 531 } 532 533 #ifdef INET6 534 static uint64_t 535 vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml) 536 { 537 struct ip6_moptions im6o; 538 struct mbuf *m; 539 uint64_t oerrors = 0; 540 541 im6o.im6o_ifidx = sc->sc_if_index0; 542 im6o.im6o_hlim = sc->sc_ttl; 543 im6o.im6o_loop = 0; 544 545 NET_LOCK(); 546 while ((m = ml_dequeue(ml)) != NULL) { 547 if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0) 548 oerrors++; 549 } 550 NET_UNLOCK(); 551 552 return (oerrors); 553 } 554 #endif /* INET6 */ 555 556 static void 557 vxlan_send(void *arg) 558 { 559 struct vxlan_softc *sc = arg; 560 struct ifnet *ifp = &sc->sc_ac.ac_if; 561 struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *, 562 const union vxlan_addr *, uint8_t); 563 uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *); 564 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 565 struct mbuf *m; 566 uint64_t oerrors; 567 568 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 569 return; 570 571 switch (sc->sc_af) { 572 case AF_INET: 573 ip_encap = vxlan_encap_ipv4; 574 ip_send = vxlan_send_ipv4; 575 break; 576 #ifdef INET6 577 case AF_INET6: 578 ip_encap = vxlan_encap_ipv6; 579 ip_send = vxlan_send_ipv6; 580 break; 581 #endif 582 default: 583 unhandled_af(sc->sc_af); 584 /* NOTREACHED */ 585 } 586 587 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { 588 #if NBPFILTER > 0 589 caddr_t if_bpf = READ_ONCE(ifp->if_bpf); 590 if (if_bpf != NULL) 591 bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); 592 #endif 593 m = vxlan_encap(sc, m, ip_encap); 594 if (m == NULL) 595 continue; 596 597 ml_enqueue(&ml, m); 598 } 599 600 oerrors = (*ip_send)(sc, &ml); 601 602 counters_add(ifp->if_counters, ifc_oerrors, oerrors); 603 } 604 605 static struct mbuf * 606 vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6, 607 void *uhp, int hlen) 608 { 609 struct vxlan_tep *vt = arg; 610 union vxlan_addr addr; 611 struct vxlan_peer 
key, *p; 612 struct udphdr *uh; 613 struct vxlan_header *vh; 614 struct ether_header *eh; 615 int vhlen = hlen + sizeof(*vh); 616 struct mbuf *n; 617 int off; 618 in_port_t port; 619 struct vxlan_softc *sc = NULL; 620 struct ifnet *ifp; 621 int rxhprio; 622 uint8_t tos; 623 624 if (m->m_pkthdr.len < vhlen) 625 goto drop; 626 627 uh = uhp; 628 port = uh->uh_sport; 629 630 if (ip != NULL) { 631 memset(&addr, 0, sizeof(addr)); 632 addr.in4 = ip->ip_src; 633 tos = ip->ip_tos; 634 } 635 #ifdef INET6 636 else { 637 addr.in6 = ip6->ip6_src; 638 tos = bemtoh32(&ip6->ip6_flow) >> 20; 639 } 640 #endif 641 642 if (m->m_len < vhlen) { 643 m = m_pullup(m, vhlen); 644 if (m == NULL) 645 return (NULL); 646 } 647 648 /* can't use ip/ip6/uh after this */ 649 650 vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen); 651 652 memset(&key, 0, sizeof(key)); 653 key.p_addr = addr; 654 key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I); 655 key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK); 656 657 mtx_enter(&vt->vt_mtx); 658 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 659 if (p == NULL) { 660 memset(&key.p_addr, 0, sizeof(key.p_addr)); 661 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 662 } 663 if (p != NULL) 664 sc = vxlan_take(p->p_sc); 665 mtx_leave(&vt->vt_mtx); 666 667 if (sc == NULL) 668 goto drop; 669 670 ifp = &sc->sc_ac.ac_if; 671 if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port) 672 goto rele_drop; 673 674 m_adj(m, vhlen); 675 676 if (m->m_pkthdr.len < sizeof(*eh)) 677 goto rele_drop; 678 679 if (m->m_len < sizeof(*eh)) { 680 m = m_pullup(m, sizeof(*eh)); 681 if (m == NULL) 682 goto rele; 683 } 684 685 n = m_getptr(m, sizeof(*eh), &off); 686 if (n == NULL) 687 goto rele_drop; 688 689 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { 690 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); 691 m_freem(m); 692 if (n == NULL) 693 goto rele; 694 m = n; 695 } 696 697 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 698 eh = mtod(m, struct ether_header 
*); 699 etherbridge_map_ea(&sc->sc_eb, &addr, 700 (struct ether_addr *)eh->ether_shost); 701 } 702 703 rxhprio = sc->sc_rxhprio; 704 switch (rxhprio) { 705 case IF_HDRPRIO_PACKET: 706 /* nop */ 707 break; 708 case IF_HDRPRIO_OUTER: 709 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos); 710 break; 711 default: 712 m->m_pkthdr.pf.prio = rxhprio; 713 break; \ 714 } \ 715 716 if_vinput(ifp, m); 717 rele: 718 vxlan_rele(sc); 719 return (NULL); 720 721 rele_drop: 722 vxlan_rele(sc); 723 drop: 724 m_freem(m); 725 return (NULL); 726 } 727 728 static int 729 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 730 { 731 struct vxlan_softc *sc = ifp->if_softc; 732 struct ifreq *ifr = (struct ifreq *)data; 733 struct ifbrparam *bparam = (struct ifbrparam *)data; 734 int error = 0; 735 736 switch (cmd) { 737 case SIOCSIFADDR: 738 break; 739 case SIOCSIFFLAGS: 740 if (ISSET(ifp->if_flags, IFF_UP)) { 741 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 742 error = vxlan_up(sc); 743 else 744 error = 0; 745 } else { 746 if (ISSET(ifp->if_flags, IFF_RUNNING)) 747 error = vxlan_down(sc); 748 } 749 break; 750 751 case SIOCSLIFPHYRTABLE: 752 error = vxlan_set_rdomain(sc, ifr); 753 break; 754 case SIOCGLIFPHYRTABLE: 755 error = vxlan_get_rdomain(sc, ifr); 756 break; 757 758 case SIOCSLIFPHYADDR: 759 error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data); 760 break; 761 case SIOCGLIFPHYADDR: 762 error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data); 763 break; 764 case SIOCDIFPHYADDR: 765 error = vxlan_del_tunnel(sc); 766 break; 767 768 case SIOCSVNETID: 769 error = vxlan_set_vnetid(sc, ifr); 770 break; 771 case SIOCGVNETID: 772 error = vxlan_get_vnetid(sc, ifr); 773 break; 774 case SIOCDVNETID: 775 error = vxlan_del_vnetid(sc); 776 break; 777 778 case SIOCSIFPARENT: 779 error = vxlan_set_parent(sc, (struct if_parent *)data); 780 break; 781 case SIOCGIFPARENT: 782 error = vxlan_get_parent(sc, (struct if_parent *)data); 783 break; 784 case SIOCDIFPARENT: 785 error = vxlan_del_parent(sc); 786 
break; 787 788 case SIOCSTXHPRIO: 789 error = if_txhprio_l2_check(ifr->ifr_hdrprio); 790 if (error != 0) 791 break; 792 793 sc->sc_txhprio = ifr->ifr_hdrprio; 794 break; 795 case SIOCGTXHPRIO: 796 ifr->ifr_hdrprio = sc->sc_txhprio; 797 break; 798 799 case SIOCSRXHPRIO: 800 error = if_rxhprio_l2_check(ifr->ifr_hdrprio); 801 if (error != 0) 802 break; 803 804 sc->sc_rxhprio = ifr->ifr_hdrprio; 805 break; 806 case SIOCGRXHPRIO: 807 ifr->ifr_hdrprio = sc->sc_rxhprio; 808 break; 809 810 case SIOCSLIFPHYDF: 811 /* commit */ 812 sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 813 break; 814 case SIOCGLIFPHYDF: 815 ifr->ifr_df = sc->sc_df ? 1 : 0; 816 break; 817 818 case SIOCSLIFPHYTTL: 819 if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) { 820 error = EINVAL; 821 break; 822 } 823 824 /* commit */ 825 sc->sc_ttl = (uint8_t)ifr->ifr_ttl; 826 break; 827 case SIOCGLIFPHYTTL: 828 ifr->ifr_ttl = (int)sc->sc_ttl; 829 break; 830 831 case SIOCBRDGSCACHE: 832 error = etherbridge_set_max(&sc->sc_eb, bparam); 833 break; 834 case SIOCBRDGGCACHE: 835 error = etherbridge_get_max(&sc->sc_eb, bparam); 836 break; 837 case SIOCBRDGSTO: 838 error = etherbridge_set_tmo(&sc->sc_eb, bparam); 839 break; 840 case SIOCBRDGGTO: 841 error = etherbridge_get_tmo(&sc->sc_eb, bparam); 842 break; 843 844 case SIOCBRDGRTS: 845 error = etherbridge_rtfind(&sc->sc_eb, 846 (struct ifbaconf *)data); 847 break; 848 case SIOCBRDGFLUSH: 849 etherbridge_flush(&sc->sc_eb, 850 ((struct ifbreq *)data)->ifbr_ifsflags); 851 break; 852 case SIOCBRDGSADDR: 853 error = vxlan_add_addr(sc, (struct ifbareq *)data); 854 break; 855 case SIOCBRDGDADDR: 856 error = vxlan_del_addr(sc, (struct ifbareq *)data); 857 break; 858 859 case SIOCADDMULTI: 860 case SIOCDELMULTI: 861 /* no hardware to program */ 862 break; 863 864 default: 865 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 866 break; 867 } 868 869 if (error == ENETRESET) { 870 /* no hardware to program */ 871 error = 0; 872 } 873 874 return (error); 875 } 876 877 static 
struct vxlan_tep * 878 vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr) 879 { 880 struct vxlan_tep *vt; 881 882 TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) { 883 if (sc->sc_af == vt->vt_af && 884 sc->sc_rdomain == vt->vt_rdomain && 885 memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 && 886 sc->sc_port == vt->vt_port) 887 return (vt); 888 } 889 890 return (NULL); 891 } 892 893 static int 894 vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, 895 struct vxlan_peer *p) 896 { 897 struct mbuf m; 898 struct vxlan_tep *vt; 899 struct socket *so; 900 struct sockaddr_in *sin; 901 #ifdef INET6 902 struct sockaddr_in6 *sin6; 903 #endif 904 int error; 905 906 vt = vxlan_tep_get(sc, addr); 907 if (vt != NULL) { 908 struct vxlan_peer *op; 909 910 mtx_enter(&vt->vt_mtx); 911 op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p); 912 mtx_leave(&vt->vt_mtx); 913 914 if (op != NULL) 915 return (EADDRINUSE); 916 917 return (0); 918 } 919 920 vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO); 921 if (vt == NULL) 922 return (ENOMEM); 923 924 vt->vt_af = sc->sc_af; 925 vt->vt_rdomain = sc->sc_rdomain; 926 vt->vt_addr = *addr; 927 vt->vt_port = sc->sc_port; 928 929 mtx_init(&vt->vt_mtx, IPL_SOFTNET); 930 RBT_INIT(vxlan_peers, &vt->vt_peers); 931 RBT_INSERT(vxlan_peers, &vt->vt_peers, p); 932 933 error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP); 934 if (error != 0) 935 goto free; 936 937 solock(so); 938 939 sotoinpcb(so)->inp_upcall = vxlan_input; 940 sotoinpcb(so)->inp_upcall_arg = vt; 941 942 m_inithdr(&m); 943 m.m_len = sizeof(vt->vt_rdomain); 944 *mtod(&m, unsigned int *) = vt->vt_rdomain; 945 error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m); 946 if (error != 0) 947 goto close; 948 949 m_inithdr(&m); 950 switch (vt->vt_af) { 951 case AF_INET: 952 sin = mtod(&m, struct sockaddr_in *); 953 memset(sin, 0, sizeof(*sin)); 954 sin->sin_len = sizeof(*sin); 955 sin->sin_family = AF_INET; 956 sin->sin_addr = addr->in4; 957 sin->sin_port = vt->vt_port; 
958 959 m.m_len = sizeof(*sin); 960 break; 961 962 #ifdef INET6 963 case AF_INET6: 964 sin6 = mtod(&m, struct sockaddr_in6 *); 965 sin6->sin6_len = sizeof(*sin6); 966 sin6->sin6_family = AF_INET6; 967 in6_recoverscope(sin6, &addr->in6); 968 sin6->sin6_port = sc->sc_port; 969 970 m.m_len = sizeof(*sin6); 971 break; 972 #endif 973 default: 974 unhandled_af(vt->vt_af); 975 } 976 977 error = sobind(so, &m, curproc); 978 if (error != 0) 979 goto close; 980 981 sounlock(so); 982 983 rw_assert_wrlock(&vxlan_lock); 984 TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry); 985 986 vt->vt_so = so; 987 988 return (0); 989 990 close: 991 sounlock(so); 992 soclose(so, MSG_DONTWAIT); 993 free: 994 free(vt, M_DEVBUF, sizeof(*vt)); 995 return (error); 996 } 997 998 static void 999 vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, 1000 struct vxlan_peer *p) 1001 { 1002 struct vxlan_tep *vt; 1003 int empty; 1004 1005 vt = vxlan_tep_get(sc, addr); 1006 if (vt == NULL) 1007 panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc); 1008 1009 mtx_enter(&vt->vt_mtx); 1010 RBT_REMOVE(vxlan_peers, &vt->vt_peers, p); 1011 empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers); 1012 mtx_leave(&vt->vt_mtx); 1013 1014 if (!empty) 1015 return; 1016 1017 rw_assert_wrlock(&vxlan_lock); 1018 TAILQ_REMOVE(&vxlan_teps, vt, vt_entry); 1019 1020 soclose(vt->vt_so, MSG_DONTWAIT); 1021 free(vt, M_DEVBUF, sizeof(*vt)); 1022 } 1023 1024 static int 1025 vxlan_tep_up(struct vxlan_softc *sc) 1026 { 1027 struct vxlan_peer *up, *mp; 1028 int error; 1029 1030 up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO); 1031 if (up == NULL) 1032 return (ENOMEM); 1033 1034 if (sc->sc_mode == VXLAN_TMODE_P2P) 1035 up->p_addr = sc->sc_dst; 1036 up->p_header = sc->sc_header; 1037 up->p_sc = vxlan_take(sc); 1038 1039 error = vxlan_tep_add_addr(sc, &sc->sc_src, up); 1040 if (error != 0) 1041 goto freeup; 1042 1043 sc->sc_ucast_peer = up; 1044 1045 if (sc->sc_mode != VXLAN_TMODE_LEARNING) 1046 return (0); 1047 1048 
mp = malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO); 1049 if (mp == NULL) { 1050 error = ENOMEM; 1051 goto delup; 1052 } 1053 1054 /* addr is multicast, leave it as 0s */ 1055 mp->p_header = sc->sc_header; 1056 mp->p_sc = vxlan_take(sc); 1057 1058 /* destination address is a multicast group we want to join */ 1059 error = vxlan_tep_add_addr(sc, &sc->sc_dst, up); 1060 if (error != 0) 1061 goto freemp; 1062 1063 sc->sc_mcast_peer = mp; 1064 1065 return (0); 1066 1067 freemp: 1068 vxlan_rele(mp->p_sc); 1069 free(mp, M_DEVBUF, sizeof(*mp)); 1070 delup: 1071 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1072 freeup: 1073 vxlan_rele(up->p_sc); 1074 free(up, M_DEVBUF, sizeof(*up)); 1075 return (error); 1076 } 1077 1078 static void 1079 vxlan_tep_down(struct vxlan_softc *sc) 1080 { 1081 struct vxlan_peer *up = sc->sc_ucast_peer; 1082 1083 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1084 struct vxlan_peer *mp = sc->sc_mcast_peer; 1085 vxlan_tep_del_addr(sc, &sc->sc_dst, mp); 1086 vxlan_rele(mp->p_sc); 1087 free(mp, M_DEVBUF, sizeof(*mp)); 1088 } 1089 1090 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1091 vxlan_rele(up->p_sc); 1092 free(up, M_DEVBUF, sizeof(*up)); 1093 } 1094 1095 static int 1096 vxlan_up(struct vxlan_softc *sc) 1097 { 1098 struct ifnet *ifp = &sc->sc_ac.ac_if; 1099 struct ifnet *ifp0 = NULL; 1100 int error; 1101 1102 KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); 1103 NET_ASSERT_LOCKED(); 1104 1105 if (sc->sc_af == AF_UNSPEC) 1106 return (EDESTADDRREQ); 1107 KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET); 1108 1109 NET_UNLOCK(); 1110 1111 error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); 1112 if (error != 0) 1113 goto netlock; 1114 1115 NET_LOCK(); 1116 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 1117 /* something else beat us */ 1118 rw_exit(&vxlan_lock); 1119 return (0); 1120 } 1121 NET_UNLOCK(); 1122 1123 if (sc->sc_mode != VXLAN_TMODE_P2P) { 1124 error = etherbridge_up(&sc->sc_eb); 1125 if (error != 0) 1126 goto unlock; 1127 } 1128 1129 if (sc->sc_mode == 
VXLAN_TMODE_LEARNING) { 1130 ifp0 = if_get(sc->sc_if_index0); 1131 if (ifp0 == NULL) { 1132 error = ENXIO; 1133 goto down; 1134 } 1135 1136 /* check again if multicast will work on top of the parent */ 1137 if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { 1138 error = EPROTONOSUPPORT; 1139 goto put; 1140 } 1141 1142 error = vxlan_addmulti(sc, ifp0); 1143 if (error != 0) 1144 goto put; 1145 1146 /* Register callback if parent wants to unregister */ 1147 if_detachhook_add(ifp0, &sc->sc_dtask); 1148 } else { 1149 if (sc->sc_if_index0 != 0) { 1150 error = EPROTONOSUPPORT; 1151 goto down; 1152 } 1153 } 1154 1155 error = vxlan_tep_up(sc); 1156 if (error != 0) 1157 goto del; 1158 1159 if_put(ifp0); 1160 1161 NET_LOCK(); 1162 SET(ifp->if_flags, IFF_RUNNING); 1163 rw_exit(&vxlan_lock); 1164 1165 return (0); 1166 1167 del: 1168 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1169 if (ifp0 != NULL) 1170 if_detachhook_del(ifp0, &sc->sc_dtask); 1171 vxlan_delmulti(sc); 1172 } 1173 put: 1174 if_put(ifp0); 1175 down: 1176 if (sc->sc_mode != VXLAN_TMODE_P2P) 1177 etherbridge_down(&sc->sc_eb); 1178 unlock: 1179 rw_exit(&vxlan_lock); 1180 netlock: 1181 NET_LOCK(); 1182 1183 return (error); 1184 } 1185 1186 static int 1187 vxlan_down(struct vxlan_softc *sc) 1188 { 1189 struct ifnet *ifp = &sc->sc_ac.ac_if; 1190 struct ifnet *ifp0; 1191 int error; 1192 1193 KASSERT(ISSET(ifp->if_flags, IFF_RUNNING)); 1194 NET_UNLOCK(); 1195 1196 error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); 1197 if (error != 0) { 1198 NET_LOCK(); 1199 return (error); 1200 } 1201 1202 NET_LOCK(); 1203 if (!ISSET(ifp->if_flags, IFF_RUNNING)) { 1204 /* something else beat us */ 1205 rw_exit(&vxlan_lock); 1206 return (0); 1207 } 1208 NET_UNLOCK(); 1209 1210 vxlan_tep_down(sc); 1211 1212 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1213 vxlan_delmulti(sc); 1214 ifp0 = if_get(sc->sc_if_index0); 1215 if (ifp0 != NULL) { 1216 if_detachhook_del(ifp0, &sc->sc_dtask); 1217 } 1218 if_put(ifp0); 1219 } 1220 1221 if (sc->sc_mode != 
VXLAN_TMODE_P2P)
		/*
		 * NOTE(review): this is the tail of vxlan_down(); the
		 * function starts above this chunk.
		 */
		etherbridge_down(&sc->sc_eb);

	/* make sure no queued transmit work is still running */
	taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task);
	NET_LOCK();
	CLR(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);
}

/*
 * Join the configured multicast destination group on the parent
 * interface ifp0.  The membership handle is stashed in sc->sc_inmulti
 * so vxlan_delmulti() can release it later.  Returns 0 on success or
 * an errno (EADDRNOTAVAIL if the group could not be joined).
 */
static int
vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0)
{
	int error = 0;

	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0);
		if (sc->sc_inmulti == NULL)
			error = EADDRNOTAVAIL;
		break;
#ifdef INET6
	case AF_INET6:
		/* in6_addmulti reports failure through &error directly */
		sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	NET_UNLOCK();

	return (error);
}

/*
 * Leave the multicast group joined by vxlan_addmulti() and clear the
 * stored membership handle.
 */
static void
vxlan_delmulti(struct vxlan_softc *sc)
{
	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		in_delmulti(sc->sc_inmulti);
		break;
#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inmulti);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	sc->sc_inmulti = NULL; /* keep it tidy */

	NET_UNLOCK();
}

/*
 * SIOCSLIFPHYRTABLE-style handler: set the routing domain the tunnel
 * packets are sent in.  Rejected with EBUSY while the interface is
 * running; a successful change flushes all learned endpoints.
 */
static int
vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	/* the id must be in range and refer to an existing rtable */
	if (ifr->ifr_rdomainid < 0 ||
	    ifr->ifr_rdomainid > RT_TABLEID_MAX)
		return (EINVAL);
	if (!rtable_exists(ifr->ifr_rdomainid))
		return (EADDRNOTAVAIL);

	if (sc->sc_rdomain == ifr->ifr_rdomainid)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_rdomain = ifr->ifr_rdomainid;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/* Report the configured tunnel routing domain. */
static int
vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr)
{
	ifr->ifr_rdomainid = sc->sc_rdomain;

	return (0);
}

/*
 * Configure the tunnel endpoints (presumably SIOCSLIFPHYADDR — confirm
 * against vxlan_ioctl).  The source address must be a specific unicast
 * address; its port, if non-zero, overrides the default VXLAN port.
 * The destination selects the tunnel mode:
 *   - AF_UNSPEC dst            -> VXLAN_TMODE_ENDPOINT
 *   - multicast dst            -> VXLAN_TMODE_LEARNING
 *   - unicast dst              -> VXLAN_TMODE_P2P
 * Rejected with EBUSY while running; a change flushes learned
 * endpoints.
 */
static int
vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif
	union vxlan_addr saddr, daddr;
	unsigned int mode = VXLAN_TMODE_ENDPOINT;
	in_port_t port = htons(VXLAN_PORT);

	/* zero-pad so the unions compare/match cleanly for both afs */
	memset(&saddr, 0, sizeof(saddr));
	memset(&daddr, 0, sizeof(daddr));

	/* validate */
	switch (src->sa_family) {
	case AF_INET:
		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		if (src4->sin_port != htons(0))
			port = src4->sin_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* src and dst address families must agree */
			if (dst->sa_family != AF_INET)
				return (EINVAL);

			dst4 = (struct sockaddr_in *)dst;
			if (in_nullhost(dst4->sin_addr))
				return (EINVAL);

			/* all good */
			mode = IN_MULTICAST(dst4->sin_addr.s_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			daddr.in4 = dst4->sin_addr;
		}

		saddr.in4 = src4->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		if (src6->sin6_port != htons(0))
			port = src6->sin6_port;

		if (dst->sa_family != AF_UNSPEC) {
			if (dst->sa_family != AF_INET6)
				return (EINVAL);

			dst6 = (struct sockaddr_in6 *)dst;
			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr))
				return (EINVAL);

			/* both ends must use the same link scope */
			if (src6->sin6_scope_id != dst6->sin6_scope_id)
				return (EINVAL);

			/* all good */
			mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			error = in6_embedscope(&daddr.in6, dst6, NULL);
			if (error != 0)
				return (error);
		}

		error = in6_embedscope(&saddr.in6, src6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* nothing to do if the configuration is unchanged */
	if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 &&
	    memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 &&
	    sc->sc_port == port)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = src->sa_family;
	sc->sc_src = saddr;
	sc->sc_dst = daddr;
	sc->sc_port = port;
	sc->sc_mode = mode;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Report the configured tunnel endpoints.  In endpoint mode (no
 * destination set) dstaddr is returned as an empty AF_UNSPEC
 * sockaddr.  Fails with EADDRNOTAVAIL when no tunnel is configured.
 */
static int
vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req)
{
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	if (sc->sc_af == AF_UNSPEC)
		return (EADDRNOTAVAIL);
	/* sc_af and sc_mode are always set together in set/del_tunnel */
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	memset(&req->addr, 0, sizeof(req->addr));
	memset(&req->dstaddr, 0, sizeof(req->dstaddr));

	/* default to endpoint: sa_len covers just sa_len + sa_family */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	switch (sc->sc_af) {
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_src.in4;
		sin->sin_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin = (struct sockaddr_in *)&req->dstaddr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_dst.in4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* undo the embedded scope id for userland consumption */
		in6_recoverscope(sin6, &sc->sc_src.in6);
		sin6->sin6_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin6 = (struct sockaddr_in6 *)&req->dstaddr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &sc->sc_dst.in6);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	return (0);
}

/*
 * Clear the tunnel endpoint configuration.  No-op if nothing is
 * configured; EBUSY while the interface is running.
 */
static int
vxlan_del_tunnel(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (sc->sc_af == AF_UNSPEC)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = AF_UNSPEC;
	memset(&sc->sc_src, 0, sizeof(sc->sc_src));
	memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
	sc->sc_port = htons(0);
	sc->sc_mode = VXLAN_TMODE_UNSET;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Set the VXLAN Network Identifier.  The VNI is kept pre-shifted in
 * network byte order in sc_header.vxlan_id, with the I flag set in
 * vxlan_flags, so the header can be prepended to packets as-is.
 */
static int
vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	uint32_t vni;

	if (ifr->ifr_vnetid < VXLAN_VNI_MIN ||
	    ifr->ifr_vnetid > VXLAN_VNI_MAX)
		return (EINVAL);

	vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT);
	if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) &&
	    sc->sc_header.vxlan_id == vni)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
	sc->sc_header.vxlan_id = vni;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Report the configured VNI, undoing the wire encoding (network byte
 * order, shifted).  EADDRNOTAVAIL if no VNI has been set.
 */
static int
vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr)
{
	uint32_t vni;

	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
		return (EADDRNOTAVAIL);

	vni = ntohl(sc->sc_header.vxlan_id);
	vni &= VXLAN_VNI_MASK;
	vni >>= VXLAN_VNI_SHIFT;

	ifr->ifr_vnetid = vni;

	return (0);
}

/*
 * Clear the VNI (and the I flag).  No-op if unset; EBUSY while
 * running.
 */
static int
vxlan_del_vnetid(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
	sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT);
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Set the parent interface, referenced only by its if_index so no
 * long-lived pointer is held.  The parent must be multicast capable.
 * EBUSY while running; a change flushes learned endpoints.
 */
static int
vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_unit(p->ifp_parent);
	if (ifp0 == NULL)
		return (ENXIO);

	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENXIO;
		goto put;
	}

	if (sc->sc_if_index0 == ifp0->if_index)
		goto put;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = EBUSY;
		goto put;
	}

	/* commit */
	sc->sc_if_index0 = ifp0->if_index;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

put:
	/* release the if_unit() reference on all paths */
	if_put(ifp0);
	return (error);
}

/*
 * Report the parent interface name.  EADDRNOTAVAIL if no parent is
 * set or it no longer exists.
 */
static int
vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p)
{
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_get(sc->sc_if_index0);
	if (ifp0 == NULL)
		error = EADDRNOTAVAIL;
	else
		strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
	if_put(ifp0);

	return (error);
}

/*
 * Detach from the parent interface.  No-op if no parent is set;
 * EBUSY while running.
 */
static int
vxlan_del_parent(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (sc->sc_if_index0 == 0)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_if_index0 = 0;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * Install a static (or dynamic) bridge address entry mapping an
 * Ethernet address to a tunnel endpoint (SIOCBRDGSADDR-style).  The
 * endpoint must be a specific unicast address in the tunnel's address
 * family with no port.  Not available in P2P mode (there is only one
 * peer) or before a tunnel is configured.
 */
static int
vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union vxlan_addr endpoint;
	unsigned int type;

	switch (sc->sc_mode) {
	case VXLAN_TMODE_UNSET:
		return (ENOPROTOOPT);
	case VXLAN_TMODE_P2P:
		return (EPROTONOSUPPORT);
	default:
		break;
	}

	/* ignore ifba_ifsname */

	/* only the entry-type bits are meaningful here */
	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	/* zero-pad so IPv4 endpoints compare cleanly as 128-bit values */
	memset(&endpoint, 0, sizeof(endpoint));

	if (ifba->ifba_dstsa.ss_family != sc->sc_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		if (sin->sin_port != htons(0))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		/* the endpoint must be in the same scope as our source */
		in6_recoverscope(&src6, &sc->sc_src.in6);
		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		if (sin6->sin6_port != htons(0))
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}

/* Remove a bridge address entry by its Ethernet address. */
static int
vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
}

/*
 * Parent-interface detach callback (presumably registered via a
 * detach hook at parent attach time — confirm at the registration
 * site).  Forces the interface down and drops the parent reference.
 */
void
vxlan_detach_hook(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		vxlan_down(sc);
		CLR(ifp->if_flags, IFF_UP);
	}

	sc->sc_if_index0 = 0;
}

/*
 * etherbridge callback: compare two endpoint "ports" for equality.
 * Always compares all 128 bits; IPv4 endpoints rely on the unused
 * bytes being zero-padded (see vxlan_add_addr).
 */
static int
vxlan_eb_port_eq(void *arg, void *a, void *b)
{
	const union vxlan_addr *va = a, *vb = b;
	size_t i;

	for (i = 0; i < nitems(va->in6.s6_addr32); i++) {
		if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i])
			return (0);
	}

	return (1);
}

/*
 * etherbridge callback: take a reference on a port by copying the
 * endpoint into pool-backed storage.  May fail (NULL) under memory
 * pressure since PR_NOWAIT is used.
 */
static void *
vxlan_eb_port_take(void *arg, void *port)
{
	union vxlan_addr *endpoint;

	endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT);
	if (endpoint == NULL)
		return (NULL);

	*endpoint = *(union vxlan_addr *)port;

	return (endpoint);
}

/* etherbridge callback: release a port taken by vxlan_eb_port_take. */
static void
vxlan_eb_port_rele(void *arg, void *port)
{
	union vxlan_addr *endpoint = port;

	pool_put(&vxlan_endpoint_pool, endpoint);
}

/*
 * etherbridge callback: report the interface name associated with a
 * port (always this vxlan interface).
 */
static size_t
vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
{
	struct vxlan_softc *sc = arg;

	return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
}

/*
 * etherbridge callback: render an endpoint as a sockaddr in the
 * tunnel's address family.  NOTE(review): ss is not zeroed here —
 * assumes the caller provides zeroed storage; confirm in
 * if_etherbridge.c.
 */
static void
vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	struct vxlan_softc *sc = arg;
	union vxlan_addr *endpoint = port;

	switch (sc->sc_af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = endpoint->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* undo the embedded scope id for userland consumption */
		in6_recoverscope(sin6, &endpoint->in6);
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_af);
	}
}

/*
 * Total order over peers for the red-black tree: first by the (wire
 * encoded) VXLAN id, then flags, then the 128-bit endpoint address.
 * Returns 1/-1/0 like a comparator.
 */
static inline int
vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp)
{
	size_t i;

	if (ap->p_header.vxlan_id > bp->p_header.vxlan_id)
		return (1);
	if (ap->p_header.vxlan_id < bp->p_header.vxlan_id)
		return (-1);
	if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags)
		return (1);
	if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags)
		return (-1);

	for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) {
		if (ap->p_addr.in6.s6_addr32[i] >
		    bp->p_addr.in6.s6_addr32[i])
			return (1);
		if (ap->p_addr.in6.s6_addr32[i] <
		    bp->p_addr.in6.s6_addr32[i])
			return (-1);
	}

	return (0);
}

/* Instantiate the vxlan_peers red-black tree operations. */
RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);