xref: /openbsd-src/sys/net/if_vxlan.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: if_vxlan.c,v 1.44 2016/09/04 11:14:44 reyk Exp $	*/
2 
3 /*
4  * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bpfilter.h"
20 #include "vxlan.h"
21 #include "vlan.h"
22 #include "pf.h"
23 #include "bridge.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/mbuf.h>
28 #include <sys/socket.h>
29 #include <sys/sockio.h>
30 #include <sys/ioctl.h>
31 
32 #include <net/if.h>
33 #include <net/if_var.h>
34 #include <net/if_media.h>
35 #include <net/route.h>
36 
37 #if NBPFILTER > 0
38 #include <net/bpf.h>
39 #endif
40 
41 #include <netinet/in.h>
42 #include <netinet/in_var.h>
43 #include <netinet/if_ether.h>
44 #include <netinet/ip.h>
45 #include <netinet/ip_var.h>
46 #include <netinet/udp.h>
47 #include <netinet/udp_var.h>
48 #include <netinet/in_pcb.h>
49 
50 #if NPF > 0
51 #include <net/pfvar.h>
52 #endif
53 
54 #if NBRIDGE > 0
55 #include <net/if_bridge.h>
56 #endif
57 
58 #include <net/if_vxlan.h>
59 
/*
 * Per-interface state of one vxlan(4) instance.  It is allocated in
 * vxlan_clone_create() and released in vxlan_clone_destroy().
 */
struct vxlan_softc {
	/* Ethernet state; the interface's ifnet is sc_ac.ac_if */
	struct arpcom		 sc_ac;
	/* media state handed to ifmedia_ioctl() */
	struct ifmedia		 sc_media;

	/* multicast options passed to ip_output() for IPv4 tunnels */
	struct ip_moptions	 sc_imo;
	/* cookie of the address hook on the multicast parent interface */
	void			*sc_ahcookie;
	/* cookie of the link state hook on the multicast parent interface */
	void			*sc_lhcookie;
	/* cookie of the detach hook on the multicast parent interface */
	void			*sc_dhcookie;

	/* tunnel source address (outer header) */
	struct sockaddr_storage	 sc_src;
	/* tunnel destination address (outer header), may be multicast */
	struct sockaddr_storage	 sc_dst;
	/* UDP port in network byte order; used as source and destination */
	in_port_t		 sc_dstport;
	/* routing domain used for the encapsulated (outer) packets */
	u_int			 sc_rdomain;
	/* VNI, or one of VXLAN_VNI_UNSET / VXLAN_VNI_ANY */
	int64_t			 sc_vnetid;
	/* TTL / hop limit of the outer header; 0 selects the default */
	u_int8_t		 sc_ttl;

	/* linkage in a vxlan_tagh bucket or in the vxlan_any list */
	LIST_ENTRY(vxlan_softc)	 sc_entry;
};
78 
/* Pseudo-device attachment, ioctl and start entry points */
void	 vxlanattach(int);
int	 vxlanioctl(struct ifnet *, u_long, caddr_t);
void	 vxlanstart(struct ifnet *);
/* Interface cloning */
int	 vxlan_clone_create(struct if_clone *, int);
int	 vxlan_clone_destroy(struct ifnet *);
/* Multicast membership on the tunnel's parent interface */
void	 vxlan_multicast_cleanup(struct ifnet *);
int	 vxlan_multicast_join(struct ifnet *, struct sockaddr *,
	    struct sockaddr *);
/* ifmedia callbacks */
int	 vxlan_media_change(struct ifnet *);
void	 vxlan_media_status(struct ifnet *, struct ifmediareq *);
/* Tunnel (re)configuration and encapsulated output */
int	 vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *);
int	 vxlan_output(struct ifnet *, struct mbuf *);
/* Hooks registered on the multicast parent interface */
void	 vxlan_addr_change(void *);
void	 vxlan_if_change(void *);
void	 vxlan_link_change(void *);

/* Socket address helpers */
int	 vxlan_sockaddr_cmp(struct sockaddr *, struct sockaddr *);
uint16_t vxlan_sockaddr_port(struct sockaddr *);

/* Cloner that creates and destroys "vxlanN" interfaces */
struct if_clone	vxlan_cloner =
    IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy);

/* Number of existing vxlan(4) interfaces (create: ++, destroy: --) */
int	 vxlan_enable = 0;
/* Bucket mask of the vxlan_tagh hash table, filled in by hashinit() */
u_long	 vxlan_tagmask;
103 
/* Number of hash buckets requested from hashinit() for the VNI table */
#define VXLAN_TAGHASHSIZE		 32
/*
 * Map a VNI to its bucket index in vxlan_tagh.  The argument is
 * parenthesized so that the cast and the mask apply to the whole
 * expression, whatever operators the caller passes in.
 */
#define VXLAN_TAGHASH(tag)		 ((unsigned int)(tag) & vxlan_tagmask)
/*
 * vxlan_tagh: hash table of the interfaces that are bound to a VNI.
 * vxlan_any:  list of the interfaces that accept any VNI.
 */
LIST_HEAD(vxlan_taghash, vxlan_softc)	*vxlan_tagh, vxlan_any;
107 
108 void
109 vxlanattach(int count)
110 {
111 	/* Regular vxlan interfaces with a VNI */
112 	if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT,
113 	    &vxlan_tagmask)) == NULL)
114 		panic("vxlanattach: hashinit");
115 
116 	/* multipoint-to-multipoint interfaces that accept any VNI */
117 	LIST_INIT(&vxlan_any);
118 
119 	if_clone_attach(&vxlan_cloner);
120 }
121 
122 int
123 vxlan_clone_create(struct if_clone *ifc, int unit)
124 {
125 	struct ifnet		*ifp;
126 	struct vxlan_softc	*sc;
127 
128 	if ((sc = malloc(sizeof(*sc),
129 	    M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
130 		return (ENOMEM);
131 
132 	sc->sc_imo.imo_membership = malloc(
133 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
134 	    M_WAITOK|M_ZERO);
135 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
136 	sc->sc_dstport = htons(VXLAN_PORT);
137 	sc->sc_vnetid = VXLAN_VNI_UNSET;
138 
139 	ifp = &sc->sc_ac.ac_if;
140 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit);
141 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
142 	ether_fakeaddr(ifp);
143 
144 	ifp->if_softc = sc;
145 	ifp->if_ioctl = vxlanioctl;
146 	ifp->if_start = vxlanstart;
147 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
148 
149 	ifp->if_hardmtu = 0xffff;
150 	ifp->if_capabilities = IFCAP_VLAN_MTU;
151 
152 	ifmedia_init(&sc->sc_media, 0, vxlan_media_change,
153 	    vxlan_media_status);
154 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
155 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
156 
157 	if_attach(ifp);
158 	ether_ifattach(ifp);
159 
160 #if 0
161 	/*
162 	 * Instead of using a decreased MTU of 1450 bytes, prefer
163 	 * to use the default Ethernet-size MTU of 1500 bytes and to
164 	 * increase the MTU of the outer transport interfaces to
165 	 * at least 1550 bytes. The following is disabled by default.
166 	 */
167 	ifp->if_mtu = ETHERMTU - sizeof(struct ether_header);
168 	ifp->if_mtu -= sizeof(struct vxlanudphdr) + sizeof(struct ipovly);
169 #endif
170 
171 	LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry);
172 	vxlan_enable++;
173 
174 	return (0);
175 }
176 
177 int
178 vxlan_clone_destroy(struct ifnet *ifp)
179 {
180 	struct vxlan_softc	*sc = ifp->if_softc;
181 	int			 s;
182 
183 	s = splnet();
184 	vxlan_multicast_cleanup(ifp);
185 	splx(s);
186 
187 	vxlan_enable--;
188 	LIST_REMOVE(sc, sc_entry);
189 
190 	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
191 	ether_ifdetach(ifp);
192 	if_detach(ifp);
193 	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
194 	free(sc, M_DEVBUF, sizeof(*sc));
195 
196 	return (0);
197 }
198 
199 void
200 vxlan_multicast_cleanup(struct ifnet *ifp)
201 {
202 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
203 	struct ip_moptions	*imo = &sc->sc_imo;
204 	struct ifnet		*mifp;
205 
206 	mifp = if_get(imo->imo_ifidx);
207 	if (mifp != NULL) {
208 		if (sc->sc_ahcookie != NULL) {
209 			hook_disestablish(mifp->if_addrhooks, sc->sc_ahcookie);
210 			sc->sc_ahcookie = NULL;
211 		}
212 		if (sc->sc_lhcookie != NULL) {
213 			hook_disestablish(mifp->if_linkstatehooks,
214 			    sc->sc_lhcookie);
215 			sc->sc_lhcookie = NULL;
216 		}
217 		if (sc->sc_dhcookie != NULL) {
218 			hook_disestablish(mifp->if_detachhooks,
219 			    sc->sc_dhcookie);
220 			sc->sc_dhcookie = NULL;
221 		}
222 
223 		if_put(mifp);
224 	}
225 
226 	if (imo->imo_num_memberships > 0) {
227 		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
228 		imo->imo_ifidx = 0;
229 	}
230 }
231 
232 int
233 vxlan_multicast_join(struct ifnet *ifp, struct sockaddr *src,
234     struct sockaddr *dst)
235 {
236 	struct vxlan_softc	*sc = ifp->if_softc;
237 	struct ip_moptions	*imo = &sc->sc_imo;
238 	struct sockaddr_in	*src4, *dst4;
239 	struct sockaddr_in6	*dst6;
240 	struct ifaddr		*ifa;
241 	struct ifnet		*mifp;
242 
243 	if (dst->sa_family == AF_INET) {
244 		dst4 = satosin(dst);
245 		if (!IN_MULTICAST(dst4->sin_addr.s_addr))
246 			return (0);
247 	} else if (dst->sa_family == AF_INET6) {
248 		dst6 = satosin6(dst);
249 		if (!IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
250 			return (0);
251 
252 		/* Multicast mode is currently not supported for IPv6 */
253 		return (EAFNOSUPPORT);
254 	}
255 
256 	src4 = satosin(src);
257 	dst4 = satosin(dst);
258 
259 	if (src4->sin_addr.s_addr == INADDR_ANY ||
260 	    IN_MULTICAST(src4->sin_addr.s_addr))
261 		return (EINVAL);
262 	if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) == NULL ||
263 	    (mifp = ifa->ifa_ifp) == NULL ||
264 	    (mifp->if_flags & IFF_MULTICAST) == 0)
265 		return (EADDRNOTAVAIL);
266 
267 	if ((imo->imo_membership[0] =
268 	    in_addmulti(&dst4->sin_addr, mifp)) == NULL)
269 		return (ENOBUFS);
270 
271 	imo->imo_num_memberships++;
272 	imo->imo_ifidx = mifp->if_index;
273 	if (sc->sc_ttl > 0)
274 		imo->imo_ttl = sc->sc_ttl;
275 	else
276 		imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL;
277 	imo->imo_loop = 0;
278 
279 	/*
280 	 * Use interface hooks to track any changes on the interface
281 	 * that is used to send out the tunnel traffic as multicast.
282 	 */
283 	if ((sc->sc_ahcookie = hook_establish(mifp->if_addrhooks,
284 	    0, vxlan_addr_change, sc)) == NULL ||
285 	    (sc->sc_lhcookie = hook_establish(mifp->if_linkstatehooks,
286 	    0, vxlan_link_change, sc)) == NULL ||
287 	    (sc->sc_dhcookie = hook_establish(mifp->if_detachhooks,
288 	    0, vxlan_if_change, sc)) == NULL)
289 		panic("%s: cannot allocate interface hook",
290 		    mifp->if_xname);
291 
292 	return (0);
293 }
294 
295 void
296 vxlanstart(struct ifnet *ifp)
297 {
298 	struct mbuf		*m;
299 
300 	for (;;) {
301 		IFQ_DEQUEUE(&ifp->if_snd, m);
302 		if (m == NULL)
303 			return;
304 
305 		ifp->if_opackets++;
306 
307 #if NBPFILTER > 0
308 		if (ifp->if_bpf)
309 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
310 #endif
311 
312 		vxlan_output(ifp, m);
313 	}
314 }
315 
316 int
317 vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
318 {
319 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
320 	int			 reset = 0, error, af;
321 	socklen_t		 slen;
322 	in_port_t		 port;
323 	struct vxlan_taghash	*tagh;
324 
325 	if (src != NULL && dst != NULL) {
326 		if ((af = src->sa_family) != dst->sa_family)
327 			return (EAFNOSUPPORT);
328 	} else {
329 		/* Reset current configuration */
330 		af = sc->sc_src.ss_family;
331 		src = (struct sockaddr *)&sc->sc_src;
332 		dst = (struct sockaddr *)&sc->sc_dst;
333 		reset = 1;
334 	}
335 
336 	if (af == AF_INET)
337 		slen = sizeof(struct sockaddr_in);
338 	else if (af == AF_INET6)
339 		slen = sizeof(struct sockaddr_in6);
340 	else
341 		return (EAFNOSUPPORT);
342 
343 	if (src->sa_len != slen || dst->sa_len != slen)
344 		return (EINVAL);
345 
346 	vxlan_multicast_cleanup(ifp);
347 
348 	/* returns without error if multicast is not configured */
349 	if ((error = vxlan_multicast_join(ifp, src, dst)) != 0)
350 		return (error);
351 
352 	if ((port = vxlan_sockaddr_port(dst)) != 0)
353 		sc->sc_dstport = port;
354 
355 	if (!reset) {
356 		bzero(&sc->sc_src, sizeof(sc->sc_src));
357 		bzero(&sc->sc_dst, sizeof(sc->sc_dst));
358 		memcpy(&sc->sc_src, src, src->sa_len);
359 		memcpy(&sc->sc_dst, dst, dst->sa_len);
360 	}
361 
362 	if (sc->sc_vnetid == VXLAN_VNI_ANY) {
363 		/*
364 		 * If the interface accepts any VNI, put it into a separate
365 		 * list that is not part of the main hash.
366 		 */
367 		tagh = &vxlan_any;
368 	} else
369 		tagh = &vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)];
370 
371 	LIST_REMOVE(sc, sc_entry);
372 	LIST_INSERT_HEAD(tagh, sc, sc_entry);
373 
374 	return (0);
375 }
376 
/*
 * ioctl handler of the vxlan(4) interface.
 *
 * Handles the interface flags, media requests and the tunnel parameters
 * (endpoint addresses, routing domain, TTL and VNI).  Everything else is
 * passed on to ether_ioctl().  Returns 0 or an errno value.
 */
int
vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
	/* data is interpreted as ifreq or if_laddrreq depending on cmd */
	struct ifreq		*ifr = (struct ifreq *)data;
	struct if_laddrreq	*lifr = (struct if_laddrreq *)data;
	int			 error = 0, s;

	switch (cmd) {
	case SIOCSIFADDR:
		/* Assigning an address brings the interface up */
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */

	case SIOCSIFFLAGS:
		/* IFF_RUNNING simply follows IFF_UP */
		if (ifp->if_flags & IFF_UP) {
			ifp->if_flags |= IFF_RUNNING;
		} else {
			ifp->if_flags &= ~IFF_RUNNING;
		}
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* Accepted, but nothing to do */
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCSLIFPHYADDR:
		/* Set the tunnel endpoints; vxlan_config() validates them */
		s = splnet();
		error = vxlan_config(ifp,
		    (struct sockaddr *)&lifr->addr,
		    (struct sockaddr *)&lifr->dstaddr);
		splx(s);
		break;

	case SIOCDIFPHYADDR:
		/* Remove the tunnel endpoints and restore the default port */
		s = splnet();
		vxlan_multicast_cleanup(ifp);
		bzero(&sc->sc_src, sizeof(sc->sc_src));
		bzero(&sc->sc_dst, sizeof(sc->sc_dst));
		sc->sc_dstport = htons(VXLAN_PORT);
		splx(s);
		break;

	case SIOCGLIFPHYADDR:
		/* No destination configured means no tunnel */
		if (sc->sc_dst.ss_family == AF_UNSPEC) {
			error = EADDRNOTAVAIL;
			break;
		}
		bzero(&lifr->addr, sizeof(lifr->addr));
		bzero(&lifr->dstaddr, sizeof(lifr->dstaddr));
		memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len);
		memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len);
		break;

	case SIOCSLIFPHYRTABLE:
		/* The routing table must be in range and must exist */
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		s = splnet();
		sc->sc_rdomain = ifr->ifr_rdomainid;
		/* Re-apply the stored endpoints; the result is ignored */
		(void)vxlan_config(ifp, NULL, NULL);
		splx(s);
		break;

	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_rdomain;
		break;

	case SIOCSLIFPHYTTL:
		/* The TTL has to fit into the 8-bit sc_ttl field */
		if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}
		/* Nothing to do if the value does not change */
		if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl)
			break;
		s = splnet();
		sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl);
		/* Re-apply the stored endpoints; the result is ignored */
		(void)vxlan_config(ifp, NULL, NULL);
		splx(s);
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_ttl;
		break;

	case SIOCSVNETID:
		/* Nothing to do if the value does not change */
		if (sc->sc_vnetid == ifr->ifr_vnetid)
			break;

		/* Accept VXLAN_VNI_ANY or a VNI within MIN..MAX */
		if ((ifr->ifr_vnetid != VXLAN_VNI_ANY) &&
		    (ifr->ifr_vnetid > VXLAN_VNI_MAX ||
		     ifr->ifr_vnetid < VXLAN_VNI_MIN)) {
			error = EINVAL;
			break;
		}

		s = splnet();
		sc->sc_vnetid = (int)ifr->ifr_vnetid;
		/* vxlan_config() moves the softc to the matching list */
		(void)vxlan_config(ifp, NULL, NULL);
		splx(s);
		break;

	case SIOCGVNETID:
		/* Values outside MIN..MAX other than ANY are not reported */
		if ((sc->sc_vnetid != VXLAN_VNI_ANY) &&
		    (sc->sc_vnetid > VXLAN_VNI_MAX ||
		     sc->sc_vnetid < VXLAN_VNI_MIN)) {
			error = EADDRNOTAVAIL;
			break;
		}

		ifr->ifr_vnetid = sc->sc_vnetid;
		break;

	case SIOCDVNETID:
		/* Remove the VNI */
		s = splnet();
		sc->sc_vnetid = VXLAN_VNI_UNSET;
		(void)vxlan_config(ifp, NULL, NULL);
		splx(s);
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	return (error);
}
511 
/*
 * ifmedia change callback.  Nothing can be changed on this interface, so
 * the request is accepted without doing anything.  Always returns 0.
 */
int
vxlan_media_change(struct ifnet *ifp)
{
	return (0);
}
517 
518 void
519 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr)
520 {
521 	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
522 }
523 
/*
 * Compare the network addresses of two socket addresses; ports are ignored.
 *
 * Returns 0 if both have the same address family (AF_INET or AF_INET6) and
 * the same address, and 1 otherwise, including for any other family.
 */
int
vxlan_sockaddr_cmp(struct sockaddr *srcsa, struct sockaddr *dstsa)
{
	struct sockaddr_in	*src4, *dst4;
	struct sockaddr_in6	*src6, *dst6;

	if (srcsa->sa_family != dstsa->sa_family)
		return (1);

	switch (dstsa->sa_family) {
	case AF_INET:
		src4 = (struct sockaddr_in *)srcsa;
		dst4 = (struct sockaddr_in *)dstsa;
		if (src4->sin_addr.s_addr == dst4->sin_addr.s_addr)
			return (0);
		/*
		 * Unequal IPv4 addresses must not fall through into the
		 * IPv6 comparison: it would read past the end of the
		 * sockaddr_in and could report a match.
		 */
		break;
	case AF_INET6:
		src6 = (struct sockaddr_in6 *)srcsa;
		dst6 = (struct sockaddr_in6 *)dstsa;
		if (IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &dst6->sin6_addr))
			return (0);
		break;
	default:
		break;
	}

	return (1);
}
548 
549 uint16_t
550 vxlan_sockaddr_port(struct sockaddr *sa)
551 {
552 	struct sockaddr_in	*sin4;
553 	struct sockaddr_in6	*sin6;
554 
555 	switch (sa->sa_family) {
556 	case AF_INET:
557 		sin4 = satosin(sa);
558 		return (sin4->sin_port);
559 	case AF_INET6:
560 		sin6 = satosin6(sa);
561 		return (sin6->sin6_port);
562 	default:
563 		break;
564 	}
565 
566 	return (0);
567 }
568 
569 int
570 vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen,
571     struct sockaddr *srcsa, struct sockaddr *dstsa)
572 {
573 	struct mbuf_list	 ml = MBUF_LIST_INITIALIZER();
574 	struct vxlan_softc	*sc = NULL, *sc_cand = NULL;
575 	struct vxlan_header	 v;
576 	int			 vni;
577 	struct ifnet		*ifp;
578 	int			 skip;
579 	struct ether_header	*eh;
580 #if NBRIDGE > 0
581 	struct bridge_tunneltag	*brtag;
582 #endif
583 
584 	/* XXX Should verify the UDP port first before copying the packet */
585 	skip = iphlen + sizeof(*uh);
586 	if (m->m_pkthdr.len - skip < sizeof(v))
587 		return (0);
588 	m_copydata(m, skip, sizeof(v), (caddr_t)&v);
589 	skip += sizeof(v);
590 
591 	if (v.vxlan_flags & htonl(VXLAN_RESERVED1) ||
592 	    v.vxlan_id & htonl(VXLAN_RESERVED2))
593 		return (0);
594 
595 	vni = ntohl(v.vxlan_id) >> VXLAN_VNI_S;
596 	if ((v.vxlan_flags & htonl(VXLAN_FLAGS_VNI)) == 0) {
597 		if (vni != 0)
598 			return (0);
599 
600 		vni = VXLAN_VNI_UNSET;
601 	}
602 
603 	/* First search for a vxlan(4) interface with the packet's VNI */
604 	LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) {
605 		if ((uh->uh_dport == sc->sc_dstport) &&
606 		    vni == sc->sc_vnetid &&
607 		    sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid)) {
608 			sc_cand = sc;
609 			if (vxlan_sockaddr_cmp(srcsa,
610 			    (struct sockaddr *)&sc->sc_dst) == 0)
611 				goto found;
612 		}
613 	}
614 
615 	/*
616 	 * Now loop through all the vxlan(4) interfaces that are configured
617 	 * to accept any VNI and operating in multipoint-to-multipoint mode
618 	 * that is used in combination with bridge(4) or switch(4).
619 	 * If a vxlan(4) interface has been found for the packet's VNI, this
620 	 * code is not reached as the other interface is more specific.
621 	 */
622 	LIST_FOREACH(sc, &vxlan_any, sc_entry) {
623 		if ((uh->uh_dport == sc->sc_dstport) &&
624 		    (sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid))) {
625 			sc_cand = sc;
626 			goto found;
627 		}
628 	}
629 
630 	if (sc_cand) {
631 		sc = sc_cand;
632 		goto found;
633 	}
634 
635 	/* not found */
636 	return (0);
637 
638  found:
639 	m_adj(m, skip);
640 	ifp = &sc->sc_ac.ac_if;
641 
642 	if ((eh = mtod(m, struct ether_header *)) == NULL)
643 		return (EINVAL);
644 
645 #if NBRIDGE > 0
646 	/* Store the tunnel src/dst IP and vni for the bridge or switch */
647 	if ((ifp->if_bridgeport != NULL || ifp->if_switchport != NULL) &&
648 	    srcsa->sa_family != AF_UNSPEC &&
649 	    ((brtag = bridge_tunneltag(m)) != NULL)) {
650 		memcpy(&brtag->brtag_src.sa, srcsa, srcsa->sa_len);
651 		memcpy(&brtag->brtag_dst.sa, dstsa, dstsa->sa_len);
652 		brtag->brtag_id = vni;
653 	}
654 #endif
655 
656 	m->m_flags &= ~(M_MCAST|M_BCAST);
657 
658 #if NPF > 0
659 	pf_pkt_addr_changed(m);
660 #endif
661 
662 	ml_enqueue(&ml, m);
663 	if_input(ifp, &ml);
664 
665 	/* success */
666 	return (1);
667 }
668 
669 struct mbuf *
670 vxlan_encap4(struct ifnet *ifp, struct mbuf *m,
671     struct sockaddr *src, struct sockaddr *dst)
672 {
673 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
674 	struct ip		*ip;
675 
676 	M_PREPEND(m, sizeof(*ip), M_DONTWAIT);
677 	if (m == NULL)
678 		return (NULL);
679 
680 	ip = mtod(m, struct ip *);
681 	ip->ip_v = IPVERSION;
682 	ip->ip_hl = sizeof(struct ip) >> 2;
683 	ip->ip_id = htons(ip_randomid());
684 	ip->ip_off = 0; /* htons(IP_DF); XXX should we disallow IP fragments? */
685 	ip->ip_p = IPPROTO_UDP;
686 	ip->ip_tos = IPTOS_LOWDELAY;
687 	ip->ip_len = htons(m->m_pkthdr.len);
688 
689 	ip->ip_src = satosin(src)->sin_addr;
690 	ip->ip_dst = satosin(dst)->sin_addr;
691 
692 	if (sc->sc_ttl > 0)
693 		ip->ip_ttl = sc->sc_ttl;
694 	else
695 		ip->ip_ttl = IPDEFTTL;
696 
697 	return (m);
698 }
699 
700 struct mbuf *
701 vxlan_encap6(struct ifnet *ifp, struct mbuf *m,
702     struct sockaddr *src, struct sockaddr *dst)
703 {
704 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
705 	struct ip6_hdr		*ip6;
706 	struct in6_addr		*in6a;
707 	int			 error;
708 
709 	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
710 	if (m == NULL)
711 		return (NULL);
712 
713 	ip6 = mtod(m, struct ip6_hdr *);
714 	ip6->ip6_flow = 0;
715 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
716 	ip6->ip6_vfc |= IPV6_VERSION;
717 	ip6->ip6_nxt = IPPROTO_UDP;
718 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
719 	ip6->ip6_src  = satosin6(src)->sin6_addr;
720 	ip6->ip6_dst = satosin6(dst)->sin6_addr;
721 
722 	if (sc->sc_ttl > 0)
723 		ip6->ip6_hlim = sc->sc_ttl;
724 	else
725 		ip6->ip6_hlim = ip6_defhlim;
726 
727 	if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
728 		error = in6_selectsrc(&in6a, satosin6(dst), NULL, NULL,
729 		    sc->sc_rdomain);
730 		if (error != 0) {
731 			m_freem(m);
732 			return (NULL);
733 		}
734 		ip6->ip6_src = *in6a;
735 	}
736 
737 	/*
738 	 * The UDP checksum of VXLAN packets should be set to zero,
739 	 * but the IPv6 UDP checksum is not optional.  There is an RFC 6539
740 	 * to relax the IPv6 UDP checksum requirement for tunnels, but it
741 	 * is currently not supported by most implementations.
742 	 */
743 	m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
744 
745 	return (m);
746 }
747 
748 int
749 vxlan_output(struct ifnet *ifp, struct mbuf *m)
750 {
751 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
752 	struct vxlanudphdr	*vu;
753 	struct sockaddr		*src, *dst;
754 #if NBRIDGE > 0
755 	struct bridge_tunneltag	*brtag;
756 #endif
757 	int			 error, af;
758 	uint32_t		 tag;
759 
760 	/* VXLAN header */
761 	M_PREPEND(m, sizeof(*vu), M_DONTWAIT);
762 	if (m == NULL) {
763 		ifp->if_oerrors++;
764 		return (ENOBUFS);
765 	}
766 
767 	src = (struct sockaddr *)&sc->sc_src;
768 	dst = (struct sockaddr *)&sc->sc_dst;
769 	af = src->sa_family;
770 
771 	vu = mtod(m, struct vxlanudphdr *);
772 	vu->vu_u.uh_sport = sc->sc_dstport;
773 	vu->vu_u.uh_dport = sc->sc_dstport;
774 	vu->vu_u.uh_ulen = htons(m->m_pkthdr.len);
775 	vu->vu_u.uh_sum = 0;
776 	tag = sc->sc_vnetid;
777 
778 #if NBRIDGE > 0
779 	if ((brtag = bridge_tunnel(m)) != NULL) {
780 		dst = &brtag->brtag_dst.sa;
781 
782 		/* If accepting any VNI, source ip address is from brtag */
783 		if (sc->sc_vnetid == VXLAN_VNI_ANY) {
784 			src = &brtag->brtag_src.sa;
785 			tag = (uint32_t)brtag->brtag_id;
786 			af = src->sa_family;
787 		}
788 
789 		if (dst->sa_family != af) {
790 			ifp->if_oerrors++;
791 			m_freem(m);
792 			return (EINVAL);
793 		}
794 	} else
795 #endif
796 	if (sc->sc_vnetid == VXLAN_VNI_ANY) {
797 		/*
798 		 * If accepting any VNI, build the vxlan header only by
799 		 * bridge_tunneltag or drop packet if the tag does not exist.
800 		 */
801 		ifp->if_oerrors++;
802 		m_freem(m);
803 		return (ENETUNREACH);
804 	}
805 
806 	if (sc->sc_vnetid != VXLAN_VNI_UNSET) {
807 		vu->vu_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI);
808 		vu->vu_v.vxlan_id = htonl(tag << VXLAN_VNI_S);
809 	} else {
810 		vu->vu_v.vxlan_flags = htonl(0);
811 		vu->vu_v.vxlan_id = htonl(0);
812 	}
813 
814 	if (af == AF_INET)
815 		m = vxlan_encap4(ifp, m, src, dst);
816 	else if (af == AF_INET6)
817 		m = vxlan_encap6(ifp, m, src, dst);
818 	else {
819 		m_freem(m);
820 		m = NULL;
821 	}
822 
823 	if (m == NULL) {
824 		ifp->if_oerrors++;
825 		return (ENOBUFS);
826 	}
827 
828 #if NBRIDGE > 0
829 	if (brtag != NULL)
830 		bridge_tunneluntag(m);
831 #endif
832 
833 	ifp->if_opackets++;
834 	ifp->if_obytes += m->m_pkthdr.len;
835 
836 	m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
837 
838 #if NPF > 0
839 	pf_pkt_addr_changed(m);
840 #endif
841 
842 	if (af == AF_INET)
843 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT,
844 		    &sc->sc_imo, NULL, 0);
845 	else
846 		error = ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL);
847 
848 	if (error)
849 		ifp->if_oerrors++;
850 
851 	return (error);
852 }
853 
854 void
855 vxlan_addr_change(void *arg)
856 {
857 	struct vxlan_softc	*sc = arg;
858 	struct ifnet		*ifp = &sc->sc_ac.ac_if;
859 	int			 s, error;
860 
861 	/*
862 	 * Reset the configuration after resume or any possible address
863 	 * configuration changes.
864 	 */
865 	s = splnet();
866 	if ((error = vxlan_config(ifp, NULL, NULL))) {
867 		/*
868 		 * The source address of the tunnel can temporarily disappear,
869 		 * after a link state change when running the DHCP client,
870 		 * so keep it configured.
871 		 */
872 	}
873 	splx(s);
874 }
875 
876 void
877 vxlan_if_change(void *arg)
878 {
879 	struct vxlan_softc	*sc = arg;
880 	struct ifnet		*ifp = &sc->sc_ac.ac_if;
881 	int			 s, error;
882 
883 	/*
884 	 * Reset the configuration after the parent interface disappeared.
885 	 */
886 	s = splnet();
887 	if ((error = vxlan_config(ifp, NULL, NULL)) != 0) {
888 		/* The configured tunnel addresses are invalid, remove them */
889 		bzero(&sc->sc_src, sizeof(sc->sc_src));
890 		bzero(&sc->sc_dst, sizeof(sc->sc_dst));
891 	}
892 	splx(s);
893 }
894 
895 void
896 vxlan_link_change(void *arg)
897 {
898 	struct vxlan_softc	*sc = arg;
899 	struct ifnet		*ifp = &sc->sc_ac.ac_if;
900 	int			 s;
901 
902 	/*
903 	 * The machine might have lost its multicast associations after
904 	 * link state changes.  This fixes a problem with VMware after
905 	 * suspend/resume of the host or guest.
906 	 */
907 	s = splnet();
908 	(void)vxlan_config(ifp, NULL, NULL);
909 	splx(s);
910 }
911