xref: /openbsd-src/sys/net/if_vxlan.c (revision 4b70baf6e17fc8b27fc1f7fa7929335753fa94c3)
1 /*	$OpenBSD: if_vxlan.c,v 1.72 2019/04/28 22:15:58 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bpfilter.h"
20 #include "vxlan.h"
21 #include "vlan.h"
22 #include "pf.h"
23 #include "bridge.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/mbuf.h>
28 #include <sys/socket.h>
29 #include <sys/sockio.h>
30 #include <sys/ioctl.h>
31 
32 #include <net/if.h>
33 #include <net/if_var.h>
34 #include <net/if_media.h>
35 #include <net/route.h>
36 
37 #if NBPFILTER > 0
38 #include <net/bpf.h>
39 #endif
40 
41 #include <netinet/in.h>
42 #include <netinet/in_var.h>
43 #include <netinet/if_ether.h>
44 #include <netinet/ip.h>
45 #include <netinet/ip_var.h>
46 #include <netinet/udp.h>
47 #include <netinet/udp_var.h>
48 #include <netinet/in_pcb.h>
49 
50 #if NPF > 0
51 #include <net/pfvar.h>
52 #endif
53 
54 #if NBRIDGE > 0
55 #include <net/if_bridge.h>
56 #endif
57 
58 #include <net/if_vxlan.h>
59 
/*
 * Per-interface state of a vxlan(4) pseudo-device.
 */
struct vxlan_softc {
	/* Ethernet common data; sc_ac.ac_if is the interface itself */
	struct arpcom		 sc_ac;
	struct ifmedia		 sc_media;

	/* Multicast options handed to ip_output() for the outer packet */
	struct ip_moptions	 sc_imo;
	/*
	 * Cookies of the address, link-state and detach hooks established
	 * on the multicast parent interface (NULL when not established).
	 */
	void			*sc_ahcookie;
	void			*sc_lhcookie;
	void			*sc_dhcookie;

	/* Tunnel endpoints: local (src) and remote (dst) outer addresses */
	struct sockaddr_storage	 sc_src;
	struct sockaddr_storage	 sc_dst;
	/* UDP port, network byte order; used as both sport and dport */
	in_port_t		 sc_dstport;
	/* Routing domain of the outer (tunnel) traffic */
	u_int			 sc_rdomain;
	/* VNI, or one of VXLAN_VNI_UNSET / VXLAN_VNI_ANY */
	int64_t			 sc_vnetid;
	/* Value copied into ip_off: htons(IP_DF) or 0 */
	uint16_t		 sc_df;
	/* Outer TTL / hop limit; 0 selects the protocol default */
	u_int8_t		 sc_ttl;
	/* Priority for the outer header, or IF_HDRPRIO_PACKET */
	int			 sc_txhprio;

	/* Task that runs vxlan_send_dispatch() for this interface */
	struct task		 sc_sendtask;

	/* Linkage in a vxlan_tagh bucket or in the vxlan_any list */
	LIST_ENTRY(vxlan_softc)	 sc_entry;
};
82 
/* Attachment, cloning and configuration entry points */
void	 vxlanattach(int);
int	 vxlanioctl(struct ifnet *, u_long, caddr_t);
void	 vxlanstart(struct ifnet *);
int	 vxlan_clone_create(struct if_clone *, int);
int	 vxlan_clone_destroy(struct ifnet *);
void	 vxlan_multicast_cleanup(struct ifnet *);
int	 vxlan_multicast_join(struct ifnet *, struct sockaddr *,
	    struct sockaddr *);
int	 vxlan_media_change(struct ifnet *);
void	 vxlan_media_status(struct ifnet *, struct ifmediareq *);
int	 vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *);
int	 vxlan_output(struct ifnet *, struct mbuf *);
/* Hook callbacks registered on the multicast parent interface */
void	 vxlan_addr_change(void *);
void	 vxlan_if_change(void *);
void	 vxlan_link_change(void *);
/* Task callback that drains the send queue */
void	 vxlan_send_dispatch(void *);

/* Small sockaddr helpers */
int	 vxlan_sockaddr_cmp(struct sockaddr *, struct sockaddr *);
uint16_t vxlan_sockaddr_port(struct sockaddr *);

/* Cloner that creates and destroys "vxlanN" interfaces */
struct if_clone	vxlan_cloner =
    IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy);

/* Number of existing vxlan interfaces (bumped on create, dropped on destroy) */
int	 vxlan_enable = 0;
/* Bucket mask filled in by hashinit() in vxlanattach() */
u_long	 vxlan_tagmask;

/*
 * Interfaces are hashed by VNI into vxlan_tagh; interfaces that accept
 * any VNI are kept on the separate vxlan_any list instead.
 */
#define VXLAN_TAGHASHSIZE		 32
#define VXLAN_TAGHASH(tag)		 ((unsigned int)tag & vxlan_tagmask)
LIST_HEAD(vxlan_taghash, vxlan_softc)	*vxlan_tagh, vxlan_any;
112 
113 void
114 vxlanattach(int count)
115 {
116 	/* Regular vxlan interfaces with a VNI */
117 	if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT,
118 	    &vxlan_tagmask)) == NULL)
119 		panic("vxlanattach: hashinit");
120 
121 	/* multipoint-to-multipoint interfaces that accept any VNI */
122 	LIST_INIT(&vxlan_any);
123 
124 	if_clone_attach(&vxlan_cloner);
125 }
126 
127 int
128 vxlan_clone_create(struct if_clone *ifc, int unit)
129 {
130 	struct ifnet		*ifp;
131 	struct vxlan_softc	*sc;
132 
133 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
134 	sc->sc_imo.imo_membership = malloc(
135 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
136 	    M_WAITOK|M_ZERO);
137 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
138 	sc->sc_dstport = htons(VXLAN_PORT);
139 	sc->sc_vnetid = VXLAN_VNI_UNSET;
140 	sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */
141 	sc->sc_df = htons(0);
142 	task_set(&sc->sc_sendtask, vxlan_send_dispatch, sc);
143 
144 	ifp = &sc->sc_ac.ac_if;
145 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit);
146 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
147 	ether_fakeaddr(ifp);
148 
149 	ifp->if_softc = sc;
150 	ifp->if_ioctl = vxlanioctl;
151 	ifp->if_start = vxlanstart;
152 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
153 
154 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
155 	ifp->if_capabilities = IFCAP_VLAN_MTU;
156 
157 	ifmedia_init(&sc->sc_media, 0, vxlan_media_change,
158 	    vxlan_media_status);
159 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
160 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
161 
162 	if_counters_alloc(ifp);
163 	if_attach(ifp);
164 	ether_ifattach(ifp);
165 
166 #if 0
167 	/*
168 	 * Instead of using a decreased MTU of 1450 bytes, prefer
169 	 * to use the default Ethernet-size MTU of 1500 bytes and to
170 	 * increase the MTU of the outer transport interfaces to
171 	 * at least 1550 bytes. The following is disabled by default.
172 	 */
173 	ifp->if_mtu = ETHERMTU - sizeof(struct ether_header);
174 	ifp->if_mtu -= sizeof(struct vxlanudphdr) + sizeof(struct ipovly);
175 #endif
176 
177 	LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry);
178 	vxlan_enable++;
179 
180 	return (0);
181 }
182 
183 int
184 vxlan_clone_destroy(struct ifnet *ifp)
185 {
186 	struct vxlan_softc	*sc = ifp->if_softc;
187 
188 	NET_LOCK();
189 	vxlan_multicast_cleanup(ifp);
190 	NET_UNLOCK();
191 
192 	vxlan_enable--;
193 	LIST_REMOVE(sc, sc_entry);
194 
195 	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
196 	ether_ifdetach(ifp);
197 	if_detach(ifp);
198 
199 	if (!task_del(net_tq(ifp->if_index), &sc->sc_sendtask))
200 		taskq_barrier(net_tq(ifp->if_index));
201 
202 	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
203 	free(sc, M_DEVBUF, sizeof(*sc));
204 
205 	return (0);
206 }
207 
208 void
209 vxlan_multicast_cleanup(struct ifnet *ifp)
210 {
211 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
212 	struct ip_moptions	*imo = &sc->sc_imo;
213 	struct ifnet		*mifp;
214 
215 	mifp = if_get(imo->imo_ifidx);
216 	if (mifp != NULL) {
217 		if (sc->sc_ahcookie != NULL) {
218 			hook_disestablish(mifp->if_addrhooks, sc->sc_ahcookie);
219 			sc->sc_ahcookie = NULL;
220 		}
221 		if (sc->sc_lhcookie != NULL) {
222 			hook_disestablish(mifp->if_linkstatehooks,
223 			    sc->sc_lhcookie);
224 			sc->sc_lhcookie = NULL;
225 		}
226 		if (sc->sc_dhcookie != NULL) {
227 			hook_disestablish(mifp->if_detachhooks,
228 			    sc->sc_dhcookie);
229 			sc->sc_dhcookie = NULL;
230 		}
231 
232 		if_put(mifp);
233 	}
234 
235 	if (imo->imo_num_memberships > 0) {
236 		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
237 		imo->imo_ifidx = 0;
238 	}
239 }
240 
241 int
242 vxlan_multicast_join(struct ifnet *ifp, struct sockaddr *src,
243     struct sockaddr *dst)
244 {
245 	struct vxlan_softc	*sc = ifp->if_softc;
246 	struct ip_moptions	*imo = &sc->sc_imo;
247 	struct sockaddr_in	*src4, *dst4;
248 #ifdef INET6
249 	struct sockaddr_in6	*dst6;
250 #endif /* INET6 */
251 	struct ifaddr		*ifa;
252 	struct ifnet		*mifp;
253 
254 	switch (dst->sa_family) {
255 	case AF_INET:
256 		dst4 = satosin(dst);
257 		if (!IN_MULTICAST(dst4->sin_addr.s_addr))
258 			return (0);
259 		break;
260 #ifdef INET6
261 	case AF_INET6:
262 		dst6 = satosin6(dst);
263 		if (!IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
264 			return (0);
265 
266 		/* Multicast mode is currently not supported for IPv6 */
267 		return (EAFNOSUPPORT);
268 #endif /* INET6 */
269 	default:
270 		return (EAFNOSUPPORT);
271 	}
272 
273 	src4 = satosin(src);
274 	dst4 = satosin(dst);
275 
276 	if (src4->sin_addr.s_addr == INADDR_ANY ||
277 	    IN_MULTICAST(src4->sin_addr.s_addr))
278 		return (EINVAL);
279 	if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) == NULL ||
280 	    (mifp = ifa->ifa_ifp) == NULL ||
281 	    (mifp->if_flags & IFF_MULTICAST) == 0)
282 		return (EADDRNOTAVAIL);
283 
284 	if ((imo->imo_membership[0] =
285 	    in_addmulti(&dst4->sin_addr, mifp)) == NULL)
286 		return (ENOBUFS);
287 
288 	imo->imo_num_memberships++;
289 	imo->imo_ifidx = mifp->if_index;
290 	if (sc->sc_ttl > 0)
291 		imo->imo_ttl = sc->sc_ttl;
292 	else
293 		imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL;
294 	imo->imo_loop = 0;
295 
296 	/*
297 	 * Use interface hooks to track any changes on the interface
298 	 * that is used to send out the tunnel traffic as multicast.
299 	 */
300 	if ((sc->sc_ahcookie = hook_establish(mifp->if_addrhooks,
301 	    0, vxlan_addr_change, sc)) == NULL ||
302 	    (sc->sc_lhcookie = hook_establish(mifp->if_linkstatehooks,
303 	    0, vxlan_link_change, sc)) == NULL ||
304 	    (sc->sc_dhcookie = hook_establish(mifp->if_detachhooks,
305 	    0, vxlan_if_change, sc)) == NULL)
306 		panic("%s: cannot allocate interface hook",
307 		    mifp->if_xname);
308 
309 	return (0);
310 }
311 
312 void
313 vxlanstart(struct ifnet *ifp)
314 {
315 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
316 
317 	task_add(net_tq(ifp->if_index), &sc->sc_sendtask);
318 }
319 
320 void
321 vxlan_send_dispatch(void *xsc)
322 {
323 	struct vxlan_softc	*sc = xsc;
324 	struct ifnet		*ifp = &sc->sc_ac.ac_if;
325 	struct mbuf		*m;
326 	struct mbuf_list	 ml;
327 
328 	ml_init(&ml);
329 	for (;;) {
330 		IFQ_DEQUEUE(&ifp->if_snd, m);
331 		if (m == NULL)
332 			break;
333 
334 #if NBPFILTER > 0
335 		if (ifp->if_bpf)
336 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
337 #endif
338 
339 		ml_enqueue(&ml, m);
340 	}
341 
342 	if (ml_empty(&ml))
343 		return;
344 
345 	NET_RLOCK();
346 	while ((m = ml_dequeue(&ml)) != NULL) {
347 		vxlan_output(ifp, m);
348 	}
349 	NET_RUNLOCK();
350 }
351 
352 
353 int
354 vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
355 {
356 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
357 	int			 reset = 0, error, af;
358 	socklen_t		 slen;
359 	in_port_t		 port;
360 	struct vxlan_taghash	*tagh;
361 
362 	if (src != NULL && dst != NULL) {
363 		if ((af = src->sa_family) != dst->sa_family)
364 			return (EAFNOSUPPORT);
365 	} else {
366 		/* Reset current configuration */
367 		af = sc->sc_src.ss_family;
368 		src = sstosa(&sc->sc_src);
369 		dst = sstosa(&sc->sc_dst);
370 		reset = 1;
371 	}
372 
373 	switch (af) {
374 	case AF_INET:
375 		slen = sizeof(struct sockaddr_in);
376 		break;
377 #ifdef INET6
378 	case AF_INET6:
379 		slen = sizeof(struct sockaddr_in6);
380 		break;
381 #endif /* INET6 */
382 	default:
383 		return (EAFNOSUPPORT);
384 	}
385 
386 	if (src->sa_len != slen || dst->sa_len != slen)
387 		return (EINVAL);
388 
389 	vxlan_multicast_cleanup(ifp);
390 
391 	/* returns without error if multicast is not configured */
392 	if ((error = vxlan_multicast_join(ifp, src, dst)) != 0)
393 		return (error);
394 
395 	if ((port = vxlan_sockaddr_port(dst)) != 0)
396 		sc->sc_dstport = port;
397 
398 	if (!reset) {
399 		bzero(&sc->sc_src, sizeof(sc->sc_src));
400 		bzero(&sc->sc_dst, sizeof(sc->sc_dst));
401 		memcpy(&sc->sc_src, src, src->sa_len);
402 		memcpy(&sc->sc_dst, dst, dst->sa_len);
403 	}
404 
405 	if (sc->sc_vnetid == VXLAN_VNI_ANY) {
406 		/*
407 		 * If the interface accepts any VNI, put it into a separate
408 		 * list that is not part of the main hash.
409 		 */
410 		tagh = &vxlan_any;
411 	} else
412 		tagh = &vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)];
413 
414 	LIST_REMOVE(sc, sc_entry);
415 	LIST_INSERT_HEAD(tagh, sc, sc_entry);
416 
417 	return (0);
418 }
419 
420 int
421 vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
422 {
423 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
424 	struct ifreq		*ifr = (struct ifreq *)data;
425 	struct if_laddrreq	*lifr = (struct if_laddrreq *)data;
426 	int			 error = 0;
427 
428 	switch (cmd) {
429 	case SIOCSIFADDR:
430 		ifp->if_flags |= IFF_UP;
431 		/* FALLTHROUGH */
432 
433 	case SIOCSIFFLAGS:
434 		if (ifp->if_flags & IFF_UP) {
435 			ifp->if_flags |= IFF_RUNNING;
436 		} else {
437 			ifp->if_flags &= ~IFF_RUNNING;
438 		}
439 		break;
440 
441 	case SIOCADDMULTI:
442 	case SIOCDELMULTI:
443 		break;
444 
445 	case SIOCGIFMEDIA:
446 	case SIOCSIFMEDIA:
447 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
448 		break;
449 
450 	case SIOCSLIFPHYADDR:
451 		error = vxlan_config(ifp,
452 		    sstosa(&lifr->addr),
453 		    sstosa(&lifr->dstaddr));
454 		break;
455 
456 	case SIOCDIFPHYADDR:
457 		vxlan_multicast_cleanup(ifp);
458 		bzero(&sc->sc_src, sizeof(sc->sc_src));
459 		bzero(&sc->sc_dst, sizeof(sc->sc_dst));
460 		sc->sc_dstport = htons(VXLAN_PORT);
461 		break;
462 
463 	case SIOCGLIFPHYADDR:
464 		if (sc->sc_dst.ss_family == AF_UNSPEC) {
465 			error = EADDRNOTAVAIL;
466 			break;
467 		}
468 		bzero(&lifr->addr, sizeof(lifr->addr));
469 		bzero(&lifr->dstaddr, sizeof(lifr->dstaddr));
470 		memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len);
471 		memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len);
472 		break;
473 
474 	case SIOCSLIFPHYRTABLE:
475 		if (ifr->ifr_rdomainid < 0 ||
476 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
477 		    !rtable_exists(ifr->ifr_rdomainid)) {
478 			error = EINVAL;
479 			break;
480 		}
481 		sc->sc_rdomain = ifr->ifr_rdomainid;
482 		(void)vxlan_config(ifp, NULL, NULL);
483 		break;
484 
485 	case SIOCGLIFPHYRTABLE:
486 		ifr->ifr_rdomainid = sc->sc_rdomain;
487 		break;
488 
489 	case SIOCSLIFPHYTTL:
490 		if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) {
491 			error = EINVAL;
492 			break;
493 		}
494 		if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl)
495 			break;
496 		sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl);
497 		(void)vxlan_config(ifp, NULL, NULL);
498 		break;
499 
500 	case SIOCGLIFPHYTTL:
501 		ifr->ifr_ttl = (int)sc->sc_ttl;
502 		break;
503 
504 	case SIOCSLIFPHYDF:
505 		/* commit */
506 		sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
507 		break;
508 	case SIOCGLIFPHYDF:
509 		ifr->ifr_df = sc->sc_df ? 1 : 0;
510 		break;
511 
512 	case SIOCSTXHPRIO:
513 		if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET)
514 			; /* fall through */
515 		else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN ||
516 		    ifr->ifr_hdrprio > IF_HDRPRIO_MAX) {
517 			error = EINVAL;
518 			break;
519 		}
520 
521 		sc->sc_txhprio = ifr->ifr_hdrprio;
522 		break;
523 	case SIOCGTXHPRIO:
524 		ifr->ifr_hdrprio = sc->sc_txhprio;
525 		break;
526 
527 	case SIOCSVNETID:
528 		if (sc->sc_vnetid == ifr->ifr_vnetid)
529 			break;
530 
531 		if ((ifr->ifr_vnetid != VXLAN_VNI_ANY) &&
532 		    (ifr->ifr_vnetid > VXLAN_VNI_MAX ||
533 		     ifr->ifr_vnetid < VXLAN_VNI_MIN)) {
534 			error = EINVAL;
535 			break;
536 		}
537 
538 		sc->sc_vnetid = (int)ifr->ifr_vnetid;
539 		(void)vxlan_config(ifp, NULL, NULL);
540 		break;
541 
542 	case SIOCGVNETID:
543 		if ((sc->sc_vnetid != VXLAN_VNI_ANY) &&
544 		    (sc->sc_vnetid > VXLAN_VNI_MAX ||
545 		     sc->sc_vnetid < VXLAN_VNI_MIN)) {
546 			error = EADDRNOTAVAIL;
547 			break;
548 		}
549 
550 		ifr->ifr_vnetid = sc->sc_vnetid;
551 		break;
552 
553 	case SIOCDVNETID:
554 		sc->sc_vnetid = VXLAN_VNI_UNSET;
555 		(void)vxlan_config(ifp, NULL, NULL);
556 		break;
557 
558 	default:
559 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
560 		break;
561 	}
562 
563 	return (error);
564 }
565 
/*
 * ifmedia change callback.  There is only one medium, so a change request
 * is accepted without doing anything.
 */
int
vxlan_media_change(struct ifnet *ifp)
{
	return 0;
}
571 
572 void
573 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr)
574 {
575 	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
576 }
577 
578 int
579 vxlan_sockaddr_cmp(struct sockaddr *srcsa, struct sockaddr *dstsa)
580 {
581 	struct sockaddr_in	*src4, *dst4;
582 #ifdef INET6
583 	struct sockaddr_in6	*src6, *dst6;
584 #endif /* INET6 */
585 
586 	if (srcsa->sa_family != dstsa->sa_family)
587 		return (1);
588 
589 	switch (dstsa->sa_family) {
590 	case AF_INET:
591 		src4 = satosin(srcsa);
592 		dst4 = satosin(dstsa);
593 		if (src4->sin_addr.s_addr == dst4->sin_addr.s_addr)
594 			return (0);
595 		break;
596 #ifdef INET6
597 	case AF_INET6:
598 		src6 = satosin6(srcsa);
599 		dst6 = satosin6(dstsa);
600 		if (IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &dst6->sin6_addr) &&
601 		    src6->sin6_scope_id == dst6->sin6_scope_id)
602 			return (0);
603 		break;
604 #endif /* INET6 */
605 	}
606 
607 	return (1);
608 }
609 
610 uint16_t
611 vxlan_sockaddr_port(struct sockaddr *sa)
612 {
613 	struct sockaddr_in	*sin4;
614 #ifdef INET6
615 	struct sockaddr_in6	*sin6;
616 #endif /* INET6 */
617 
618 	switch (sa->sa_family) {
619 	case AF_INET:
620 		sin4 = satosin(sa);
621 		return (sin4->sin_port);
622 #ifdef INET6
623 	case AF_INET6:
624 		sin6 = satosin6(sa);
625 		return (sin6->sin6_port);
626 #endif /* INET6 */
627 	default:
628 		break;
629 	}
630 
631 	return (0);
632 }
633 
634 int
635 vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen,
636     struct sockaddr *srcsa, struct sockaddr *dstsa)
637 {
638 	struct vxlan_softc	*sc = NULL, *sc_cand = NULL;
639 	struct vxlan_header	 v;
640 	int			 vni;
641 	struct ifnet		*ifp;
642 	int			 skip;
643 #if NBRIDGE > 0
644 	struct bridge_tunneltag	*brtag;
645 #endif
646 	struct mbuf		*n;
647 	int			 off;
648 
649 	/* XXX Should verify the UDP port first before copying the packet */
650 	skip = iphlen + sizeof(*uh);
651 	if (m->m_pkthdr.len - skip < sizeof(v))
652 		return (0);
653 	m_copydata(m, skip, sizeof(v), (caddr_t)&v);
654 	skip += sizeof(v);
655 
656 	if (v.vxlan_flags & htonl(VXLAN_RESERVED1) ||
657 	    v.vxlan_id & htonl(VXLAN_RESERVED2))
658 		return (0);
659 
660 	vni = ntohl(v.vxlan_id) >> VXLAN_VNI_S;
661 	if ((v.vxlan_flags & htonl(VXLAN_FLAGS_VNI)) == 0) {
662 		if (vni != 0)
663 			return (0);
664 
665 		vni = VXLAN_VNI_UNSET;
666 	}
667 
668 	NET_ASSERT_LOCKED();
669 	/* First search for a vxlan(4) interface with the packet's VNI */
670 	LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) {
671 		if ((uh->uh_dport == sc->sc_dstport) &&
672 		    vni == sc->sc_vnetid &&
673 		    sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid)) {
674 			sc_cand = sc;
675 			if (vxlan_sockaddr_cmp(srcsa, sstosa(&sc->sc_dst)) == 0)
676 				goto found;
677 		}
678 	}
679 
680 	/*
681 	 * Now loop through all the vxlan(4) interfaces that are configured
682 	 * to accept any VNI and operating in multipoint-to-multipoint mode
683 	 * that is used in combination with bridge(4) or switch(4).
684 	 * If a vxlan(4) interface has been found for the packet's VNI, this
685 	 * code is not reached as the other interface is more specific.
686 	 */
687 	LIST_FOREACH(sc, &vxlan_any, sc_entry) {
688 		if ((uh->uh_dport == sc->sc_dstport) &&
689 		    (sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid))) {
690 			sc_cand = sc;
691 			goto found;
692 		}
693 	}
694 
695 	if (sc_cand) {
696 		sc = sc_cand;
697 		goto found;
698 	}
699 
700 	/* not found */
701 	return (0);
702 
703  found:
704 	if (m->m_pkthdr.len < skip + sizeof(struct ether_header)) {
705 		m_freem(m);
706 		return (EINVAL);
707 	}
708 
709 	m_adj(m, skip);
710 	ifp = &sc->sc_ac.ac_if;
711 
712 #if NBRIDGE > 0
713 	/* Store the tunnel src/dst IP and vni for the bridge or switch */
714 	if ((ifp->if_bridgeidx != 0 || ifp->if_switchport != NULL) &&
715 	    srcsa->sa_family != AF_UNSPEC &&
716 	    ((brtag = bridge_tunneltag(m)) != NULL)) {
717 		memcpy(&brtag->brtag_peer.sa, srcsa, srcsa->sa_len);
718 		memcpy(&brtag->brtag_local.sa, dstsa, dstsa->sa_len);
719 		brtag->brtag_id = vni;
720 	}
721 #endif
722 
723 	m->m_flags &= ~(M_BCAST|M_MCAST);
724 
725 #if NPF > 0
726 	pf_pkt_addr_changed(m);
727 #endif
728 	if ((m->m_len < sizeof(struct ether_header)) &&
729 	    (m = m_pullup(m, sizeof(struct ether_header))) == NULL)
730 		return (ENOBUFS);
731 
732 	n = m_getptr(m, sizeof(struct ether_header), &off);
733 	if (n == NULL) {
734 		m_freem(m);
735 		return (EINVAL);
736 	}
737 	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
738 		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
739 		/* Dispose of the original mbuf chain */
740 		m_freem(m);
741 		if (n == NULL)
742 			return (ENOBUFS);
743 		m = n;
744 	}
745 
746 	if_vinput(ifp, m);
747 
748 	/* success */
749 	return (1);
750 }
751 
752 struct mbuf *
753 vxlan_encap4(struct ifnet *ifp, struct mbuf *m,
754     struct sockaddr *src, struct sockaddr *dst)
755 {
756 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
757 	struct ip		*ip;
758 
759 	/*
760 	 * Remove multicast and broadcast flags or encapsulated packet
761 	 * ends up as multicast or broadcast packet.
762 	 */
763 	m->m_flags &= ~(M_BCAST|M_MCAST);
764 
765 	M_PREPEND(m, sizeof(*ip), M_DONTWAIT);
766 	if (m == NULL)
767 		return (NULL);
768 
769 	ip = mtod(m, struct ip *);
770 	ip->ip_v = IPVERSION;
771 	ip->ip_hl = sizeof(struct ip) >> 2;
772 	ip->ip_id = htons(ip_randomid());
773 	ip->ip_off = sc->sc_df;
774 	ip->ip_p = IPPROTO_UDP;
775 	ip->ip_tos = IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ?
776 	    m->m_pkthdr.pf.prio : sc->sc_txhprio);
777 	ip->ip_len = htons(m->m_pkthdr.len);
778 
779 	ip->ip_src = satosin(src)->sin_addr;
780 	ip->ip_dst = satosin(dst)->sin_addr;
781 
782 	if (sc->sc_ttl > 0)
783 		ip->ip_ttl = sc->sc_ttl;
784 	else
785 		ip->ip_ttl = IPDEFTTL;
786 
787 	return (m);
788 }
789 
#ifdef INET6
/*
 * Prepend an IPv6 header for the tunnel to m.
 *
 * The traffic class comes from the configured header priority (or the
 * packet's own priority for IF_HDRPRIO_PACKET) and the hop limit from the
 * tunnel TTL, or ip6_defhlim when unset.  An unspecified source address is
 * replaced by one chosen with in6_selectsrc().  UDP checksum calculation is
 * requested for the outgoing packet.
 * Returns the new head of the chain, or NULL after the packet was dropped.
 */
struct mbuf *
vxlan_encap6(struct ifnet *ifp, struct mbuf *m,
    struct sockaddr *src, struct sockaddr *dst)
{
	struct vxlan_softc	*sc = ifp->if_softc;
	struct ip6_hdr		*ip6;
	struct in6_addr		*in6a;
	uint32_t		 flow;
	int			 prio;

	/*
	 * Remove multicast and broadcast flags or encapsulated packet
	 * ends up as multicast or broadcast packet.
	 */
	m->m_flags &= ~(M_BCAST|M_MCAST);

	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	if (sc->sc_txhprio == IF_HDRPRIO_PACKET)
		prio = m->m_pkthdr.pf.prio;
	else
		prio = sc->sc_txhprio;
	flow = (uint32_t)IFQ_PRIO2TOS(prio) << 20;

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = htonl(flow);
	ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_UDP;
	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
	if (in6_embedscope(&ip6->ip6_src, satosin6(src), NULL) != 0)
		goto drop;
	if (in6_embedscope(&ip6->ip6_dst, satosin6(dst), NULL) != 0)
		goto drop;

	ip6->ip6_hlim = (sc->sc_ttl > 0) ? sc->sc_ttl : ip6_defhlim;

	/* No source configured: let the stack pick one for this peer */
	if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
		if (in6_selectsrc(&in6a, satosin6(dst), NULL,
		    sc->sc_rdomain) != 0)
			goto drop;

		ip6->ip6_src = *in6a;
	}

	if (sc->sc_df)
		SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

	/*
	 * The UDP checksum of VXLAN packets should be set to zero,
	 * but the IPv6 UDP checksum is not optional.  There is an RFC 6539
	 * to relax the IPv6 UDP checksum requirement for tunnels, but it
	 * is currently not supported by most implementations.
	 */
	SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);

	return (m);

drop:
	m_freem(m);
	return (NULL);
}
#endif /* INET6 */
855 
856 int
857 vxlan_output(struct ifnet *ifp, struct mbuf *m)
858 {
859 	struct vxlan_softc	*sc = (struct vxlan_softc *)ifp->if_softc;
860 	struct vxlanudphdr	*vu;
861 	struct sockaddr		*src, *dst;
862 #if NBRIDGE > 0
863 	struct bridge_tunneltag	*brtag;
864 #endif
865 	int			 error, af;
866 	uint32_t		 tag;
867 	struct mbuf		*m0;
868 
869 	/* VXLAN header, needs new mbuf because of alignment issues */
870 	MGET(m0, M_DONTWAIT, m->m_type);
871 	if (m0 == NULL) {
872 		ifp->if_oerrors++;
873 		return (ENOBUFS);
874 	}
875 	M_MOVE_PKTHDR(m0, m);
876 	m0->m_next = m;
877 	m = m0;
878 	m_align(m, sizeof(*vu));
879 	m->m_len = sizeof(*vu);
880 	m->m_pkthdr.len += sizeof(*vu);
881 
882 	src = sstosa(&sc->sc_src);
883 	dst = sstosa(&sc->sc_dst);
884 	af = src->sa_family;
885 
886 	vu = mtod(m, struct vxlanudphdr *);
887 	vu->vu_u.uh_sport = sc->sc_dstport;
888 	vu->vu_u.uh_dport = sc->sc_dstport;
889 	vu->vu_u.uh_ulen = htons(m->m_pkthdr.len);
890 	vu->vu_u.uh_sum = 0;
891 	tag = sc->sc_vnetid;
892 
893 #if NBRIDGE > 0
894 	if ((brtag = bridge_tunnel(m)) != NULL) {
895 		dst = &brtag->brtag_peer.sa;
896 
897 		/* If accepting any VNI, source ip address is from brtag */
898 		if (sc->sc_vnetid == VXLAN_VNI_ANY) {
899 			src = &brtag->brtag_local.sa;
900 			tag = (uint32_t)brtag->brtag_id;
901 			af = src->sa_family;
902 		}
903 
904 		if (dst->sa_family != af) {
905 			ifp->if_oerrors++;
906 			m_freem(m);
907 			return (EINVAL);
908 		}
909 	} else
910 #endif
911 	if (sc->sc_vnetid == VXLAN_VNI_ANY) {
912 		/*
913 		 * If accepting any VNI, build the vxlan header only by
914 		 * bridge_tunneltag or drop packet if the tag does not exist.
915 		 */
916 		ifp->if_oerrors++;
917 		m_freem(m);
918 		return (ENETUNREACH);
919 	}
920 
921 	if (sc->sc_vnetid != VXLAN_VNI_UNSET) {
922 		vu->vu_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI);
923 		vu->vu_v.vxlan_id = htonl(tag << VXLAN_VNI_S);
924 	} else {
925 		vu->vu_v.vxlan_flags = htonl(0);
926 		vu->vu_v.vxlan_id = htonl(0);
927 	}
928 
929 	switch (af) {
930 	case AF_INET:
931 		m = vxlan_encap4(ifp, m, src, dst);
932 		break;
933 #ifdef INET6
934 	case AF_INET6:
935 		m = vxlan_encap6(ifp, m, src, dst);
936 		break;
937 #endif /* INET6 */
938 	default:
939 		m_freem(m);
940 		m = NULL;
941 	}
942 
943 	if (m == NULL) {
944 		ifp->if_oerrors++;
945 		return (ENOBUFS);
946 	}
947 
948 #if NBRIDGE > 0
949 	if (brtag != NULL)
950 		bridge_tunneluntag(m);
951 #endif
952 
953 	m->m_pkthdr.ph_rtableid = sc->sc_rdomain;
954 
955 #if NPF > 0
956 	pf_pkt_addr_changed(m);
957 #endif
958 
959 	switch (af) {
960 	case AF_INET:
961 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT,
962 		    &sc->sc_imo, NULL, 0);
963 		break;
964 #ifdef INET6
965 	case AF_INET6:
966 		error = ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL);
967 		break;
968 #endif /* INET6 */
969 	default:
970 		m_freem(m);
971 		error = EAFNOSUPPORT;
972 	}
973 
974 	if (error)
975 		ifp->if_oerrors++;
976 
977 	return (error);
978 }
979 
980 void
981 vxlan_addr_change(void *arg)
982 {
983 	struct vxlan_softc	*sc = arg;
984 	struct ifnet		*ifp = &sc->sc_ac.ac_if;
985 	int			 error;
986 
987 	/*
988 	 * Reset the configuration after resume or any possible address
989 	 * configuration changes.
990 	 */
991 	if ((error = vxlan_config(ifp, NULL, NULL))) {
992 		/*
993 		 * The source address of the tunnel can temporarily disappear,
994 		 * after a link state change when running the DHCP client,
995 		 * so keep it configured.
996 		 */
997 	}
998 }
999 
1000 void
1001 vxlan_if_change(void *arg)
1002 {
1003 	struct vxlan_softc	*sc = arg;
1004 	struct ifnet		*ifp = &sc->sc_ac.ac_if;
1005 
1006 	/*
1007 	 * Reset the configuration after the parent interface disappeared.
1008 	 */
1009 	vxlan_multicast_cleanup(ifp);
1010 	memset(&sc->sc_src, 0, sizeof(sc->sc_src));
1011 	memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
1012 	sc->sc_dstport = htons(VXLAN_PORT);
1013 }
1014 
1015 void
1016 vxlan_link_change(void *arg)
1017 {
1018 	struct vxlan_softc	*sc = arg;
1019 	struct ifnet		*ifp = &sc->sc_ac.ac_if;
1020 
1021 	/*
1022 	 * The machine might have lost its multicast associations after
1023 	 * link state changes.  This fixes a problem with VMware after
1024 	 * suspend/resume of the host or guest.
1025 	 */
1026 	(void)vxlan_config(ifp, NULL, NULL);
1027 }
1028