xref: /openbsd-src/sys/net/if_gre.c (revision 8550894424f8a4aa4aafb6cd57229dd6ed7cd9dd)
1 /*	$OpenBSD: if_gre.c,v 1.172 2022/06/26 15:50:21 mvs Exp $ */
2 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3 
4 /*
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37  * See gre(4) for more details.
38  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
39  */
40 
41 #include "bpfilter.h"
42 #include "pf.h"
43 
44 #include <sys/param.h>
45 #include <sys/mbuf.h>
46 #include <sys/socket.h>
47 #include <sys/sockio.h>
48 #include <sys/kernel.h>
49 #include <sys/systm.h>
50 #include <sys/errno.h>
51 #include <sys/timeout.h>
52 #include <sys/queue.h>
53 #include <sys/tree.h>
54 #include <sys/pool.h>
55 #include <sys/rwlock.h>
56 
57 #include <crypto/siphash.h>
58 
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/route.h>
64 
65 #include <netinet/in.h>
66 #include <netinet/in_var.h>
67 #include <netinet/if_ether.h>
68 #include <netinet/ip.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/ip_ecn.h>
71 
72 #ifdef INET6
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/in6_var.h>
76 #endif
77 
78 #ifdef PIPEX
79 #include <net/pipex.h>
80 #endif
81 
82 #ifdef MPLS
83 #include <netmpls/mpls.h>
84 #endif /* MPLS */
85 
86 #if NBPFILTER > 0
87 #include <net/bpf.h>
88 #endif
89 
90 #if NPF > 0
91 #include <net/pfvar.h>
92 #endif
93 
94 #include <net/if_gre.h>
95 
96 #include <netinet/ip_gre.h>
97 #include <sys/sysctl.h>
98 
99 /* for nvgre bridge shizz */
100 #include <sys/socket.h>
101 #include <net/if_bridge.h>
102 #include <net/if_etherbridge.h>
103 
/*
 * packet formats
 *
 * On-the-wire GRE header layouts, per RFC 1701/2784.  The base header
 * is followed by optional fields whose presence is signalled by the
 * flag bits in gre_flags.
 */
struct gre_header {
	uint16_t		gre_flags;
#define GRE_CP				0x8000  /* Checksum Present */
#define GRE_KP				0x2000  /* Key Present */
#define GRE_SP				0x1000  /* Sequence Present */

#define GRE_VERS_MASK			0x0007
#define GRE_VERS_0			0x0000
#define GRE_VERS_1			0x0001

	uint16_t		gre_proto;
} __packed __aligned(4);

/* optional checksum field, present when GRE_CP is set */
struct gre_h_cksum {
	uint16_t		gre_cksum;
	uint16_t		gre_reserved1;
} __packed __aligned(4);

/* optional key field, present when GRE_KP is set */
struct gre_h_key {
	uint32_t		gre_key;
} __packed __aligned(4);

#define GRE_EOIP		0x6400

/* MikroTik EoIP reuses the key field as a length plus tunnel id */
struct gre_h_key_eoip {
	uint16_t		eoip_len;	/* network order */
	uint16_t		eoip_tunnel_id;	/* little endian */
} __packed __aligned(4);

#define NVGRE_VSID_RES_MIN	0x000000 /* reserved for future use */
#define NVGRE_VSID_RES_MAX	0x000fff
#define NVGRE_VSID_NVE2NVE	0xffffff /* vendor specific NVE-to-NVE comms */

/* optional sequence number field, present when GRE_SP is set */
struct gre_h_seq {
	uint32_t		gre_seq;
} __packed __aligned(4);

/* WCCPv2 redirect header; not interpreted here, only skipped on input */
struct gre_h_wccp {
	uint8_t			wccp_flags;
	uint8_t			service_id;
	uint8_t			alt_bucket;
	uint8_t			pri_bucket;
} __packed __aligned(4);

#define GRE_WCCP 0x883e

/* minimum encapsulation overhead: outer IPv4 header + base GRE header */
#define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
154 
/*
 * GRE tunnel metadata
 */

/* keepalive state machine states (sc_ka_state) */
#define GRE_KA_NONE		0	/* keepalives disabled */
#define GRE_KA_DOWN		1
#define GRE_KA_HOLD		2
#define GRE_KA_UP		3

/* a tunnel endpoint address; the owning gre_tunnel's t_af selects the member */
union gre_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

static inline int
		gre_ip_cmp(int, const union gre_addr *,
		    const union gre_addr *);

#define GRE_KEY_MIN		0x00000000U
#define GRE_KEY_MAX		0xffffffffU
#define GRE_KEY_SHIFT		0

/* in entropy mode only the top 24 bits carry the configured key */
#define GRE_KEY_ENTROPY_MIN	0x00000000U
#define GRE_KEY_ENTROPY_MAX	0x00ffffffU
#define GRE_KEY_ENTROPY_SHIFT	8

/*
 * Shared tunnel configuration.  Embedded as the first member of every
 * softc in this file, so a pointer to a softc can be compared/looked up
 * as a gre_tunnel (see the "must be first" notes and the casts in the
 * RBT lookups).
 */
struct gre_tunnel {
	uint32_t		t_key_mask;
#define GRE_KEY_NONE			htonl(0x00000000U)
#define GRE_KEY_ENTROPY			htonl(0xffffff00U)
#define GRE_KEY_MASK			htonl(0xffffffffU)
	uint32_t		t_key;

	u_int			t_rtableid;
	union gre_addr		t_src;		/* local endpoint */
#define t_src4	t_src.in4
#define t_src6	t_src.in6
	union gre_addr		t_dst;		/* remote endpoint */
#define t_dst4	t_dst.in4
#define t_dst6	t_dst.in6
	int			t_ttl;
	int			t_txhprio;
	int			t_rxhprio;
	int			t_ecn;
	uint16_t		t_df;		/* htons(IP_DF) or htons(0) */
	sa_family_t		t_af;
};
202 
203 static int
204 		gre_cmp_src(const struct gre_tunnel *,
205 		    const struct gre_tunnel *);
206 static int
207 		gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
208 
209 static int	gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *, int);
210 static int	gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
211 static int	gre_del_tunnel(struct gre_tunnel *);
212 
213 static int	gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
214 static int	gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
215 static int	gre_del_vnetid(struct gre_tunnel *);
216 
217 static int	gre_set_vnetflowid(struct gre_tunnel *, struct ifreq *);
218 static int	gre_get_vnetflowid(struct gre_tunnel *, struct ifreq *);
219 
220 static struct mbuf *
221 		gre_encap_dst(const struct gre_tunnel *, const union gre_addr *,
222 		    struct mbuf *, uint16_t, uint8_t, uint8_t);
223 #define gre_encap(_t, _m, _p, _ttl, _tos) \
224 		gre_encap_dst((_t), &(_t)->t_dst, (_m), (_p), (_ttl), (_tos))
225 
226 static struct mbuf *
227 		gre_encap_dst_ip(const struct gre_tunnel *,
228 		    const union gre_addr *, struct mbuf *, uint8_t, uint8_t);
229 #define gre_encap_ip(_t, _m, _ttl, _tos) \
230 		gre_encap_dst_ip((_t), &(_t)->t_dst, (_m), (_ttl), (_tos))
231 
232 static int
233 		gre_ip_output(const struct gre_tunnel *, struct mbuf *);
234 
235 static int	gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *,
236 		    u_long, void *);
237 
238 static uint8_t	gre_l2_tos(const struct gre_tunnel *, const struct mbuf *);
239 static uint8_t	gre_l3_tos(const struct gre_tunnel *,
240 		    const struct mbuf *, uint8_t);
241 
/*
 * layer 3 GRE tunnels
 */

struct gre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	TAILQ_ENTRY(gre_softc)	sc_entry;  /* on gre_list */

	struct ifnet		sc_if;

	/* keepalive machinery; timers run gre_keepalive_send/_hold */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;	/* probe period */
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;

	SIPHASH_KEY		sc_ka_key;	/* authenticates our probes */
	uint32_t		sc_ka_bias;
	int			sc_ka_recvtm;
};

TAILQ_HEAD(gre_list, gre_softc);

/* wire format of a gre keepalive probe payload */
struct gre_keepalive {
	uint32_t		gk_uptime;
	uint32_t		gk_random;
	uint8_t			gk_digest[SIPHASH_DIGEST_LENGTH];
} __packed __aligned(4);
274 
275 static int	gre_clone_create(struct if_clone *, int);
276 static int	gre_clone_destroy(struct ifnet *);
277 
278 struct if_clone gre_cloner =
279     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
280 
281 /* protected by NET_LOCK */
282 struct gre_list gre_list = TAILQ_HEAD_INITIALIZER(gre_list);
283 
284 static int	gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
285 		    struct rtentry *);
286 static void	gre_start(struct ifnet *);
287 static int	gre_ioctl(struct ifnet *, u_long, caddr_t);
288 
289 static int	gre_up(struct gre_softc *);
290 static int	gre_down(struct gre_softc *);
291 static void	gre_link_state(struct ifnet *, unsigned int);
292 
293 static int	gre_input_key(struct mbuf **, int *, int, int, uint8_t,
294 		    struct gre_tunnel *);
295 
296 static struct mbuf *
297 		gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
298 		    uint8_t *, uint8_t);
299 #ifdef INET6
300 static struct mbuf *
301 		gre_ipv6_patch(const struct gre_tunnel *, struct mbuf *,
302 		    uint8_t *, uint8_t);
303 #endif
304 #ifdef MPLS
305 static struct mbuf *
306 		gre_mpls_patch(const struct gre_tunnel *, struct mbuf *,
307 		    uint8_t *, uint8_t);
308 #endif
309 static void	gre_keepalive_send(void *);
310 static void	gre_keepalive_recv(struct ifnet *ifp, struct mbuf *);
311 static void	gre_keepalive_hold(void *);
312 
313 static struct mbuf *
314 		gre_l3_encap_dst(const struct gre_tunnel *, const void *,
315 		    struct mbuf *m, sa_family_t);
316 
317 #define gre_l3_encap(_t, _m, _af) \
318 		gre_l3_encap_dst((_t), &(_t)->t_dst, (_m), (_af))
319 
/* multipoint GRE (mgre) instance; looked up via mgre_tree */
struct mgre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(mgre_softc)	sc_entry;  /* in mgre_tree */

	struct ifnet		sc_if;
};
326 
327 RBT_HEAD(mgre_tree, mgre_softc);
328 
329 static inline int
330 		mgre_cmp(const struct mgre_softc *, const struct mgre_softc *);
331 
332 RBT_PROTOTYPE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
333 
334 static int	mgre_clone_create(struct if_clone *, int);
335 static int	mgre_clone_destroy(struct ifnet *);
336 
337 struct if_clone mgre_cloner =
338     IF_CLONE_INITIALIZER("mgre", mgre_clone_create, mgre_clone_destroy);
339 
340 static void	mgre_rtrequest(struct ifnet *, int, struct rtentry *);
341 static int	mgre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
342 		    struct rtentry *);
343 static void	mgre_start(struct ifnet *);
344 static int	mgre_ioctl(struct ifnet *, u_long, caddr_t);
345 
346 static int	mgre_set_tunnel(struct mgre_softc *, struct if_laddrreq *);
347 static int	mgre_get_tunnel(struct mgre_softc *, struct if_laddrreq *);
348 static int	mgre_up(struct mgre_softc *);
349 static int	mgre_down(struct mgre_softc *);
350 
351 /* protected by NET_LOCK */
352 struct mgre_tree mgre_tree = RBT_INITIALIZER();
353 
354 /*
355  * Ethernet GRE tunnels
356  */
357 
358 static struct mbuf *
359 		gre_ether_align(struct mbuf *, int);
360 
/* Ethernet-over-GRE (egre) instance; looked up via egre_tree */
struct egre_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	RBT_ENTRY(egre_softc)	sc_entry;  /* in egre_tree */

	struct arpcom		sc_ac;	   /* ethernet interface state */
	struct ifmedia		sc_media;
};
368 
369 RBT_HEAD(egre_tree, egre_softc);
370 
371 static inline int
372 		egre_cmp(const struct egre_softc *, const struct egre_softc *);
373 
374 RBT_PROTOTYPE(egre_tree, egre_softc, sc_entry, egre_cmp);
375 
376 static int	egre_clone_create(struct if_clone *, int);
377 static int	egre_clone_destroy(struct ifnet *);
378 
379 static void	egre_start(struct ifnet *);
380 static int	egre_ioctl(struct ifnet *, u_long, caddr_t);
381 static int	egre_media_change(struct ifnet *);
382 static void	egre_media_status(struct ifnet *, struct ifmediareq *);
383 
384 static int	egre_up(struct egre_softc *);
385 static int	egre_down(struct egre_softc *);
386 
387 static int	egre_input(const struct gre_tunnel *, struct mbuf *, int,
388 		    uint8_t);
389 struct if_clone egre_cloner =
390     IF_CLONE_INITIALIZER("egre", egre_clone_create, egre_clone_destroy);
391 
392 /* protected by NET_LOCK */
393 struct egre_tree egre_tree = RBT_INITIALIZER();
394 
395 /*
396  * Network Virtualisation Using Generic Routing Encapsulation (NVGRE)
397  */
398 
struct nvgre_softc {
	struct gre_tunnel	 sc_tunnel; /* must be first */
	unsigned int		 sc_ifp0;   /* parent interface index */
	RBT_ENTRY(nvgre_softc)	 sc_uentry; /* in nvgre_ucast_tree */
	RBT_ENTRY(nvgre_softc)	 sc_mentry; /* in nvgre_mcast_tree */

	struct arpcom		 sc_ac;
	struct ifmedia		 sc_media;

	/* packets queued for deferred transmission by sc_send_task */
	struct mbuf_queue	 sc_send_list;
	struct task		 sc_send_task;	/* runs nvgre_send */

	/* NOTE(review): opaque; looks like multicast membership — confirm in nvgre_up */
	void			*sc_inm;
	struct task		 sc_ltask;	/* runs nvgre_link_change */
	struct task		 sc_dtask;	/* runs nvgre_detach */

	struct etherbridge	 sc_eb;		/* learned address table */
};
417 
418 RBT_HEAD(nvgre_ucast_tree, nvgre_softc);
419 RBT_HEAD(nvgre_mcast_tree, nvgre_softc);
420 
421 static inline int
422 		nvgre_cmp_ucast(const struct nvgre_softc *,
423 		    const struct nvgre_softc *);
424 static int
425 		nvgre_cmp_mcast(const struct gre_tunnel *,
426 		    const union gre_addr *, unsigned int,
427 		    const struct gre_tunnel *, const union gre_addr *,
428 		    unsigned int);
429 static inline int
430 		nvgre_cmp_mcast_sc(const struct nvgre_softc *,
431 		    const struct nvgre_softc *);
432 
433 RBT_PROTOTYPE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
434 RBT_PROTOTYPE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
435 
436 static int	nvgre_clone_create(struct if_clone *, int);
437 static int	nvgre_clone_destroy(struct ifnet *);
438 
439 static void	nvgre_start(struct ifnet *);
440 static int	nvgre_ioctl(struct ifnet *, u_long, caddr_t);
441 
442 static int	nvgre_up(struct nvgre_softc *);
443 static int	nvgre_down(struct nvgre_softc *);
444 static int	nvgre_set_parent(struct nvgre_softc *, const char *);
445 static void	nvgre_link_change(void *);
446 static void	nvgre_detach(void *);
447 
448 static int	nvgre_input(const struct gre_tunnel *, struct mbuf *, int,
449 		    uint8_t);
450 static void	nvgre_send(void *);
451 
452 static int	nvgre_add_addr(struct nvgre_softc *, const struct ifbareq *);
453 static int	nvgre_del_addr(struct nvgre_softc *, const struct ifbareq *);
454 
455 static int	 nvgre_eb_port_eq(void *, void *, void *);
456 static void	*nvgre_eb_port_take(void *, void *);
457 static void	 nvgre_eb_port_rele(void *, void *);
458 static size_t	 nvgre_eb_port_ifname(void *, char *, size_t, void *);
459 static void	 nvgre_eb_port_sa(void *, struct sockaddr_storage *, void *);
460 
461 static const struct etherbridge_ops nvgre_etherbridge_ops = {
462 	nvgre_eb_port_eq,
463 	nvgre_eb_port_take,
464 	nvgre_eb_port_rele,
465 	nvgre_eb_port_ifname,
466 	nvgre_eb_port_sa,
467 };
468 
469 struct if_clone nvgre_cloner =
470     IF_CLONE_INITIALIZER("nvgre", nvgre_clone_create, nvgre_clone_destroy);
471 
472 struct pool nvgre_endpoint_pool;
473 
474 /* protected by NET_LOCK */
475 struct nvgre_ucast_tree nvgre_ucast_tree = RBT_INITIALIZER();
476 struct nvgre_mcast_tree nvgre_mcast_tree = RBT_INITIALIZER();
477 
478 /*
479  * MikroTik Ethernet over IP protocol (eoip)
480  */
481 
/* MikroTik EoIP instance, keyed by tunnel addresses plus sc_tunnel_id */
struct eoip_softc {
	struct gre_tunnel	sc_tunnel; /* must be first */
	uint16_t		sc_tunnel_id;
	RBT_ENTRY(eoip_softc)	sc_entry;  /* in eoip_tree */

	struct arpcom		sc_ac;
	struct ifmedia		sc_media;

	/* keepalive machinery; timers run eoip_keepalive_send/_hold */
	struct timeout		sc_ka_send;
	struct timeout		sc_ka_hold;

	unsigned int		sc_ka_state;	/* GRE_KA_* */
	unsigned int		sc_ka_timeo;	/* probe period */
	unsigned int		sc_ka_count;

	unsigned int		sc_ka_holdmax;
	unsigned int		sc_ka_holdcnt;
};
500 
501 RBT_HEAD(eoip_tree, eoip_softc);
502 
503 static inline int
504 		eoip_cmp(const struct eoip_softc *, const struct eoip_softc *);
505 
506 RBT_PROTOTYPE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
507 
508 static int	eoip_clone_create(struct if_clone *, int);
509 static int	eoip_clone_destroy(struct ifnet *);
510 
511 static void	eoip_start(struct ifnet *);
512 static int	eoip_ioctl(struct ifnet *, u_long, caddr_t);
513 
514 static void	eoip_keepalive_send(void *);
515 static void	eoip_keepalive_recv(struct eoip_softc *);
516 static void	eoip_keepalive_hold(void *);
517 
518 static int	eoip_up(struct eoip_softc *);
519 static int	eoip_down(struct eoip_softc *);
520 
521 static struct mbuf *
522 		eoip_encap(struct eoip_softc *, struct mbuf *, uint8_t);
523 
524 static struct mbuf *
525 		eoip_input(struct gre_tunnel *, struct mbuf *,
526 		    const struct gre_header *, uint8_t, int);
527 struct if_clone eoip_cloner =
528     IF_CLONE_INITIALIZER("eoip", eoip_clone_create, eoip_clone_destroy);
529 
530 /* protected by NET_LOCK */
531 struct eoip_tree eoip_tree = RBT_INITIALIZER();
532 
533 /*
534  * It is not easy to calculate the right value for a GRE MTU.
535  * We leave this task to the admin and use the same default that
536  * other vendors use.
537  */
538 #define GREMTU 1476
539 
/*
 * We can control the acceptance of GRE and MobileIP packets by
 * altering the sysctl net.inet.gre.allow value.  Zero means drop
 * them, all else is acceptance.  We can also control acceptance of
 * WCCPv1-style GRE packets through the net.inet.gre.wccp value, but
 * be aware it depends upon normal GRE being allowed as well.
 */
549 int gre_allow = 0;
550 int gre_wccp = 0;
551 
552 void
553 greattach(int n)
554 {
555 	if_clone_attach(&gre_cloner);
556 	if_clone_attach(&mgre_cloner);
557 	if_clone_attach(&egre_cloner);
558 	if_clone_attach(&nvgre_cloner);
559 	if_clone_attach(&eoip_cloner);
560 }
561 
/*
 * Create a gre(4) point-to-point interface instance: allocate the
 * softc, set up the ifnet callbacks and tunnel defaults, attach the
 * interface, and link it onto gre_list for input lookups.
 * Always succeeds (M_WAITOK allocation).
 */
static int
gre_clone_create(struct if_clone *ifc, int unit)
{
	struct gre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
	    ifc->ifc_name, unit);

	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	ifp->if_type = IFT_TUNNEL;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_bpf_mtap = p2p_bpf_mtap;
	ifp->if_input = p2p_input;
	ifp->if_output = gre_output;
	ifp->if_start = gre_start;
	ifp->if_ioctl = gre_ioctl;
	ifp->if_rtrequest = p2p_rtrequest;

	/*
	 * Tunnel defaults: system IP TTL, tx prio taken from the payload
	 * header, rx prio left on the packet, DF clear, ECN allowed.
	 */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	/* keepalives stay off (GRE_KA_NONE) until configured via ioctl */
	timeout_set(&sc->sc_ka_send, gre_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, gre_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_NONE;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	ifp->if_llprio = IFQ_TOS2PRIO(IPTOS_PREC_INTERNETCONTROL);

	/* gre_list is protected by NET_LOCK */
	NET_LOCK();
	TAILQ_INSERT_TAIL(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	return (0);
}
612 
/*
 * Destroy a gre(4) instance: bring it down if running, unlink it from
 * gre_list (both under NET_LOCK so input lookups never see a dying
 * interface), then detach and free.
 */
static int
gre_clone_destroy(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		gre_down(sc);

	TAILQ_REMOVE(&gre_list, sc, sc_entry);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
631 
/*
 * Create an mgre(4) multipoint interface instance.  Unlike gre(4) it
 * is not linked onto a global list here; instances enter mgre_tree
 * when brought up.  Always succeeds (M_WAITOK allocation).
 */
static int
mgre_clone_create(struct if_clone *ifc, int unit)
{
	struct mgre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname),
	    "%s%d", ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_type = IFT_L3IPVLAN;
	ifp->if_hdrlen = GRE_HDRLEN;
	ifp->if_mtu = GREMTU;
	ifp->if_flags = IFF_MULTICAST|IFF_SIMPLEX;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_bpf_mtap = p2p_bpf_mtap;
	ifp->if_input = p2p_input;
	ifp->if_rtrequest = mgre_rtrequest;
	ifp->if_output = mgre_output;
	ifp->if_start = mgre_start;
	ifp->if_ioctl = mgre_ioctl;

	/* same tunnel defaults as gre(4) */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = IF_HDRPRIO_PAYLOAD;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);
	sc->sc_tunnel.t_ecn = ECN_ALLOWED;

	if_counters_alloc(ifp);
	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
#endif

	return (0);
}
673 
/*
 * Destroy an mgre(4) instance.  mgre_down() (under NET_LOCK) removes
 * it from mgre_tree before the interface is detached and freed.
 */
static int
mgre_clone_destroy(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		mgre_down(sc);
	NET_UNLOCK();

	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
690 
/*
 * Create an egre(4) Ethernet-over-GRE interface instance with a random
 * MAC address and autoselect media.  Always succeeds (M_WAITOK
 * allocation).
 */
static int
egre_clone_create(struct if_clone *ifc, int unit)
{
	struct egre_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = egre_ioctl;
	ifp->if_start = egre_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	/* tx prio 0 by default; no ECN field for the ethernet payload */
	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
726 
/*
 * Destroy an egre(4) instance: bring it down under NET_LOCK, release
 * the media instance, then detach the ethernet layer and the interface.
 */
static int
egre_clone_destroy(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		egre_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
745 
746 static int
747 nvgre_clone_create(struct if_clone *ifc, int unit)
748 {
749 	struct nvgre_softc *sc;
750 	struct ifnet *ifp;
751 	struct gre_tunnel *tunnel;
752 	int error;
753 
754 	if (nvgre_endpoint_pool.pr_size == 0) {
755 		pool_init(&nvgre_endpoint_pool, sizeof(union gre_addr),
756 		    0, IPL_SOFTNET, 0, "nvgreep", NULL);
757 	}
758 
759 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
760 	ifp = &sc->sc_ac.ac_if;
761 
762 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
763 	    ifc->ifc_name, unit);
764 
765 	error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
766 	    &nvgre_etherbridge_ops, sc);
767 	if (error != 0) {
768 		free(sc, M_DEVBUF, sizeof(*sc));
769 		return (error);
770 	}
771 
772 	ifp->if_softc = sc;
773 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
774 	ifp->if_ioctl = nvgre_ioctl;
775 	ifp->if_start = nvgre_start;
776 	ifp->if_xflags = IFXF_CLONED;
777 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
778 	ether_fakeaddr(ifp);
779 
780 	tunnel = &sc->sc_tunnel;
781 	tunnel->t_ttl = IP_DEFAULT_MULTICAST_TTL;
782 	tunnel->t_txhprio = 0;
783 	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
784 	tunnel->t_df = htons(IP_DF);
785 	tunnel->t_key_mask = GRE_KEY_ENTROPY;
786 	tunnel->t_key = htonl((NVGRE_VSID_RES_MAX + 1) <<
787 	    GRE_KEY_ENTROPY_SHIFT);
788 
789 	mq_init(&sc->sc_send_list, IFQ_MAXLEN * 2, IPL_SOFTNET);
790 	task_set(&sc->sc_send_task, nvgre_send, sc);
791 	task_set(&sc->sc_ltask, nvgre_link_change, sc);
792 	task_set(&sc->sc_dtask, nvgre_detach, sc);
793 
794 	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
795 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
796 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
797 
798 	if_counters_alloc(ifp);
799 	if_attach(ifp);
800 	ether_ifattach(ifp);
801 
802 	return (0);
803 }
804 
/*
 * Destroy an nvgre(4) instance: bring it down under NET_LOCK, tear
 * down the etherbridge learning table and media, then detach the
 * ethernet layer and the interface.
 */
static int
nvgre_clone_destroy(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		nvgre_down(sc);
	NET_UNLOCK();

	etherbridge_destroy(&sc->sc_eb);

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
825 
/*
 * Create an eoip(4) MikroTik Ethernet-over-IP interface instance.
 * Unlike gre(4), keepalives start in GRE_KA_DOWN with default
 * timeout/count of 10.  Always succeeds (M_WAITOK allocation).
 */
static int
eoip_clone_create(struct if_clone *ifc, int unit)
{
	struct eoip_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = eoip_ioctl;
	ifp->if_start = eoip_start;
	ifp->if_xflags = IFXF_CLONED;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ether_fakeaddr(ifp);

	sc->sc_tunnel.t_ttl = ip_defttl;
	sc->sc_tunnel.t_txhprio = 0;
	sc->sc_tunnel.t_rxhprio = IF_HDRPRIO_PACKET;
	sc->sc_tunnel.t_df = htons(0);

	/* keepalive defaults */
	sc->sc_ka_timeo = 10;
	sc->sc_ka_count = 10;

	timeout_set(&sc->sc_ka_send, eoip_keepalive_send, sc);
	timeout_set_proc(&sc->sc_ka_hold, eoip_keepalive_hold, sc);
	sc->sc_ka_state = GRE_KA_DOWN;

	ifmedia_init(&sc->sc_media, 0, egre_media_change, egre_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
868 
/*
 * Destroy an eoip(4) instance: bring it down under NET_LOCK, release
 * the media instance, then detach the ethernet layer and the interface.
 */
static int
eoip_clone_destroy(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		eoip_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
887 
888 int
889 gre_input(struct mbuf **mp, int *offp, int type, int af)
890 {
891 	struct mbuf *m = *mp;
892 	struct gre_tunnel key;
893 	struct ip *ip;
894 
895 	ip = mtod(m, struct ip *);
896 
897 	/* XXX check if ip_src is sane for nvgre? */
898 
899 	key.t_af = AF_INET;
900 	key.t_src4 = ip->ip_dst;
901 	key.t_dst4 = ip->ip_src;
902 
903 	if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
904 		return (rip_input(mp, offp, type, af));
905 
906 	return (IPPROTO_DONE);
907 }
908 
#ifdef INET6
/*
 * GRE-over-IPv6 protocol input.  Mirrors gre_input(): key the lookup
 * on the swapped outer addresses, pass the traffic class as the outer
 * TOS, and fall back to raw ip6 input if no tunnel claims the packet.
 */
int
gre_input6(struct mbuf **mp, int *offp, int type, int af)
{
	struct gre_tunnel key;
	struct ip6_hdr *ip6 = mtod(*mp, struct ip6_hdr *);
	uint32_t flow;

	/* XXX check if ip6_src is sane for nvgre? */

	key.t_af = AF_INET6;
	key.t_src6 = ip6->ip6_dst;
	key.t_dst6 = ip6->ip6_src;

	flow = bemtoh32(&ip6->ip6_flow);

	/*
	 * flow >> 20 puts the traffic class in the low byte; the version
	 * nibble above it is truncated by the uint8_t otos parameter.
	 */
	if (gre_input_key(mp, offp, type, af, flow >> 20, &key) != -1)
		return (IPPROTO_DONE);

	return (rip6_input(mp, offp, type, af));
}
#endif /* INET6 */
934 
935 static inline struct ifnet *
936 gre_find(const struct gre_tunnel *key)
937 {
938 	struct gre_softc *sc;
939 
940 	TAILQ_FOREACH(sc, &gre_list, sc_entry) {
941 		if (gre_cmp(key, &sc->sc_tunnel) != 0)
942 			continue;
943 
944 		if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING))
945 			continue;
946 
947 		return (&sc->sc_if);
948 	}
949 
950 	return (NULL);
951 }
952 
953 static inline struct ifnet *
954 mgre_find(const struct gre_tunnel *key)
955 {
956 	struct mgre_softc *sc;
957 
958 	NET_ASSERT_LOCKED();
959 	sc = RBT_FIND(mgre_tree, &mgre_tree, (const struct mgre_softc *)key);
960 	if (sc != NULL)
961 		return (&sc->sc_if);
962 
963 	return (NULL);
964 }
965 
966 static struct mbuf *
967 gre_input_1(struct gre_tunnel *key, struct mbuf *m,
968     const struct gre_header *gh, uint8_t otos, int iphlen)
969 {
970 	switch (gh->gre_proto) {
971 	case htons(ETHERTYPE_PPP):
972 #ifdef PIPEX
973 		if (pipex_enable) {
974 			struct pipex_session *session;
975 
976 			session = pipex_pptp_lookup_session(m);
977 			if (session != NULL) {
978 				struct mbuf *m0;
979 
980 				m0 = pipex_pptp_input(m, session);
981 				pipex_rele_session(session);
982 
983 				if (m0 == NULL)
984 					return (NULL);
985 			}
986 		}
987 #endif
988 		break;
989 	case htons(GRE_EOIP):
990 		return (eoip_input(key, m, gh, otos, iphlen));
991 		break;
992 	}
993 
994 	return (m);
995 }
996 
/*
 * Common GRE input path for both outer address families.
 *
 * "key" arrives primed with the outer addresses and af; this function
 * adds the rtableid and (optional) GRE key, parses and validates the
 * GRE header, and dispatches the payload: transparent ethernet goes to
 * egre/nvgre, keepalives to the owning gre(4), and IPv4/IPv6/MPLS
 * payloads are patched (TOS/ECN) and re-queued on the matching gre or
 * mgre interface.
 *
 * Returns IPPROTO_DONE when the mbuf was consumed, or -1 with *mp
 * still valid so the caller can fall back to raw ip input.
 */
static int
gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
    struct gre_tunnel *key)
{
	struct mbuf *m = *mp;
	int iphlen = *offp, hlen, rxprio;
	struct ifnet *ifp;
	const struct gre_tunnel *tunnel;
	caddr_t buf;
	struct gre_header *gh;
	struct gre_h_key *gkh;
	struct mbuf *(*patch)(const struct gre_tunnel *, struct mbuf *,
	    uint8_t *, uint8_t);
	int mcast = 0;
	uint8_t itos;

	/* administratively disabled via sysctl net.inet.gre.allow */
	if (!gre_allow)
		goto decline;

	key->t_rtableid = m->m_pkthdr.ph_rtableid;

	/* hlen tracks how much header we have parsed so far */
	hlen = iphlen + sizeof(*gh);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (IPPROTO_DONE);

	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);

	/* check the version */
	switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
	case htons(GRE_VERS_0):
		break;

	case htons(GRE_VERS_1):
		/*
		 * v1 is pptp/eoip; if gre_input_1() hands the mbuf back
		 * it was not consumed, so decline it to raw input.
		 */
		m = gre_input_1(key, m, gh, otos, iphlen);
		if (m == NULL)
			return (IPPROTO_DONE);
		/* FALLTHROUGH */
	default:
		goto decline;
	}

	/* the only optional bit in the header is K flag */
	if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
		goto decline;

	if (gh->gre_flags & htons(GRE_KP)) {
		hlen += sizeof(*gkh);
		if (m->m_pkthdr.len < hlen)
			goto decline;

		m = m_pullup(m, hlen);
		if (m == NULL)
			return (IPPROTO_DONE);

		/* the pullup may have moved the header */
		buf = mtod(m, caddr_t);
		gh = (struct gre_header *)(buf + iphlen);
		gkh = (struct gre_h_key *)(gh + 1);

		key->t_key_mask = GRE_KEY_MASK;
		key->t_key = gkh->gre_key;
	} else
		key->t_key_mask = GRE_KEY_NONE;

	/* ethernet payloads are claimed by egre first, then nvgre */
	if (gh->gre_proto == htons(ETHERTYPE_TRANSETHER)) {
		if (egre_input(key, m, hlen, otos) == -1 &&
		    nvgre_input(key, m, hlen, otos) == -1)
			goto decline;

		return (IPPROTO_DONE);
	}

	/* look for a point-to-point tunnel first, then multipoint */
	ifp = gre_find(key);
	if (ifp == NULL) {
		ifp = mgre_find(key);
		if (ifp == NULL)
			goto decline;
	}

	switch (gh->gre_proto) {
	case htons(GRE_WCCP): {
		struct mbuf *n;
		int off;

		/* WCCP/GRE:
		 *   So far as I can see (and test) it seems that Cisco's WCCP
		 *   GRE tunnel is precisely a IP-in-GRE tunnel that differs
		 *   only in its protocol number.  At least, it works for me.
		 *
		 *   The Internet Drafts can be found if you look for
		 *   the following:
		 *     draft-forster-wrec-wccp-v1-00.txt
		 *     draft-wilson-wrec-wccp-v2-01.txt
		 */

		/* WCCP requires opt-in via sysctl or LINK0 on the interface */
		if (!gre_wccp && !ISSET(ifp->if_flags, IFF_LINK0))
			goto decline;

		/*
		 * If the first nibble of the payload does not look like
		 * IPv4, assume it is WCCP v2.
		 */
		n = m_getptr(m, hlen, &off);
		if (n == NULL)
			goto decline;
		if (n->m_data[off] >> 4 != IPVERSION)
			hlen += 4;  /* four-octet Redirect header */

		/* FALLTHROUGH */
	}
	case htons(ETHERTYPE_IP):
		m->m_pkthdr.ph_family = AF_INET;
		patch = gre_ipv4_patch;
		break;
#ifdef INET6
	case htons(ETHERTYPE_IPV6):
		m->m_pkthdr.ph_family = AF_INET6;
		patch = gre_ipv6_patch;
		break;
#endif
#ifdef MPLS
	case htons(ETHERTYPE_MPLS_MCAST):
		mcast = M_MCAST|M_BCAST;
		/* fallthrough */
	case htons(ETHERTYPE_MPLS):
		m->m_pkthdr.ph_family = AF_MPLS;
		patch = gre_mpls_patch;
		break;
#endif
	case htons(0):
		/* proto 0 is the keepalive probe format */
		if (ifp->if_type != IFT_TUNNEL) {
			/* keepalives dont make sense for mgre */
			goto decline;
		}

		m_adj(m, hlen);
		gre_keepalive_recv(ifp, m);
		return (IPPROTO_DONE);

	default:
		goto decline;
	}

	/* it's ours now */

	/* strip the outer ip and gre headers */
	m_adj(m, hlen);

	tunnel = ifp->if_softc; /* gre and mgre tunnel info is at the front */

	/* propagate ECN/TOS from the outer header into the payload */
	m = (*patch)(tunnel, m, &itos, otos);
	if (m == NULL)
		return (IPPROTO_DONE);

	/* in entropy mode the low key bits carry a flow id for the stack */
	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
		SET(m->m_pkthdr.csum_flags, M_FLOWID);
		m->m_pkthdr.ph_flowid =
		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
	}

	/* set the receive packet priority per the configured policy */
	rxprio = tunnel->t_rxhprio;
	switch (rxprio) {
	case IF_HDRPRIO_PACKET:
		/* nop */
		break;
	case IF_HDRPRIO_OUTER:
		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos);
		break;
	case IF_HDRPRIO_PAYLOAD:
		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos);
		break;
	default:
		m->m_pkthdr.pf.prio = rxprio;
		break;
	}

	m->m_flags &= ~(M_MCAST|M_BCAST);
	m->m_flags |= mcast;

	if_vinput(ifp, m);
	return (IPPROTO_DONE);
decline:
	*mp = m;
	return (-1);
}
1185 
1186 static struct mbuf *
1187 gre_ipv4_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1188     uint8_t *itosp, uint8_t otos)
1189 {
1190 	struct ip *ip;
1191 	uint8_t itos;
1192 
1193 	m = m_pullup(m, sizeof(*ip));
1194 	if (m == NULL)
1195 		return (NULL);
1196 
1197 	ip = mtod(m, struct ip *);
1198 
1199 	itos = ip->ip_tos;
1200 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1201 		m_freem(m);
1202 		return (NULL);
1203 	}
1204 	if (itos != ip->ip_tos)
1205 		ip_tos_patch(ip, itos);
1206 
1207 	*itosp = itos;
1208 
1209 	return (m);
1210 }
1211 
1212 #ifdef INET6
1213 static struct mbuf *
1214 gre_ipv6_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1215     uint8_t *itosp, uint8_t otos)
1216 {
1217 	struct ip6_hdr *ip6;
1218 	uint32_t flow;
1219 	uint8_t itos;
1220 
1221 	m = m_pullup(m, sizeof(*ip6));
1222 	if (m == NULL)
1223 		return (NULL);
1224 
1225 	ip6 = mtod(m, struct ip6_hdr *);
1226 
1227 	flow = bemtoh32(&ip6->ip6_flow);
1228 	itos = flow >> 20;
1229 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1230 		m_freem(m);
1231 		return (NULL);
1232 	}
1233 
1234 	CLR(flow, 0xff << 20);
1235 	SET(flow, itos << 20);
1236 	htobem32(&ip6->ip6_flow, flow);
1237 
1238 	*itosp = itos;
1239 
1240 	return (m);
1241 }
1242 #endif
1243 
1244 #ifdef MPLS
1245 static struct mbuf *
1246 gre_mpls_patch(const struct gre_tunnel *tunnel, struct mbuf *m,
1247     uint8_t *itosp, uint8_t otos)
1248 {
1249 	uint8_t itos;
1250 	uint32_t shim;
1251 
1252 	m = m_pullup(m, sizeof(shim));
1253 	if (m == NULL)
1254 		return (NULL);
1255 
1256 	shim = *mtod(m, uint32_t *);
1257 	itos = (ntohl(shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5;
1258 
1259 	if (ip_ecn_egress(tunnel->t_ecn, &otos, &itos) == 0) {
1260 		m_freem(m);
1261 		return (NULL);
1262 	}
1263 
1264 	*itosp = itos;
1265 
1266 	return (m);
1267 }
1268 #endif
1269 
/*
 * Set the pf priority of a received L2 packet according to the
 * tunnel's rx header prio setting: keep the packet's own priority
 * (IF_HDRPRIO_PACKET), derive it from the outer IP TOS
 * (IF_HDRPRIO_OUTER), or apply the fixed configured priority.
 */
#define gre_l2_prio(_t, _m, _otos) do {					\
	int rxprio = (_t)->t_rxhprio;					\
	switch (rxprio) {						\
	case IF_HDRPRIO_PACKET:						\
		/* nop */						\
		break;							\
	case IF_HDRPRIO_OUTER:						\
		(_m)->m_pkthdr.pf.prio = IFQ_TOS2PRIO((_otos));		\
		break;							\
	default:							\
		(_m)->m_pkthdr.pf.prio = rxprio;			\
		break;							\
	}								\
} while (0)
1284 
/*
 * Input path for egre(4): look up the Ethernet-over-GRE interface
 * matching the outer addresses and key, and inject the inner frame
 * into it.  Returns 0 if the packet was consumed (even if dropped),
 * -1 if no egre interface matched so other consumers can try.
 */
static int
egre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen, uint8_t otos)
{
	struct egre_softc *sc;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(egre_tree, &egre_tree, (const struct egre_softc *)key);
	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* entropy keys carry a flow id in the low bits; expose it */
	if (sc->sc_tunnel.t_key_mask == GRE_KEY_ENTROPY) {
		SET(m->m_pkthdr.csum_flags, M_FLOWID);
		m->m_pkthdr.ph_flowid =
		    bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;
	}

	m->m_flags &= ~(M_MCAST|M_BCAST);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1314 
/*
 * Find the nvgre(4) interface joined to the multicast group given by
 * key on the physical interface if0idx, or NULL if none matches.
 */
static inline struct nvgre_softc *
nvgre_mcast_find(const struct gre_tunnel *key, unsigned int if0idx)
{
	struct nvgre_softc *sc;
	int rv;

	/*
	 * building an nvgre_softc to use with RBT_FIND is expensive, and
	 * would need to swap the src and dst addresses in the key. so do the
	 * find by hand.
	 */

	NET_ASSERT_LOCKED();
	sc = RBT_ROOT(nvgre_mcast_tree, &nvgre_mcast_tree);
	while (sc != NULL) {
		/* the packet's outer src is compared against our dst (group) */
		rv = nvgre_cmp_mcast(key, &key->t_src, if0idx,
		    &sc->sc_tunnel, &sc->sc_tunnel.t_dst, sc->sc_ifp0);
		if (rv == 0)
			return (sc);
		if (rv < 0)
			sc = RBT_LEFT(nvgre_mcast_tree, sc);
		else
			sc = RBT_RIGHT(nvgre_mcast_tree, sc);
	}

	return (NULL);
}
1342 
/*
 * Find the nvgre(4) interface whose unicast tunnel matches the
 * addresses and vnetid in key, or NULL if none matches.
 */
static inline struct nvgre_softc *
nvgre_ucast_find(const struct gre_tunnel *key)
{
	NET_ASSERT_LOCKED();
	return (RBT_FIND(nvgre_ucast_tree, &nvgre_ucast_tree,
	    (struct nvgre_softc *)key));
}
1350 
/*
 * Input path for nvgre(4): find the matching interface (by multicast
 * group or unicast tunnel), learn the inner source MAC against the
 * outer source, and inject the frame.  Returns 0 if consumed, -1 if
 * no nvgre interface matched so other consumers can try.
 */
static int
nvgre_input(const struct gre_tunnel *key, struct mbuf *m, int hlen,
    uint8_t otos)
{
	struct nvgre_softc *sc;
	struct ether_header *eh;

	if (ISSET(m->m_flags, M_MCAST|M_BCAST))
		sc = nvgre_mcast_find(key, m->m_pkthdr.ph_ifidx);
	else
		sc = nvgre_ucast_find(key);

	if (sc == NULL)
		return (-1);

	/* it's ours now */
	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (0);

	/* remember which outer address the inner source mac lives behind */
	eh = mtod(m, struct ether_header *);
	etherbridge_map_ea(&sc->sc_eb, (void *)&key->t_dst,
	    (struct ether_addr *)eh->ether_shost);

	/* the low bits of an nvgre key carry flow entropy; use as flowid */
	SET(m->m_pkthdr.csum_flags, M_FLOWID);
	m->m_pkthdr.ph_flowid = bemtoh32(&key->t_key) & ~GRE_KEY_ENTROPY;

	m->m_flags &= ~(M_MCAST|M_BCAST);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (0);
}
1386 
/*
 * Strip hlen bytes of encapsulation and make sure the inner ethernet
 * frame is complete, contiguous, and has its payload 32-bit aligned,
 * copying the packet if necessary.  Returns NULL (m freed) if the
 * frame is unusable.
 */
static struct mbuf *
gre_ether_align(struct mbuf *m, int hlen)
{
	struct mbuf *n;
	int off;

	m_adj(m, hlen);

	if (m->m_pkthdr.len < sizeof(struct ether_header)) {
		m_freem(m);
		return (NULL);
	}

	m = m_pullup(m, sizeof(struct ether_header));
	if (m == NULL)
		return (NULL);

	n = m_getptr(m, sizeof(struct ether_header), &off);
	if (n == NULL) {
		m_freem(m);
		return (NULL);
	}

	/*
	 * the payload after the ethernet header must be aligned for the
	 * inner protocol input; if it is not, realign by copying the
	 * whole packet with ETHER_ALIGN bytes of leading space.
	 */
	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
		m_freem(m);
		if (n == NULL)
			return (NULL);
		m = n;
	}

	return (m);
}
1420 
/*
 * Receive a gre(4) keepalive reply: authenticate it, rate limit the
 * processing, and advance the keepalive state machine towards
 * link-up.  The mbuf is always consumed.
 */
static void
gre_keepalive_recv(struct ifnet *ifp, struct mbuf *m)
{
	struct gre_softc *sc = ifp->if_softc;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	uint8_t digest[SIPHASH_DIGEST_LENGTH];
	int uptime, delta;
	int tick = ticks;

	/* only care if keepalives are enabled and on our own rdomain */
	if (sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		goto drop;

	if (m->m_pkthdr.len < sizeof(*gk))
		goto drop;
	m = m_pullup(m, sizeof(*gk));
	if (m == NULL)
		return;

	/*
	 * undo the sc_ka_bias applied when the keepalive was generated
	 * (presumably by our own sender -- the bias and key below are
	 * set locally in gre_ioctl) and sanity check the packet's age.
	 */
	gk = mtod(m, struct gre_keepalive *);
	uptime = bemtoh32(&gk->gk_uptime) - sc->sc_ka_bias;
	delta = tick - uptime;
	if (delta < 0)
		goto drop;
	if (delta > hz * 10) /* magic */
		goto drop;

	/* avoid too much siphash work */
	delta = tick - sc->sc_ka_recvtm;
	if (delta > 0 && delta < (hz / 10))
		goto drop;

	/* check the digest so only keepalives keyed with our key count */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(digest, &ctx);

	if (memcmp(digest, gk->gk_digest, sizeof(digest)) != 0)
		goto drop;

	sc->sc_ka_recvtm = tick;

	switch (sc->sc_ka_state) {
	case GRE_KA_DOWN:
		/* start holding down; demand more replies after each flap */
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		/* link is stable; slowly relax the hold down requirement */
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);

	/* the keepalive is fully consumed: fall through to free it */
drop:
	m_freem(m);
}
1490 
/*
 * if_output handler for gre(4).  Checks that the address family can
 * be carried, guards against the tunnel being nested through itself,
 * and enqueues the packet; encapsulation happens later in gre_start().
 */
static int
gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct m_tag *mtag;
	int error = 0;

	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dst->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			m_freem(m);
			error = EIO;
			goto end;
		}
	}

	/* tag the packet with our index so nesting can be detected above */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		m_freem(m);
		error = ENOBUFS;
		goto end;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* stash the inner address family for gre_start() to pick up */
	m->m_pkthdr.ph_family = dst->sa_family;

	error = if_enqueue(ifp, m);
end:
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1554 
/*
 * Transmit handler for gre(4): dequeue pending packets, encapsulate
 * them in GRE plus an outer IP header from the tunnel configuration,
 * and push them into the IP stack.
 */
void
gre_start(struct ifnet *ifp)
{
	struct gre_softc *sc = ifp->if_softc;
	struct mbuf *m;
	int af;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
		/* ph_family was stashed by gre_output() */
		af = m->m_pkthdr.ph_family;

#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
#endif

		m = gre_l3_encap(&sc->sc_tunnel, m, af);
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
1581 
/*
 * Route request hook for mgre(4).  When a local route for one of the
 * interface's own addresses is added, clear RTF_LLINFO on it -- but
 * only if lo0 in the same rdomain has an address of the same family.
 * NOTE(review): intent inferred from the checks below; presumably
 * this keeps local routes from being treated as tunnel endpoint
 * (link level) entries -- confirm against the routing code.
 */
void
mgre_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
{
	struct ifnet *lo0ifp;
	struct ifaddr *ifa, *lo0ifa;

	switch (req) {
	case RTM_ADD:
		if (!ISSET(rt->rt_flags, RTF_LOCAL))
			break;

		/* is the route for one of our own addresses? */
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (memcmp(rt_key(rt), ifa->ifa_addr,
			    rt_key(rt)->sa_len) == 0)
				break;
		}

		if (ifa == NULL)
			break;

		KASSERT(ifa == rt->rt_ifa);

		/* does lo0 in our rdomain have an address of this family? */
		lo0ifp = if_get(rtable_loindex(ifp->if_rdomain));
		KASSERT(lo0ifp != NULL);
		TAILQ_FOREACH(lo0ifa, &lo0ifp->if_addrlist, ifa_list) {
			if (lo0ifa->ifa_addr->sa_family ==
			    ifa->ifa_addr->sa_family)
				break;
		}
		if_put(lo0ifp);

		if (lo0ifa == NULL)
			break;

		rt->rt_flags &= ~RTF_LLINFO;
		break;
	case RTM_DELETE:
	case RTM_RESOLVE:
	default:
		break;
	}
}
1624 
/*
 * if_output handler for mgre(4).  Unlike gre(4), the tunnel
 * destination comes from the route's gateway on a per-packet basis,
 * so validate the route, guard against nesting, and encapsulate here
 * before enqueueing; mgre_start() only has to send the result.
 */
static int
mgre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dest,
    struct rtentry *rt0)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct sockaddr *gate;
	struct rtentry *rt;
	struct m_tag *mtag;
	int error = 0;
	sa_family_t af;
	const void *addr;

	if (!gre_allow) {
		error = EACCES;
		goto drop;
	}

	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = ENETDOWN;
		goto drop;
	}

	switch (dest->sa_family) {
	case AF_INET:
#ifdef INET6
	case AF_INET6:
#endif
#ifdef MPLS
	case AF_MPLS:
#endif
		break;
	default:
		error = EAFNOSUPPORT;
		goto drop;
	}

	if (ISSET(m->m_flags, M_MCAST|M_BCAST)) {
		error = ENETUNREACH;
		goto drop;
	}

	rt = rt_getll(rt0);

	/* check rt_expire? */
	if (ISSET(rt->rt_flags, RTF_REJECT)) {
		error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
		goto drop;
	}
	if (!ISSET(rt->rt_flags, RTF_HOST)) {
		error = EHOSTUNREACH;
		goto drop;
	}
	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
		error = EINVAL;
		goto drop;
	}

	/* the route's gateway is the tunnel destination for this packet */
	gate = rt->rt_gateway;
	af = gate->sa_family;
	if (af != sc->sc_tunnel.t_af) {
		error = EAGAIN;
		goto drop;
	}

	/* Try to limit infinite recursion through misconfiguration. */
	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
		if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
		    sizeof(ifp->if_index)) == 0) {
			error = EIO;
			goto drop;
		}
	}

	/* tag the packet with our index so nesting can be detected above */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL) {
		error = ENOBUFS;
		goto drop;
	}
	memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
	m_tag_prepend(m, mtag);

	/* extract the destination address out of the gateway sockaddr */
	switch (af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)gate;
		addr = &sin->sin_addr;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)gate;
		addr = &sin6->sin6_addr;
		break;
	}
 #endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	m = gre_l3_encap_dst(&sc->sc_tunnel, addr, m, dest->sa_family);
	if (m == NULL) {
		ifp->if_oerrors++;
		return (ENOBUFS);
	}

	m->m_pkthdr.ph_family = dest->sa_family;

	error = if_enqueue(ifp, m);
	if (error)
		ifp->if_oerrors++;
	return (error);

drop:
	m_freem(m);
	return (error);
}
1742 
/*
 * Transmit handler for mgre(4).  Packets were already encapsulated
 * by mgre_output(), so just tap bpf with the inner packet (using a
 * temporary mbuf header to skip the tunnel headers) and send.
 */
static void
mgre_start(struct ifnet *ifp)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct mbuf *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf) {
			struct m_hdr mh;
			struct mbuf *n;
			int off;

			/* point a fake mbuf at the payload after the headers */
			n = m_getptr(m, ifp->if_hdrlen, &off);
			KASSERT(n != NULL);

			mh.mh_flags = 0;
			mh.mh_next = n->m_next;
			mh.mh_len = n->m_len - off;
			mh.mh_data = n->m_data + off;

			bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family,
			    (struct mbuf *)&mh, BPF_DIRECTION_OUT);
		}
#endif

		if (gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
1779 
/*
 * Transmit handler for egre(4): encapsulate queued ethernet frames
 * in GRE (proto TRANSETHER) and push them into the IP stack.
 */
static void
egre_start(struct ifnet *ifp)
{
	struct egre_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* sending is administratively disabled; throw everything away */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* zero-length lead mbuf: the headers get prepended into it */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap(&sc->sc_tunnel, m, htons(ETHERTYPE_TRANSETHER),
		    sc->sc_tunnel.t_ttl, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
1822 
/*
 * Encapsulate an L3 payload for transmission: derive the GRE protocol
 * id, the inner TOS (for ECN and prio handling) and the outer TTL
 * from the payload, then prepend the GRE and outer IP headers towards
 * dst.  Returns NULL (packet freed) on failure.
 */
static struct mbuf *
gre_l3_encap_dst(const struct gre_tunnel *tunnel, const void *dst,
    struct mbuf *m, sa_family_t af)
{
	uint16_t proto;
	uint8_t ttl, itos, otos;
	int tttl = tunnel->t_ttl;
	int ttloff;

	switch (af) {
	case AF_INET: {
		struct ip *ip;

		m = m_pullup(m, sizeof(*ip));
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		itos = ip->ip_tos;

		ttloff = offsetof(struct ip, ip_ttl);
		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;

		m = m_pullup(m, sizeof(*ip6));
		if (m == NULL)
			return (NULL);

		ip6 = mtod(m, struct ip6_hdr *);
		/* the traffic class occupies bits 27-20 of the flow word */
		itos = (ntohl(ip6->ip6_flow) & 0x0ff00000) >> 20;

		ttloff = offsetof(struct ip6_hdr, ip6_hlim);
		proto = htons(ETHERTYPE_IPV6);
		break;
	}
 #endif
#ifdef MPLS
	case AF_MPLS: {
		uint32_t shim;

		m = m_pullup(m, sizeof(shim));
		if (m == NULL)
			return (NULL);

		/* scale the 3-bit EXP/TC field up into the precedence bits */
		shim = bemtoh32(mtod(m, uint32_t *)) & MPLS_EXP_MASK;
		itos = (shim >> MPLS_EXP_OFFSET) << 5;

		/* the MPLS TTL is the low byte of the big endian shim word */
		ttloff = 3;

		if (m->m_flags & (M_BCAST | M_MCAST))
			proto = htons(ETHERTYPE_MPLS_MCAST);
		else
			proto = htons(ETHERTYPE_MPLS);
		break;
	}
#endif
	default:
		unhandled_af(af);
	}

	/* a tunnel ttl of -1 means copy the ttl from the payload */
	if (tttl == -1) {
		KASSERT(m->m_len > ttloff); /* m_pullup has happened */

		ttl = *(m->m_data + ttloff);
	} else
		ttl = tttl;

	itos = gre_l3_tos(tunnel, m, itos);
	/* derive the outer tos from the inner tos per the ECN policy */
	ip_ecn_ingress(tunnel->t_ecn, &otos, &itos);

	return (gre_encap_dst(tunnel, dst, m, proto, ttl, otos));
}
1899 
/*
 * Prepend the GRE header, plus the optional key field, to m.  The key
 * carries the tunnel's configured value and, for entropy keyed
 * tunnels, the packet's flow id in the low bits.  Returns NULL
 * (packet freed) if allocation fails.
 */
static struct mbuf *
gre_encap_dst(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint16_t proto, uint8_t ttl, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key *gkh;
	int hlen;

	hlen = sizeof(*gh);
	if (tunnel->t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(*gkh);

	m = m_prepend(m, hlen, M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = GRE_VERS_0;
	gh->gre_proto = proto;
	if (tunnel->t_key_mask != GRE_KEY_NONE) {
		gh->gre_flags |= htons(GRE_KP);

		gkh = (struct gre_h_key *)(gh + 1);
		gkh->gre_key = tunnel->t_key;

		/* mix the flow id into the entropy bits of the key */
		if (tunnel->t_key_mask == GRE_KEY_ENTROPY &&
		    ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
			gkh->gre_key |= htonl(~GRE_KEY_ENTROPY &
			    m->m_pkthdr.ph_flowid);
		}
	}

	return (gre_encap_dst_ip(tunnel, dst, m, ttl, tos));
}
1934 
/*
 * Prepend and fill in the outer IPv4 or IPv6 header from the tunnel
 * configuration.  Returns NULL (packet freed) if the tunnel is not
 * configured yet or allocation fails.
 */
static struct mbuf *
gre_encap_dst_ip(const struct gre_tunnel *tunnel, const union gre_addr *dst,
    struct mbuf *m, uint8_t ttl, uint8_t tos)
{
	switch (tunnel->t_af) {
	case AF_UNSPEC:
		/* packets may arrive before tunnel is set up */
		m_freem(m);
		return (NULL);
	case AF_INET: {
		struct ip *ip;

		m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_off = tunnel->t_df; /* t_df is htons(IP_DF) or 0 */
		ip->ip_tos = tos;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_ttl = ttl;
		ip->ip_p = IPPROTO_GRE;
		ip->ip_src = tunnel->t_src4;
		ip->ip_dst = dst->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *ip6;
		/* ip6_plen excludes the header, so take the length first */
		int len = m->m_pkthdr.len;

		m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
		if (m == NULL)
			return (NULL);

		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
		    htonl(m->m_pkthdr.ph_flowid) : 0;
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_flow |= htonl((uint32_t)tos << 20);
		ip6->ip6_plen = htons(len);
		ip6->ip6_nxt = IPPROTO_GRE;
		ip6->ip6_hlim = ttl;
		ip6->ip6_src = tunnel->t_src6;
		ip6->ip6_dst = dst->in6;

		if (tunnel->t_df)
			SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	return (m);
}
1995 
1996 static int
1997 gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m)
1998 {
1999 	m->m_flags &= ~(M_BCAST|M_MCAST);
2000 	m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
2001 
2002 #if NPF > 0
2003 	pf_pkt_addr_changed(m);
2004 #endif
2005 
2006 	switch (tunnel->t_af) {
2007 	case AF_INET:
2008 		ip_send(m);
2009 		break;
2010 #ifdef INET6
2011 	case AF_INET6:
2012 		ip6_send(m);
2013 		break;
2014 #endif
2015 	default:
2016 		unhandled_af(tunnel->t_af);
2017 	}
2018 
2019 	return (0);
2020 }
2021 
/*
 * Handle the ioctls shared by all interfaces in the gre family:
 * mtu, tunnel endpoints, vnetid/flowid, rdomain and df settings.
 * Returns ENOTTY for anything not handled here.
 */
static int
gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel,
    u_long cmd, void *data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFMTU:
		/* 576 is the historic IPv4 minimum MTU */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		ifp->if_mtu = ifr->ifr_mtu;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	case SIOCSVNETID:
		error = gre_set_vnetid(tunnel, ifr);
		break;

	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;
	case SIOCDVNETID:
		error = gre_del_vnetid(tunnel);
		break;

	case SIOCSVNETFLOWID:
		error = gre_set_vnetflowid(tunnel, ifr);
		break;

	case SIOCGVNETFLOWID:
		error = gre_get_vnetflowid(tunnel, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = gre_del_tunnel(tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		/* the rdomain must already exist */
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	default:
		error = ENOTTY;
		break;
	}

	return (error);
}
2098 
2099 static uint8_t
2100 gre_l2_tos(const struct gre_tunnel *t, const struct mbuf *m)
2101 {
2102 	uint8_t prio;
2103 
2104 	switch (t->t_txhprio) {
2105 	case IF_HDRPRIO_PACKET:
2106 		prio = m->m_pkthdr.pf.prio;
2107 		break;
2108 	default:
2109 		prio = t->t_txhprio;
2110 		break;
2111 	}
2112 
2113 	return (IFQ_PRIO2TOS(prio));
2114 }
2115 
2116 static uint8_t
2117 gre_l3_tos(const struct gre_tunnel *t, const struct mbuf *m, uint8_t tos)
2118 {
2119 	uint8_t prio;
2120 
2121 	switch (t->t_txhprio) {
2122 	case IF_HDRPRIO_PAYLOAD:
2123 		return (tos);
2124 	case IF_HDRPRIO_PACKET:
2125 		prio = m->m_pkthdr.pf.prio;
2126 		break;
2127 	default:
2128 		prio = t->t_txhprio;
2129 		break;
2130 	}
2131 
2132 	return (IFQ_PRIO2TOS(prio) | (tos & IPTOS_ECN_MASK));
2133 }
2134 
/*
 * ioctl handler for gre(4): interface up/down, keepalive
 * configuration, ttl/ecn/prio settings; everything else is passed
 * on to the shared gre_tunnel_ioctl().
 */
static int
gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct gre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = gre_down(sc);
		}
		break;
	case SIOCSIFRDOMAIN:
		/* let if_rdomain do its thing */
		error = ENOTTY;
		break;

	case SIOCSETKALIVE:
		/*
		 * timeout is in seconds (at most a day), count up to 256;
		 * both must be zero (disable) or both non-zero (enable).
		 */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256 ||
		    (ikar->ikar_timeo == 0) != (ikar->ikar_cnt == 0))
			return (EINVAL);

		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			/* disable keepalives */
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;

			/* fresh digest key and uptime bias for this session */
			arc4random_buf(&sc->sc_ka_key, sizeof(sc->sc_ka_key));
			sc->sc_ka_bias = arc4random();
			sc->sc_ka_holdmax = sc->sc_ka_count;

			/* pretend we last heard a keepalive a second ago */
			sc->sc_ka_recvtm = ticks - hz;
			timeout_add(&sc->sc_ka_send, 1);
			timeout_add_sec(&sc->sc_ka_hold,
			    sc->sc_ka_timeo * sc->sc_ka_count);
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means copy the ttl from the payload */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2246 
/*
 * ioctl handler for mgre(4): interface up/down, ttl/ecn/prio and
 * local tunnel address; tunnel-changing ioctls are refused while the
 * interface is running, everything else goes to gre_tunnel_ioctl().
 */
static int
mgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct mgre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = mgre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		/* -1 means copy the ttl from the payload */
		if (ifr->ifr_ttl != -1 &&
		    (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYECN:
		sc->sc_tunnel.t_ecn =
		    ifr->ifr_metric ? ECN_ALLOWED : ECN_FORBIDDEN;
		break;
	case SIOCGLIFPHYECN:
		ifr->ifr_metric = (sc->sc_tunnel.t_ecn == ECN_ALLOWED);
		break;

	case SIOCSLIFPHYADDR:
		/* the local address cannot change while running */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = mgre_set_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = mgre_get_tunnel(sc, (struct if_laddrreq *)data);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l3_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* these would change the tunnel config; refuse while up */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		break;
	}

	return (error);
}
2342 
/*
 * Set the local address of an mgre(4) interface.  mgre has no single
 * peer (destinations come from routes at transmit time), so the
 * request must not carry a destination address, and the stored
 * destination is cleared.
 */
static int
mgre_set_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *addr = (struct sockaddr *)&req->addr;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *addr4;
#ifdef INET6
	struct sockaddr_in6 *addr6;
	int error;
#endif

	if (dstaddr->sa_family != AF_UNSPEC)
		return (EINVAL);

	/* validate */
	switch (addr->sa_family) {
	case AF_INET:
		if (addr->sa_len != sizeof(*addr4))
			return (EINVAL);

		/* refuse the unspecified address and multicast groups */
		addr4 = (struct sockaddr_in *)addr;
		if (in_nullhost(addr4->sin_addr) ||
		    IN_MULTICAST(addr4->sin_addr.s_addr))
			return (EINVAL);

		tunnel->t_src4 = addr4->sin_addr;
		tunnel->t_dst4.s_addr = INADDR_ANY;

		break;
#ifdef INET6
	case AF_INET6:
		if (addr->sa_len != sizeof(*addr6))
			return (EINVAL);

		/* refuse the unspecified address and multicast groups */
		addr6 = (struct sockaddr_in6 *)addr;
		if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&addr6->sin6_addr))
			return (EINVAL);

		/* fold the scope id into the stored address */
		error = in6_embedscope(&tunnel->t_src6, addr6, NULL);
		if (error != 0)
			return (error);

		memset(&tunnel->t_dst6, 0, sizeof(tunnel->t_dst6));

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = addr->sa_family;

	return (0);
}
2400 
/*
 * mgre_get_tunnel: report the local endpoint of a multipoint GRE
 * interface.  The destination is always returned as AF_UNSPEC since
 * mgre has no fixed peer.  Returns 0, or EADDRNOTAVAIL if no endpoint
 * has been configured yet.
 */
static int
mgre_get_tunnel(struct mgre_softc *sc, struct if_laddrreq *req)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		/* split the embedded scope id back out for userland */
		in6_recoverscope(sin6, &tunnel->t_src6);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	/* minimal AF_UNSPEC sockaddr: just sa_len and sa_family */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	return (0);
}
2440 
/*
 * egre_ioctl: handle ioctl requests for Ethernet-over-GRE interfaces.
 *
 * Tunnel configuration (addresses, vnetid, rdomain) is only allowed
 * while the interface is down; anything not handled here falls through
 * to the generic tunnel and then the Ethernet ioctl handlers.
 */
static int
egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct egre_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = egre_down(sc);
		}
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCSVNETID:
	case SIOCDVNETID:
	case SIOCSVNETFLOWID:
	case SIOCSLIFPHYADDR:
	case SIOCDIFPHYADDR:
	case SIOCSLIFPHYRTABLE:
		/* changing tunnel identity while running is not allowed */
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* FALLTHROUGH */
	default:
		error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data);
		if (error == ENOTTY)
			error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2525 
/*
 * nvgre_ioctl: handle ioctl requests for NVGRE interfaces.
 *
 * NVGRE learns MAC-to-endpoint bindings in an etherbridge table, so
 * any change to the tunnel topology (endpoint, parent, vnetid,
 * rdomain) flushes the learned entries.  Tunnel configuration is only
 * permitted while the interface is down.
 */
static int
nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct nvgre_softc *sc = ifp->if_softc;
	struct gre_tunnel *tunnel = &sc->sc_tunnel;

	struct ifreq *ifr = (struct ifreq *)data;
	struct if_parent *parent = (struct if_parent *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	struct ifnet *ifp0;

	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_up(sc);
			else
				error = ENETRESET;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = nvgre_down(sc);
		}
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* 0: the destination must be a multicast group */
		error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data, 0);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = gre_del_tunnel(tunnel);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;

	case SIOCSIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		error = nvgre_set_parent(sc, parent->ifp_parent);
		if (error == 0)
			etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGIFPARENT:
		ifp0 = if_get(sc->sc_ifp0);
		if (ifp0 == NULL)
			error = EADDRNOTAVAIL;
		else {
			memcpy(parent->ifp_parent, ifp0->if_xname,
			    sizeof(parent->ifp_parent));
		}
		/* if_put(NULL) is safe */
		if_put(ifp0);
		break;
	case SIOCDIFPARENT:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* commit */
		sc->sc_ifp0 = 0;
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* NVGRE always reserves the low key bits for flow entropy */
		if (ifr->ifr_vnetid < GRE_KEY_ENTROPY_MIN ||
		    ifr->ifr_vnetid > GRE_KEY_ENTROPY_MAX) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_key = htonl(ifr->ifr_vnetid << GRE_KEY_ENTROPY_SHIFT);
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGVNETID:
		error = gre_get_vnetid(tunnel, ifr);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		tunnel->t_rtableid = ifr->ifr_rdomainid;
		etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = tunnel->t_rtableid;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		tunnel->t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = tunnel->t_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		tunnel->t_ttl = ifr->ifr_ttl;
		break;

	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = tunnel->t_ttl;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	/* bridge-style management of the learned address table */
	case SIOCBRDGSCACHE:
		error = etherbridge_set_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGCACHE:
		error = etherbridge_get_max(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGSTO:
		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGTO:
		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGRTS:
		error = etherbridge_rtfind(&sc->sc_eb,
		    (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		etherbridge_flush(&sc->sc_eb,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;
	case SIOCBRDGSADDR:
		error = nvgre_add_addr(sc, (struct ifbareq *)data);
		break;
	case SIOCBRDGDADDR:
		error = nvgre_del_addr(sc, (struct ifbareq *)data);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2727 
/*
 * eoip_ioctl: handle ioctl requests for MikroTik EoIP interfaces.
 *
 * EoIP carries a 16-bit tunnel id instead of a GRE key; it is stored
 * little-endian both in sc_tunnel_id and (for fast input comparison)
 * in t_key.  Endpoint and rdomain changes require the interface to be
 * down.  Unknown commands fall through to ether_ioctl().
 */
static int
eoip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
	int error = 0;

	switch(cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* bring the interface up or down to match IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = eoip_down(sc);
		}
		break;

	case SIOCSETKALIVE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* at most one day between probes, at most 256 misses */
		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256)
			return (EINVAL);

		/* a zero period or count disables keepalives entirely */
		if (ikar->ikar_timeo == 0 || ikar->ikar_cnt == 0) {
			sc->sc_ka_count = 0;
			sc->sc_ka_timeo = 0;
			sc->sc_ka_state = GRE_KA_NONE;
		} else {
			sc->sc_ka_count = ikar->ikar_cnt;
			sc->sc_ka_timeo = ikar->ikar_timeo;
			sc->sc_ka_state = GRE_KA_DOWN;
		}
		break;

	case SIOCGETKALIVE:
		ikar->ikar_cnt = sc->sc_ka_count;
		ikar->ikar_timeo = sc->sc_ka_timeo;
		break;

	case SIOCSVNETID:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}
		/* the EoIP tunnel id is only 16 bits wide */
		if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffff)
			return (EINVAL);

		sc->sc_tunnel.t_key = htole16(ifr->ifr_vnetid); /* for cmp */
		sc->sc_tunnel_id = htole16(ifr->ifr_vnetid);
		break;

	case SIOCGVNETID:
		ifr->ifr_vnetid = letoh16(sc->sc_tunnel_id);
		break;

	case SIOCSLIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		/* 1: EoIP peers are unicast, not multicast groups */
		error = gre_set_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data, 1);
		break;
	case SIOCGLIFPHYADDR:
		error = gre_get_tunnel(&sc->sc_tunnel,
		    (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		error = gre_del_tunnel(&sc->sc_tunnel);
		break;

	case SIOCSLIFPHYRTABLE:
		if (ISSET(ifp->if_flags, IFF_RUNNING)) {
			error = EBUSY;
			break;
		}

		if (ifr->ifr_rdomainid < 0 ||
		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
		    !rtable_exists(ifr->ifr_rdomainid)) {
			error = EINVAL;
			break;
		}
		sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
		break;
	case SIOCGLIFPHYRTABLE:
		ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
		break;

	case SIOCSLIFPHYDF:
		/* commit */
		sc->sc_tunnel.t_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_tunnel.t_df ? 1 : 0;
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_tunnel.t_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_tunnel.t_rxhprio;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
2892 
2893 static int
2894 gre_up(struct gre_softc *sc)
2895 {
2896 	NET_ASSERT_LOCKED();
2897 	SET(sc->sc_if.if_flags, IFF_RUNNING);
2898 
2899 	if (sc->sc_ka_state != GRE_KA_NONE)
2900 		gre_keepalive_send(sc);
2901 
2902 	return (0);
2903 }
2904 
2905 static int
2906 gre_down(struct gre_softc *sc)
2907 {
2908 	NET_ASSERT_LOCKED();
2909 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
2910 
2911 	if (sc->sc_ka_state != GRE_KA_NONE) {
2912 		timeout_del_barrier(&sc->sc_ka_hold);
2913 		timeout_del_barrier(&sc->sc_ka_send);
2914 
2915 		sc->sc_ka_state = GRE_KA_DOWN;
2916 		gre_link_state(&sc->sc_if, sc->sc_ka_state);
2917 	}
2918 
2919 	return (0);
2920 }
2921 
2922 static void
2923 gre_link_state(struct ifnet *ifp, unsigned int state)
2924 {
2925 	int link_state = LINK_STATE_UNKNOWN;
2926 
2927 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
2928 		switch (state) {
2929 		case GRE_KA_NONE:
2930 			/* maybe up? or down? it's unknown, really */
2931 			break;
2932 		case GRE_KA_UP:
2933 			link_state = LINK_STATE_UP;
2934 			break;
2935 		default:
2936 			link_state = LINK_STATE_KALIVE_DOWN;
2937 			break;
2938 		}
2939 	}
2940 
2941 	if (ifp->if_link_state != link_state) {
2942 		ifp->if_link_state = link_state;
2943 		if_link_state_change(ifp);
2944 	}
2945 }
2946 
/*
 * gre_keepalive_send: timeout handler that emits one keepalive probe.
 *
 * A keepalive is a small authenticated (SipHash) payload encapsulated
 * twice: first as if the *peer* had sent it to us (src/dst swapped),
 * then inside our own tunnel header.  When the peer decapsulates the
 * outer layer and routes the inner packet back, we receive our own
 * probe, proving the path works in both directions.
 */
static void
gre_keepalive_send(void *arg)
{
	struct gre_tunnel t;
	struct gre_softc *sc = arg;
	struct mbuf *m;
	struct gre_keepalive *gk;
	SIPHASH_CTX ctx;
	int linkhdr, len;
	uint16_t proto;
	uint8_t ttl;
	uint8_t tos;

	/*
	 * re-schedule immediately, so we deal with incomplete configuration
	 * or temporary errors.
	 */
	if (sc->sc_ka_timeo)
		timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);

	/* give up silently if the tunnel isn't in a usable state */
	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_ka_state == GRE_KA_NONE ||
	    sc->sc_tunnel.t_af == AF_UNSPEC ||
	    sc->sc_tunnel.t_rtableid != sc->sc_if.if_rdomain)
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key);
#endif
	len = linkhdr + sizeof(*gk);

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (len > MHLEN) {
		MCLGETL(m, M_DONTWAIT, len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	/* reserve header space up front, leaving only the payload */
	m->m_pkthdr.len = m->m_len = len;
	m_adj(m, linkhdr);

	/*
	 * build the inside packet
	 */
	gk = mtod(m, struct gre_keepalive *);
	htobem32(&gk->gk_uptime, sc->sc_ka_bias + ticks);
	htobem32(&gk->gk_random, arc4random());

	/* sign uptime+random so the receive side can verify it's ours */
	SipHash24_Init(&ctx, &sc->sc_ka_key);
	SipHash24_Update(&ctx, &gk->gk_uptime, sizeof(gk->gk_uptime));
	SipHash24_Update(&ctx, &gk->gk_random, sizeof(gk->gk_random));
	SipHash24_Final(gk->gk_digest, &ctx);

	ttl = sc->sc_tunnel.t_ttl == -1 ? ip_defttl : sc->sc_tunnel.t_ttl;

	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
	tos = gre_l3_tos(&sc->sc_tunnel, m, IFQ_PRIO2TOS(m->m_pkthdr.pf.prio));

	/* mirror of our tunnel: pretend the peer is sending to us */
	t.t_af = sc->sc_tunnel.t_af;
	t.t_df = sc->sc_tunnel.t_df;
	t.t_src = sc->sc_tunnel.t_dst;
	t.t_dst = sc->sc_tunnel.t_src;
	t.t_key = sc->sc_tunnel.t_key;
	t.t_key_mask = sc->sc_tunnel.t_key_mask;

	m = gre_encap(&t, m, htons(0), ttl, tos);
	if (m == NULL)
		return;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct ip *ip;

		/* finish the inner IP header by hand */
		ip = mtod(m, struct ip *);
		ip->ip_id = htons(ip_randomid());
		ip->ip_sum = 0;
		ip->ip_sum = in_cksum(m, sizeof(*ip));

		proto = htons(ETHERTYPE_IP);
		break;
	}
#ifdef INET6
	case AF_INET6:
		proto = htons(ETHERTYPE_IPV6);
		break;
#endif
	default:
		m_freem(m);
		return;
	}

	/*
	 * put it in the tunnel
	 */
	m = gre_encap(&sc->sc_tunnel, m, proto, ttl, tos);
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);
}
3057 
3058 static void
3059 gre_keepalive_hold(void *arg)
3060 {
3061 	struct gre_softc *sc = arg;
3062 	struct ifnet *ifp = &sc->sc_if;
3063 
3064 	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
3065 	    sc->sc_ka_state == GRE_KA_NONE)
3066 		return;
3067 
3068 	NET_LOCK();
3069 	sc->sc_ka_state = GRE_KA_DOWN;
3070 	gre_link_state(ifp, sc->sc_ka_state);
3071 	NET_UNLOCK();
3072 }
3073 
/*
 * gre_set_tunnel: configure both endpoints of a tunnel.
 *
 * The source must always be a specific unicast address.  "ucast"
 * selects what the destination must be: non-zero requires a unicast
 * peer (gre, egre, eoip), zero requires a multicast group (nvgre).
 * Returns 0 on success or an errno value; nothing is committed until
 * all validation has passed.
 */
static int
gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req, int ucast)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif

	/* sa_family and sa_len must be equal */
	if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
		return (EINVAL);

	/* validate */
	switch (dst->sa_family) {
	case AF_INET:
		if (dst->sa_len != sizeof(*dst4))
			return (EINVAL);

		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		/* dst must be multicast iff !ucast, hence the != !ucast */
		dst4 = (struct sockaddr_in *)dst;
		if (in_nullhost(dst4->sin_addr) ||
		    (IN_MULTICAST(dst4->sin_addr.s_addr) != !ucast))
			return (EINVAL);

		tunnel->t_src4 = src4->sin_addr;
		tunnel->t_dst4 = dst4->sin_addr;

		break;
#ifdef INET6
	case AF_INET6:
		if (dst->sa_len != sizeof(*dst6))
			return (EINVAL);

		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		/* same multicast-iff-!ucast rule as the v4 case above */
		dst6 = (struct sockaddr_in6 *)dst;
		if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) != !ucast)
			return (EINVAL);

		/* both ends of the tunnel must share a scope */
		if (src6->sin6_scope_id != dst6->sin6_scope_id)
			return (EINVAL);

		error = in6_embedscope(&tunnel->t_src6, src6, NULL);
		if (error != 0)
			return (error);

		error = in6_embedscope(&tunnel->t_dst6, dst6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* commit */
	tunnel->t_af = dst->sa_family;

	return (0);
}
3146 
/*
 * gre_get_tunnel: report the configured tunnel endpoints back to
 * userland.  Returns EADDRNOTAVAIL if no tunnel has been set up yet.
 */
static int
gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
{
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6 /* ifconfig already embeds the scopeid */
	struct sockaddr_in6 *sin6;
#endif

	switch (tunnel->t_af) {
	case AF_UNSPEC:
		return (EADDRNOTAVAIL);
	case AF_INET:
		sin = (struct sockaddr_in *)src;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_src4;

		sin = (struct sockaddr_in *)dst;
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = tunnel->t_dst4;

		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)src;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		/* split the embedded scope id back out for userland */
		in6_recoverscope(sin6, &tunnel->t_src6);

		sin6 = (struct sockaddr_in6 *)dst;
		memset(sin6, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_len = sizeof(*sin6);
		in6_recoverscope(sin6, &tunnel->t_dst6);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	return (0);
}
3197 
3198 static int
3199 gre_del_tunnel(struct gre_tunnel *tunnel)
3200 {
3201 	/* commit */
3202 	tunnel->t_af = AF_UNSPEC;
3203 
3204 	return (0);
3205 }
3206 
3207 static int
3208 gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3209 {
3210 	uint32_t key;
3211 	uint32_t min = GRE_KEY_MIN;
3212 	uint32_t max = GRE_KEY_MAX;
3213 	unsigned int shift = GRE_KEY_SHIFT;
3214 	uint32_t mask = GRE_KEY_MASK;
3215 
3216 	if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
3217 		min = GRE_KEY_ENTROPY_MIN;
3218 		max = GRE_KEY_ENTROPY_MAX;
3219 		shift = GRE_KEY_ENTROPY_SHIFT;
3220 		mask = GRE_KEY_ENTROPY;
3221 	}
3222 
3223 	if (ifr->ifr_vnetid < min || ifr->ifr_vnetid > max)
3224 		return (EINVAL);
3225 
3226 	key = htonl(ifr->ifr_vnetid << shift);
3227 
3228 	/* commit */
3229 	tunnel->t_key_mask = mask;
3230 	tunnel->t_key = key;
3231 
3232 	return (0);
3233 }
3234 
3235 static int
3236 gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3237 {
3238 	int shift;
3239 
3240 	switch (tunnel->t_key_mask) {
3241 	case GRE_KEY_NONE:
3242 		return (EADDRNOTAVAIL);
3243 	case GRE_KEY_ENTROPY:
3244 		shift = GRE_KEY_ENTROPY_SHIFT;
3245 		break;
3246 	case GRE_KEY_MASK:
3247 		shift = GRE_KEY_SHIFT;
3248 		break;
3249 	}
3250 
3251 	ifr->ifr_vnetid = ntohl(tunnel->t_key) >> shift;
3252 
3253 	return (0);
3254 }
3255 
3256 static int
3257 gre_del_vnetid(struct gre_tunnel *tunnel)
3258 {
3259 	tunnel->t_key_mask = GRE_KEY_NONE;
3260 
3261 	return (0);
3262 }
3263 
3264 static int
3265 gre_set_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3266 {
3267 	uint32_t mask, key;
3268 
3269 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3270 		return (EADDRNOTAVAIL);
3271 
3272 	mask = ifr->ifr_vnetid ? GRE_KEY_ENTROPY : GRE_KEY_MASK;
3273 	if (tunnel->t_key_mask == mask) {
3274 		/* nop */
3275 		return (0);
3276 	}
3277 
3278 	key = ntohl(tunnel->t_key);
3279 	if (mask == GRE_KEY_ENTROPY) {
3280 		if (key > GRE_KEY_ENTROPY_MAX)
3281 			return (ERANGE);
3282 
3283 		key = htonl(key << GRE_KEY_ENTROPY_SHIFT);
3284 	} else
3285 		key = htonl(key >> GRE_KEY_ENTROPY_SHIFT);
3286 
3287 	/* commit */
3288 	tunnel->t_key_mask = mask;
3289 	tunnel->t_key = key;
3290 
3291 	return (0);
3292 }
3293 
3294 static int
3295 gre_get_vnetflowid(struct gre_tunnel *tunnel, struct ifreq *ifr)
3296 {
3297 	if (tunnel->t_key_mask == GRE_KEY_NONE)
3298 		return (EADDRNOTAVAIL);
3299 
3300 	ifr->ifr_vnetid = tunnel->t_key_mask == GRE_KEY_ENTROPY;
3301 
3302 	return (0);
3303 }
3304 
/*
 * mgre_up: bring a multipoint GRE interface up.
 *
 * Computes if_hdrlen from the configured address family and key so
 * the stack can reserve header space, then registers the interface in
 * the global mgre tree used to dispatch incoming packets.  Fails with
 * EDESTADDRREQ if no endpoint is configured and EADDRINUSE if another
 * mgre interface already claims the same tunnel.
 */
static int
mgre_up(struct mgre_softc *sc)
{
	unsigned int hlen;

	switch (sc->sc_tunnel.t_af) {
	case AF_UNSPEC:
		return (EDESTADDRREQ);
	case AF_INET:
		hlen = sizeof(struct ip);
		break;
#ifdef INET6
	case AF_INET6:
		hlen = sizeof(struct ip6_hdr);
		break;
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}

	hlen += sizeof(struct gre_header);
	if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
		hlen += sizeof(struct gre_h_key);

	NET_ASSERT_LOCKED();

	/* insertion fails if an equivalent tunnel is already up */
	if (RBT_INSERT(mgre_tree, &mgre_tree, sc) != NULL)
		return (EADDRINUSE);

	sc->sc_if.if_hdrlen = hlen;
	SET(sc->sc_if.if_flags, IFF_RUNNING);

	return (0);
}
3339 
3340 static int
3341 mgre_down(struct mgre_softc *sc)
3342 {
3343 	NET_ASSERT_LOCKED();
3344 
3345 	CLR(sc->sc_if.if_flags, IFF_RUNNING);
3346 	sc->sc_if.if_hdrlen = GRE_HDRLEN; /* symmetry */
3347 
3348 	RBT_REMOVE(mgre_tree, &mgre_tree, sc);
3349 
3350 	/* barrier? */
3351 
3352 	return (0);
3353 }
3354 
3355 static int
3356 egre_up(struct egre_softc *sc)
3357 {
3358 	if (sc->sc_tunnel.t_af == AF_UNSPEC)
3359 		return (EDESTADDRREQ);
3360 
3361 	NET_ASSERT_LOCKED();
3362 
3363 	if (RBT_INSERT(egre_tree, &egre_tree, sc) != NULL)
3364 		return (EADDRINUSE);
3365 
3366 	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3367 
3368 	return (0);
3369 }
3370 
3371 static int
3372 egre_down(struct egre_softc *sc)
3373 {
3374 	NET_ASSERT_LOCKED();
3375 
3376 	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
3377 
3378 	RBT_REMOVE(egre_tree, &egre_tree, sc);
3379 
3380 	/* barrier? */
3381 
3382 	return (0);
3383 }
3384 
3385 static int
3386 egre_media_change(struct ifnet *ifp)
3387 {
3388 	return (ENOTTY);
3389 }
3390 
3391 static void
3392 egre_media_status(struct ifnet *ifp, struct ifmediareq *imr)
3393 {
3394 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
3395 	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
3396 }
3397 
/*
 * nvgre_up: bring an NVGRE interface up.
 *
 * Requires a configured tunnel and a multicast-capable parent
 * interface.  Registers the softc in both the multicast and unicast
 * lookup trees, then joins the tunnel's multicast group on the
 * parent.  On any failure the steps already taken are unwound via the
 * goto labels in reverse order.
 */
static int
nvgre_up(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp0;
	void *inm;
	int error;

	if (tunnel->t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 == NULL)
		return (ENXIO);
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENODEV;
		goto put;
	}

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(nvgre_mcast_tree, &nvgre_mcast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto put;
	}
	if (RBT_INSERT(nvgre_ucast_tree, &nvgre_ucast_tree, sc) != NULL) {
		error = EADDRINUSE;
		goto remove_mcast;
	}

	/* join the tunnel destination group on the parent interface */
	switch (tunnel->t_af) {
	case AF_INET:
		inm = in_addmulti(&tunnel->t_dst4, ifp0);
		if (inm == NULL) {
			error = ECONNABORTED;
			goto remove_ucast;
		}
		break;
#ifdef INET6
	case AF_INET6:
		inm = in6_addmulti(&tunnel->t_dst6, ifp0, &error);
		if (inm == NULL) {
			/* error is already set */
			goto remove_ucast;
		}
		break;
#endif /* INET6 */
	default:
		unhandled_af(tunnel->t_af);
	}

	/* watch the parent for link changes and detach */
	if_linkstatehook_add(ifp0, &sc->sc_ltask);
	if_detachhook_add(ifp0, &sc->sc_dtask);

	if_put(ifp0);

	sc->sc_inm = inm;
	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	return (0);

remove_ucast:
	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
remove_mcast:
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);
put:
	if_put(ifp0);
	return (error);
}
3467 
/*
 * nvgre_down: take an NVGRE interface down.
 *
 * Clears IFF_RUNNING first, then drops the net lock to wait for the
 * send queue and the softnet send task to drain before tearing down
 * multicast membership, parent hooks, and the lookup tree entries.
 */
static int
nvgre_down(struct nvgre_softc *sc)
{
	struct gre_tunnel *tunnel = &sc->sc_tunnel;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct taskq *softnet = net_tq(ifp->if_index);
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	/* sleeping barriers can't be run while holding the net lock */
	NET_UNLOCK();
	ifq_barrier(&ifp->if_snd);
	if (!task_del(softnet, &sc->sc_send_task))
		taskq_barrier(softnet);
	NET_LOCK();

	mq_purge(&sc->sc_send_list);

	ifp0 = if_get(sc->sc_ifp0);
	if (ifp0 != NULL) {
		if_detachhook_del(ifp0, &sc->sc_dtask);
		if_linkstatehook_del(ifp0, &sc->sc_ltask);
	}
	/* if_put(NULL) is safe */
	if_put(ifp0);

	/* leave the multicast group joined in nvgre_up() */
	switch (tunnel->t_af) {
	case AF_INET:
		in_delmulti(sc->sc_inm);
		break;

#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inm);
		break;
#endif
	default:
		unhandled_af(tunnel->t_af);
	}

	RBT_REMOVE(nvgre_ucast_tree, &nvgre_ucast_tree, sc);
	RBT_REMOVE(nvgre_mcast_tree, &nvgre_mcast_tree, sc);

	return (0);
}
3514 
/*
 * nvgre_link_change: parent link-state hook.  Intentionally empty;
 * nvgre does not react to parent link transitions.
 */
static void
nvgre_link_change(void *arg)
{
	/* nop */
}
3520 
3521 static void
3522 nvgre_detach(void *arg)
3523 {
3524 	struct nvgre_softc *sc = arg;
3525 	struct ifnet *ifp = &sc->sc_ac.ac_if;
3526 
3527 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
3528 		nvgre_down(sc);
3529 		if_down(ifp);
3530 	}
3531 
3532 	sc->sc_ifp0 = 0;
3533 }
3534 
3535 static int
3536 nvgre_set_parent(struct nvgre_softc *sc, const char *parent)
3537 {
3538 	struct ifnet *ifp0;
3539 
3540 	ifp0 = if_unit(parent);
3541 	if (ifp0 == NULL)
3542 		return (EINVAL);
3543 
3544 	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
3545 		if_put(ifp0);
3546 		return (EPROTONOSUPPORT);
3547 	}
3548 
3549 	/* commit */
3550 	sc->sc_ifp0 = ifp0->if_index;
3551 	if_put(ifp0);
3552 
3553 	return (0);
3554 }
3555 
/*
 * nvgre_add_addr: install a static or dynamic MAC-to-endpoint binding
 * in the etherbridge table (SIOCBRDGSADDR).
 *
 * The endpoint in ifba_dstsa must be a specific unicast address in
 * the tunnel's address family; for IPv6 its scope must match the
 * tunnel source's scope.
 */
static int
nvgre_add_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union gre_addr endpoint;
	unsigned int type;

	/* ignore ifba_ifsname */

	/* no flags other than the entry type are accepted */
	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	memset(&endpoint, 0, sizeof(endpoint));

	if (ifba->ifba_dstsa.ss_family != sc->sc_tunnel.t_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		/* recover our source scope to compare against the entry's */
		in6_recoverscope(&src6, &sc->sc_tunnel.t_src6);

		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}
3625 
3626 static int
3627 nvgre_del_addr(struct nvgre_softc *sc, const struct ifbareq *ifba)
3628 {
3629 	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
3630 }
3631 
/*
 * nvgre_start: if_start handler for NVGRE.
 *
 * For each queued Ethernet frame, picks a tunnel endpoint (the
 * learned unicast endpoint for known destinations, the multicast
 * group for broadcast or unknown ones), encapsulates it, and batches
 * the results onto sc_send_list for transmission from the softnet
 * task, since ip_output() cannot be called from here.
 */
static void
nvgre_start(struct ifnet *ifp)
{
	struct nvgre_softc *sc = ifp->if_softc;
	const struct gre_tunnel *tunnel = &sc->sc_tunnel;
	union gre_addr gateway;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct ether_header *eh;
	struct mbuf *m, *m0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* choose the endpoint from the learned bridge table */
		eh = mtod(m0, struct ether_header *);
		if (ETHER_IS_BROADCAST(eh->ether_dhost))
			gateway = tunnel->t_dst;
		else {
			const union gre_addr *endpoint;

			smr_read_enter();
			endpoint = etherbridge_resolve_ea(&sc->sc_eb,
			    (struct ether_addr *)eh->ether_dhost);
			if (endpoint == NULL) {
				/* "flood" to unknown hosts */
				endpoint = &tunnel->t_dst;
			}
			/* copy before leaving the SMR read section */
			gateway = *endpoint;
			smr_read_leave();
		}

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* empty leading mbuf; gre_encap_dst prepends into it */
		m_align(m, 0);
		m->m_len = 0;

		m = gre_encap_dst(tunnel, &gateway, m,
		    htons(ETHERTYPE_TRANSETHER),
		    tunnel->t_ttl, gre_l2_tos(tunnel, m));
		if (m == NULL)
			continue;

		m->m_flags &= ~(M_BCAST|M_MCAST);
		m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;

#if NPF > 0
		pf_pkt_addr_changed(m);
#endif

		ml_enqueue(&ml, m);
	}

	/* hand the batch to the softnet task for ip_output() */
	if (!ml_empty(&ml)) {
		if (mq_enlist(&sc->sc_send_list, &ml) == 0)
			task_add(net_tq(ifp->if_index), &sc->sc_send_task);
		/* else set OACTIVE? */
	}
}
3709 
/*
 * Transmit a list of encapsulated packets over IPv4.  The multicast
 * options pin output to the parent interface (sc_ifp0), apply the
 * configured TTL and disable multicast loopback.  Returns the number
 * of packets that ip_output() failed to send.
 */
static uint64_t
nvgre_send4(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip_moptions imo;
	struct mbuf *m;
	uint64_t oerrors = 0;

	imo.imo_ifidx = sc->sc_ifp0;
	imo.imo_ttl = sc->sc_tunnel.t_ttl;
	imo.imo_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
3730 
#ifdef INET6
/*
 * IPv6 counterpart of nvgre_send4(): transmit the queued packets via
 * ip6_output() with multicast options bound to the parent interface.
 * Returns the number of packets that failed to send.
 */
static uint64_t
nvgre_send6(struct nvgre_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	im6o.im6o_ifidx = sc->sc_ifp0;
	im6o.im6o_hlim = sc->sc_tunnel.t_ttl;
	im6o.im6o_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
#endif /* INET6 */
3753 
/*
 * Task run on the net taskq to drain sc_send_list: hand the pending
 * encapsulated packets to the address-family specific output routine
 * and account the failures on the interface.
 */
static void
nvgre_send(void *arg)
{
	struct nvgre_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	sa_family_t af = sc->sc_tunnel.t_af;
	struct mbuf_list ml;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	mq_delist(&sc->sc_send_list, &ml);
	if (ml_empty(&ml))
		return;

	switch (af) {
	case AF_INET:
		oerrors = nvgre_send4(sc, &ml);
		break;
#ifdef INET6
	case AF_INET6:
		oerrors = nvgre_send6(sc, &ml);
		break;
#endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}

	ifp->if_oerrors += oerrors; /* XXX should be ifq_oerrors */
}
3786 
/*
 * Bring an eoip(4) interface up: the tunnel must have endpoints
 * configured, and the softc is inserted into the global eoip tree
 * (EADDRINUSE if another interface already uses the same tunnel
 * configuration).  If keepalives are enabled, start sending them.
 */
static int
eoip_up(struct eoip_softc *sc)
{
	if (sc->sc_tunnel.t_af == AF_UNSPEC)
		return (EDESTADDRREQ);

	NET_ASSERT_LOCKED();

	if (RBT_INSERT(eoip_tree, &eoip_tree, sc) != NULL)
		return (EADDRINUSE);

	SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		sc->sc_ka_holdmax = sc->sc_ka_count;
		eoip_keepalive_send(sc);
	}

	return (0);
}
3807 
/*
 * Bring an eoip(4) interface down: clear IFF_RUNNING first so the
 * keepalive handlers bail out, wait for any running keepalive
 * timeouts to finish, report the link as down, and remove the softc
 * from the global eoip tree.
 */
static int
eoip_down(struct eoip_softc *sc)
{
	NET_ASSERT_LOCKED();
	CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);

	if (sc->sc_ka_state != GRE_KA_NONE) {
		timeout_del_barrier(&sc->sc_ka_hold);
		timeout_del_barrier(&sc->sc_ka_send);

		sc->sc_ka_state = GRE_KA_DOWN;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
	}

	RBT_REMOVE(eoip_tree, &eoip_tree, sc);

	return (0);
}
3826 
/*
 * eoip(4) interface start routine: encapsulate each queued ethernet
 * frame with the EoIP/GRE headers and transmit it directly via
 * gre_ip_output().
 */
static void
eoip_start(struct ifnet *ifp)
{
	struct eoip_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/* drop everything while gre is administratively disabled */
	if (!gre_allow) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* force prepend mbuf because of alignment problems */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		/* the empty leading mbuf will receive the headers */
		m_align(m, 0);
		m->m_len = 0;

		m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
		if (m == NULL || gre_ip_output(&sc->sc_tunnel, m) != 0) {
			ifp->if_oerrors++;
			continue;
		}
	}
}
3868 
/*
 * Prepend the GRE and EoIP headers to m, then the outer IP header.
 * EoIP uses GRE version 1 with the K flag set; the key field carries
 * the payload length and the tunnel id.  Returns the encapsulated
 * mbuf, or NULL if allocation fails (the chain is freed).
 */
static struct mbuf *
eoip_encap(struct eoip_softc *sc, struct mbuf *m, uint8_t tos)
{
	struct gre_header *gh;
	struct gre_h_key_eoip *eoiph;
	int len = m->m_pkthdr.len;	/* payload length before headers */

	m = m_prepend(m, sizeof(*gh) + sizeof(*eoiph), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	gh = mtod(m, struct gre_header *);
	gh->gre_flags = htons(GRE_VERS_1 | GRE_KP);
	gh->gre_proto = htons(GRE_EOIP);

	eoiph = (struct gre_h_key_eoip *)(gh + 1);
	htobem16(&eoiph->eoip_len, len);
	eoiph->eoip_tunnel_id = sc->sc_tunnel_id;

	return (gre_encap_ip(&sc->sc_tunnel, m, sc->sc_tunnel.t_ttl, tos));
}
3890 
/*
 * Timeout handler: transmit an EoIP keepalive, which is simply an
 * EoIP packet with a zero-length payload, then reschedule ourselves
 * for the next interval.
 */
static void
eoip_keepalive_send(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	int linkhdr;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* this is really conservative */
#ifdef INET6
	linkhdr = max_linkhdr + MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#else
	linkhdr = max_linkhdr + sizeof(struct ip) +
	    sizeof(struct gre_header) + sizeof(struct gre_h_key_eoip);
#endif
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		return;

	if (linkhdr > MHLEN) {
		MCLGETL(m, M_DONTWAIT, linkhdr);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_freem(m);
			return;
		}
	}

	m->m_pkthdr.pf.prio = ifp->if_llprio;
	/* reserve header room, then trim it all off: the payload is empty */
	m->m_pkthdr.len = m->m_len = linkhdr;
	m_adj(m, linkhdr);

	m = eoip_encap(sc, m, gre_l2_tos(&sc->sc_tunnel, m));
	if (m == NULL)
		return;

	gre_ip_output(&sc->sc_tunnel, m);

	timeout_add_sec(&sc->sc_ka_send, sc->sc_ka_timeo);
}
3934 
/*
 * Hold timeout handler: fires when no keepalive has been received
 * for the hold period, so report the link as down.
 */
static void
eoip_keepalive_hold(void *arg)
{
	struct eoip_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	NET_LOCK();
	sc->sc_ka_state = GRE_KA_DOWN;
	gre_link_state(ifp, sc->sc_ka_state);
	NET_UNLOCK();
}
3949 
/*
 * Account for a received keepalive and drive the keepalive state
 * machine: the first keepalive moves DOWN to HOLD, and the link is
 * only declared UP after sc_ka_holdcnt further keepalives arrive.
 * sc_ka_holdmax grows (capped at 16 * sc_ka_count) each time the
 * link comes back, so a flapping peer must stay up longer before we
 * report UP again, and decays back towards sc_ka_count while UP.
 * Each keepalive pushes the hold timeout out; silence eventually
 * drops the link via eoip_keepalive_hold().
 */
static void
eoip_keepalive_recv(struct eoip_softc *sc)
{
	switch (sc->sc_ka_state) {
	case GRE_KA_NONE:
		return;
	case GRE_KA_DOWN:
		sc->sc_ka_state = GRE_KA_HOLD;
		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
		    16 * sc->sc_ka_count);
		break;
	case GRE_KA_HOLD:
		if (--sc->sc_ka_holdcnt > 0)
			break;

		sc->sc_ka_state = GRE_KA_UP;
		gre_link_state(&sc->sc_ac.ac_if, sc->sc_ka_state);
		break;

	case GRE_KA_UP:
		sc->sc_ka_holdmax--;
		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_count);
		break;
	}

	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timeo * sc->sc_ka_count);
}
3978 
/*
 * Handle an incoming GRE packet that may be EoIP: check for the
 * GRE version 1 header with only the K flag set, look up the
 * eoip(4) interface by tunnel id, treat a zero-length payload as a
 * keepalive, trim the frame to the length advertised in the EoIP
 * header, and feed it to the ethernet stack.  Returns the mbuf when
 * the packet should be offered to other handlers ("decline"), NULL
 * when it has been consumed or freed.
 */
static struct mbuf *
eoip_input(struct gre_tunnel *key, struct mbuf *m,
    const struct gre_header *gh, uint8_t otos, int iphlen)
{
	struct eoip_softc *sc;
	struct gre_h_key_eoip *eoiph;
	int hlen, len;
	caddr_t buf;

	if (gh->gre_flags != htons(GRE_KP | GRE_VERS_1))
		goto decline;

	hlen = iphlen + sizeof(*gh) + sizeof(*eoiph);
	if (m->m_pkthdr.len < hlen)
		goto decline;

	m = m_pullup(m, hlen);
	if (m == NULL)
		return (NULL);

	/* m_pullup() may have moved the data, so re-derive the headers */
	buf = mtod(m, caddr_t);
	gh = (struct gre_header *)(buf + iphlen);
	eoiph = (struct gre_h_key_eoip *)(gh + 1);

	key->t_key = eoiph->eoip_tunnel_id;

	NET_ASSERT_LOCKED();
	sc = RBT_FIND(eoip_tree, &eoip_tree, (const struct eoip_softc *)key);
	if (sc == NULL)
		goto decline;

	/* it's ours now */
	len = bemtoh16(&eoiph->eoip_len);
	if (len == 0) {
		/* a zero-length payload is a keepalive */
		eoip_keepalive_recv(sc);
		goto drop;
	}

	m = gre_ether_align(m, hlen);
	if (m == NULL)
		return (NULL);

	if (m->m_pkthdr.len < len)
		goto drop;
	if (m->m_pkthdr.len != len)
		m_adj(m, len - m->m_pkthdr.len);	/* trim trailing junk */

	m->m_flags &= ~(M_MCAST|M_BCAST);

	gre_l2_prio(&sc->sc_tunnel, m, otos);

	if_vinput(&sc->sc_ac.ac_if, m);

	return (NULL);

decline:
	return (m);
drop:
	m_freem(m);
	return (NULL);
}
4040 
/* bounds for the gre sysctl variables; both are booleans */
const struct sysctl_bounded_args gre_vars[] = {
	{ GRECTL_ALLOW, &gre_allow, 0, 1 },
	{ GRECTL_WCCP, &gre_wccp, 0, 1 },
};
4045 
/*
 * gre sysctl handler: all variables are bounded integers from
 * gre_vars, accessed under the net lock.
 */
int
gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int error;

	NET_LOCK();
	error = sysctl_bounded_arr(gre_vars, nitems(gre_vars), name,
	    namelen, oldp, oldlenp, newp, newlen);
	NET_UNLOCK();
	return error;
}
4058 
4059 static inline int
4060 gre_ip_cmp(int af, const union gre_addr *a, const union gre_addr *b)
4061 {
4062 	switch (af) {
4063 #ifdef INET6
4064 	case AF_INET6:
4065 		return (memcmp(&a->in6, &b->in6, sizeof(a->in6)));
4066 #endif /* INET6 */
4067 	case AF_INET:
4068 		return (memcmp(&a->in4, &b->in4, sizeof(a->in4)));
4069 	default:
4070 		unhandled_af(af);
4071 	}
4072 
4073 	return (0);
4074 }
4075 
/*
 * Compare the local ("source") halves of two tunnel configurations
 * for the RB trees: sort first by whether a key is set, then by the
 * key bits covered by both tunnels' masks, then by routing table,
 * address family, and source address.  Returns <0, 0, >0 like
 * memcmp().
 */
static int
gre_cmp_src(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;
	uint32_t mask;
	int rv;

	/* is K set at all? */
	ka = a->t_key_mask & GRE_KEY_ENTROPY;
	kb = b->t_key_mask & GRE_KEY_ENTROPY;

	/* sort by whether K is set */
	if (ka > kb)
		return (1);
	if (ka < kb)
		return (-1);

	/* is K set on both? */
	if (ka != GRE_KEY_NONE) {
		/* get common prefix */
		mask = a->t_key_mask & b->t_key_mask;

		ka = a->t_key & mask;
		kb = b->t_key & mask;

		/* sort by common prefix */
		if (ka > kb)
			return (1);
		if (ka < kb)
			return (-1);
	}

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	return (0);
}
4126 
4127 static int
4128 gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
4129 {
4130 	int rv;
4131 
4132 	rv = gre_cmp_src(a, b);
4133 	if (rv != 0)
4134 		return (rv);
4135 
4136 	return (gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst));
4137 }
4138 
/*
 * RB tree comparison for mgre(4): multipoint interfaces are keyed
 * by their local (source) tunnel configuration only.
 */
static inline int
mgre_cmp(const struct mgre_softc *a, const struct mgre_softc *b)
{
	return (gre_cmp_src(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(mgre_tree, mgre_softc, sc_entry, mgre_cmp);
4146 
/*
 * RB tree comparison for egre(4): interfaces are keyed by the full
 * tunnel configuration (source and destination).
 */
static inline int
egre_cmp(const struct egre_softc *a, const struct egre_softc *b)
{
	return (gre_cmp(&a->sc_tunnel, &b->sc_tunnel));
}

RBT_GENERATE(egre_tree, egre_softc, sc_entry, egre_cmp);
4154 
/*
 * Compare the tunnel properties shared by both nvgre lookup trees:
 * the key masked with GRE_KEY_ENTROPY, the routing table, and the
 * address family.  The endpoint addresses are compared by the
 * callers.
 */
static int
nvgre_cmp_tunnel(const struct gre_tunnel *a, const struct gre_tunnel *b)
{
	uint32_t ka, kb;

	ka = a->t_key & GRE_KEY_ENTROPY;
	kb = b->t_key & GRE_KEY_ENTROPY;

	/* sort by common prefix */
	if (ka > kb)
		return (1);
	if (ka < kb)
		return (-1);

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	return (0);
}
4183 
4184 static inline int
4185 nvgre_cmp_ucast(const struct nvgre_softc *na, const struct nvgre_softc *nb)
4186 {
4187 	const struct gre_tunnel *a = &na->sc_tunnel;
4188 	const struct gre_tunnel *b = &nb->sc_tunnel;
4189 	int rv;
4190 
4191 	rv = nvgre_cmp_tunnel(a, b);
4192 	if (rv != 0)
4193 		return (rv);
4194 
4195 	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
4196 	if (rv != 0)
4197 		return (rv);
4198 
4199 	return (0);
4200 }
4201 
4202 static int
4203 nvgre_cmp_mcast(const struct gre_tunnel *a, const union gre_addr *aa,
4204     unsigned int if0idxa, const struct gre_tunnel *b,
4205     const union gre_addr *ab,unsigned int if0idxb)
4206 {
4207 	int rv;
4208 
4209 	rv = nvgre_cmp_tunnel(a, b);
4210 	if (rv != 0)
4211 		return (rv);
4212 
4213 	rv = gre_ip_cmp(a->t_af, aa, ab);
4214 	if (rv != 0)
4215 		return (rv);
4216 
4217 	if (if0idxa > if0idxb)
4218 		return (1);
4219 	if (if0idxa < if0idxb)
4220 		return (-1);
4221 
4222 	return (0);
4223 }
4224 
/*
 * RB tree comparison for the multicast nvgre tree: softcs are keyed
 * by tunnel properties, multicast group (t_dst), and parent
 * interface index.
 */
static inline int
nvgre_cmp_mcast_sc(const struct nvgre_softc *na, const struct nvgre_softc *nb)
{
	const struct gre_tunnel *a = &na->sc_tunnel;
	const struct gre_tunnel *b = &nb->sc_tunnel;

	return (nvgre_cmp_mcast(a, &a->t_dst, na->sc_ifp0,
	    b, &b->t_dst, nb->sc_ifp0));
}

RBT_GENERATE(nvgre_ucast_tree, nvgre_softc, sc_uentry, nvgre_cmp_ucast);
RBT_GENERATE(nvgre_mcast_tree, nvgre_softc, sc_mentry, nvgre_cmp_mcast_sc);
4237 
/*
 * RB tree comparison for eoip(4): interfaces are keyed by tunnel id
 * (stored in t_key), routing table, address family, and the source
 * and destination addresses.
 */
static inline int
eoip_cmp(const struct eoip_softc *ea, const struct eoip_softc *eb)
{
	const struct gre_tunnel *a = &ea->sc_tunnel;
	const struct gre_tunnel *b = &eb->sc_tunnel;
	int rv;

	if (a->t_key > b->t_key)
		return (1);
	if (a->t_key < b->t_key)
		return (-1);

	/* sort by routing table */
	if (a->t_rtableid > b->t_rtableid)
		return (1);
	if (a->t_rtableid < b->t_rtableid)
		return (-1);

	/* sort by address */
	if (a->t_af > b->t_af)
		return (1);
	if (a->t_af < b->t_af)
		return (-1);

	rv = gre_ip_cmp(a->t_af, &a->t_src, &b->t_src);
	if (rv != 0)
		return (rv);

	rv = gre_ip_cmp(a->t_af, &a->t_dst, &b->t_dst);
	if (rv != 0)
		return (rv);

	return (0);
}

RBT_GENERATE(eoip_tree, eoip_softc, sc_entry, eoip_cmp);
4274 
4275 static int
4276 nvgre_eb_port_eq(void *arg, void *a, void *b)
4277 {
4278 	struct nvgre_softc *sc = arg;
4279 
4280 	return (gre_ip_cmp(sc->sc_tunnel.t_af, a, b) == 0);
4281 }
4282 
4283 static void *
4284 nvgre_eb_port_take(void *arg, void *port)
4285 {
4286 	union gre_addr *ea = port;
4287 	union gre_addr *endpoint;
4288 
4289 	endpoint = pool_get(&nvgre_endpoint_pool, PR_NOWAIT);
4290 	if (endpoint == NULL)
4291 		return (NULL);
4292 
4293 	*endpoint = *ea;
4294 
4295 	return (endpoint);
4296 }
4297 
/*
 * etherbridge callback: release an endpoint copy taken by
 * nvgre_eb_port_take().
 */
static void
nvgre_eb_port_rele(void *arg, void *port)
{
	union gre_addr *endpoint = port;

	pool_put(&nvgre_endpoint_pool, endpoint);
}
4305 
/*
 * etherbridge callback: report the interface name associated with a
 * learned port; for nvgre that is always the nvgre interface itself.
 */
static size_t
nvgre_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
{
	struct nvgre_softc *sc = arg;

	return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
}
4313 
/*
 * etherbridge callback: translate a learned tunnel endpoint back
 * into a sockaddr for reporting to userland.
 */
static void
nvgre_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	struct nvgre_softc *sc = arg;
	union gre_addr *endpoint = port;

	switch (sc->sc_tunnel.t_af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = endpoint->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* restore the embedded scope id for link-local addresses */
		in6_recoverscope(sin6, &endpoint->in6);

		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_tunnel.t_af);
	}
}
4344